diff options
588 files changed, 21207 insertions, 6469 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-hid b/Documentation/ABI/testing/sysfs-driver-hid index b6490e14fe83..48942cacb0bf 100644 --- a/Documentation/ABI/testing/sysfs-driver-hid +++ b/Documentation/ABI/testing/sysfs-driver-hid @@ -8,3 +8,13 @@ Description: When read, this file returns the device's raw binary HID report descriptor. This file cannot be written. Users: HIDAPI library (http://www.signal11.us/oss/hidapi) + +What: For USB devices : /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/country + For BT devices : /sys/class/bluetooth/hci<addr>/<hid-bus>:<vendor-id>:<product-id>.<num>/country + Symlink : /sys/class/hidraw/hidraw<num>/device/country +Date: February 2015 +KernelVersion: 3.19 +Contact: Olivier Gay <ogay@logitech.com> +Description: When read, this file returns the hex integer value in ASCII + of the device's HID country code (e.g. 21 for US). + This file cannot be written. diff --git a/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff b/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff index 167d9032b970..b3f6a2ac5007 100644 --- a/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff +++ b/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff @@ -5,3 +5,48 @@ Contact: Michal Malý <madcatxster@gmail.com> Description: Display minimum, maximum and current range of the steering wheel. Writing a value within min and max boundaries sets the range of the wheel. + +What: /sys/bus/hid/drivers/logitech/<dev>/alternate_modes +Date: Feb 2015 +KernelVersion: 4.1 +Contact: Michal Malý <madcatxster@gmail.com> +Description: Displays a set of alternate modes supported by a wheel. Each + mode is listed as follows: + Tag: Mode Name + Currently active mode is marked with an asterisk. List also + contains an abstract item "native" which always denotes the + native mode of the wheel. Echoing the mode tag switches the + wheel into the corresponding mode. Depending on the exact model + of the wheel not all listed modes might always be selectable. + If a wheel cannot be switched into the desired mode, -EINVAL + is returned accompanied with an explanatory message in the + kernel log. + This entry is not created for devices that have only one mode. + + Currently supported mode switches: + Driving Force Pro: + DF-EX --> DFP + + G25: + DF-EX --> DFP --> G25 + + G27: + DF-EX <*> DFP <-> G25 <-> G27 + DF-EX <*--------> G25 <-> G27 + DF-EX <*----------------> G27 + + DFGT: + DF-EX <*> DFP <-> DFGT + DF-EX <*--------> DFGT + + * hid_logitech module must be loaded with lg4ff_no_autoswitch=1 + parameter set in order for the switch to DF-EX mode to work. + +What: /sys/bus/hid/drivers/logitech/<dev>/real_id +Date: Feb 2015 +KernelVersion: 4.1 +Contact: Michal Malý <madcatxster@gmail.com> +Description: Displays the real model of the wheel regardless of any + alternate mode the wheel might be switched to. + It is a read-only value. + This entry is not created for devices that have only one mode. diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt index 183e41bdcb69..dab6da3382d9 100644 --- a/Documentation/atomic_ops.txt +++ b/Documentation/atomic_ops.txt @@ -201,11 +201,11 @@ These routines add 1 and subtract 1, respectively, from the given atomic_t and return the new counter value after the operation is performed. -Unlike the above routines, it is required that explicit memory -barriers are performed before and after the operation. It must be -done such that all memory operations before and after the atomic -operation calls are strongly ordered with respect to the atomic -operation itself. +Unlike the above routines, it is required that these primitives +include explicit memory barriers that are performed before and after +the operation. It must be done such that all memory operations before +and after the atomic operation calls are strongly ordered with respect +to the atomic operation itself. For example, it should behave as if a smp_mb() call existed both before and after the atomic operation. @@ -233,21 +233,21 @@ These two routines increment and decrement by 1, respectively, the given atomic counter. They return a boolean indicating whether the resulting counter value was zero or not. -It requires explicit memory barrier semantics around the operation as -above. +Again, these primitives provide explicit memory barrier semantics around +the atomic operation. int atomic_sub_and_test(int i, atomic_t *v); This is identical to atomic_dec_and_test() except that an explicit -decrement is given instead of the implicit "1". It requires explicit -memory barrier semantics around the operation. +decrement is given instead of the implicit "1". This primitive must +provide explicit memory barrier semantics around the operation. int atomic_add_negative(int i, atomic_t *v); -The given increment is added to the given atomic counter value. A -boolean is return which indicates whether the resulting counter value -is negative. It requires explicit memory barrier semantics around the -operation. +The given increment is added to the given atomic counter value. A boolean +is return which indicates whether the resulting counter value is negative. +This primitive must provide explicit memory barrier semantics around +the operation. Then: @@ -257,7 +257,7 @@ This performs an atomic exchange operation on the atomic variable v, setting the given new value. It returns the old value that the atomic variable v had just before the operation. -atomic_xchg requires explicit memory barriers around the operation. +atomic_xchg must provide explicit memory barriers around the operation. int atomic_cmpxchg(atomic_t *v, int old, int new); @@ -266,7 +266,7 @@ with the given old and new values. Like all atomic_xxx operations, atomic_cmpxchg will only satisfy its atomicity semantics as long as all other accesses of *v are performed through atomic_xxx operations. -atomic_cmpxchg requires explicit memory barriers around the operation. +atomic_cmpxchg must provide explicit memory barriers around the operation. The semantics for atomic_cmpxchg are the same as those defined for 'cas' below. @@ -279,8 +279,8 @@ If the atomic value v is not equal to u, this function adds a to v, and returns non zero. If v is equal to u then it returns zero. This is done as an atomic operation. -atomic_add_unless requires explicit memory barriers around the operation -unless it fails (returns 0). +atomic_add_unless must provide explicit memory barriers around the +operation unless it fails (returns 0). atomic_inc_not_zero, equivalent to atomic_add_unless(v, 1, 0) @@ -460,9 +460,9 @@ the return value into an int. There are other places where things like this occur as well. These routines, like the atomic_t counter operations returning values, -require explicit memory barrier semantics around their execution. All -memory operations before the atomic bit operation call must be made -visible globally before the atomic bit operation is made visible. +must provide explicit memory barrier semantics around their execution. +All memory operations before the atomic bit operation call must be +made visible globally before the atomic bit operation is made visible. Likewise, the atomic bit operation must be visible globally before any subsequent memory operation is made visible. For example: @@ -536,8 +536,9 @@ except that two underscores are prefixed to the interface name. These non-atomic variants also do not require any special memory barrier semantics. -The routines xchg() and cmpxchg() need the same exact memory barriers -as the atomic and bit operations returning values. +The routines xchg() and cmpxchg() must provide the same exact +memory-barrier semantics as the atomic and bit operations returning +values. Spinlocks and rwlocks have memory barrier expectations as well. The rule to follow is simple: diff --git a/Documentation/hid/hid-sensor.txt b/Documentation/hid/hid-sensor.txt index 948b0989c433..b287752a31cd 100644 --- a/Documentation/hid/hid-sensor.txt +++ b/Documentation/hid/hid-sensor.txt @@ -138,3 +138,87 @@ accelerometer wants to poll X axis value, then it can call this function with the usage id of X axis. HID sensors can provide events, so this is not necessary to poll for any field. If there is some new sample, the core driver will call registered callback function to process the sample. + + +---------- + +HID Custom and generic Sensors + +HID Sensor specification defines two special sensor usage types. Since they +don't represent a standard sensor, it is not possible to define using Linux IIO +type interfaces. +The purpose of these sensors is to extend the functionality or provide a +way to obfuscate the data being communicated by a sensor. Without knowing the +mapping between the data and its encapsulated form, it is difficult for +an application/driver to determine what data is being communicated by the sensor. +This allows some differentiating use cases, where vendor can provide applications. +Some common use cases are debug other sensors or to provide some events like +keyboard attached/detached or lid open/close. + +To allow application to utilize these sensors, here they are exported uses sysfs +attribute groups, attributes and misc device interface. + +An example of this representation on sysfs: +/sys/devices/pci0000:00/INT33C2:00/i2c-0/i2c-INT33D1:00/0018:8086:09FA.0001/HID-SENSOR-2000e1.6.auto$ tree -R +. +????????? enable_sensor +????????? feature-0-200316 +??????? ????????? feature-0-200316-maximum +??????? ????????? feature-0-200316-minimum +??????? ????????? feature-0-200316-name +??????? ????????? feature-0-200316-size +??????? ????????? feature-0-200316-unit-expo +??????? ????????? feature-0-200316-units +??????? ????????? feature-0-200316-value +????????? feature-1-200201 +??????? ????????? feature-1-200201-maximum +??????? ????????? feature-1-200201-minimum +??????? ????????? feature-1-200201-name +??????? ????????? feature-1-200201-size +??????? ????????? feature-1-200201-unit-expo +??????? ????????? feature-1-200201-units +??????? ????????? feature-1-200201-value +????????? input-0-200201 +??????? ????????? input-0-200201-maximum +??????? ????????? input-0-200201-minimum +??????? ????????? input-0-200201-name +??????? ????????? input-0-200201-size +??????? ????????? input-0-200201-unit-expo +??????? ????????? input-0-200201-units +??????? ????????? input-0-200201-value +????????? input-1-200202 +??????? ????????? input-1-200202-maximum +??????? ????????? input-1-200202-minimum +??????? ????????? input-1-200202-name +??????? ????????? input-1-200202-size +??????? ????????? input-1-200202-unit-expo +??????? ????????? input-1-200202-units +??????? ????????? input-1-200202-value + +Here there is a custom sensors with four fields, two feature and two inputs. +Each field is represented by a set of attributes. All fields except the "value" +are read only. The value field is a RW field. +Example +/sys/bus/platform/devices/HID-SENSOR-2000e1.6.auto/feature-0-200316$ grep -r . * +feature-0-200316-maximum:6 +feature-0-200316-minimum:0 +feature-0-200316-name:property-reporting-state +feature-0-200316-size:1 +feature-0-200316-unit-expo:0 +feature-0-200316-units:25 +feature-0-200316-value:1 + +How to enable such sensor? +By default sensor can be power gated. To enable sysfs attribute "enable" can be +used. +$ echo 1 > enable_sensor + +Once enabled and powered on, sensor can report value using HID reports. +These reports are pushed using misc device interface in a FIFO order. +/dev$ tree | grep HID-SENSOR-2000e1.6.auto +??????? ????????? 10:53 -> ../HID-SENSOR-2000e1.6.auto +????????? HID-SENSOR-2000e1.6.auto + +Each reports can be of variable length preceded by a header. This header +consist of a 32 bit usage id, 64 bit time stamp and 32 bit length field of raw +data. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8cc635f4c4f0..327556349757 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2973,6 +2973,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Set maximum number of finished RCU callbacks to process in one batch. + rcutree.gp_init_delay= [KNL] + Set the number of jiffies to delay each step of + RCU grace-period initialization. This only has + effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT is + set. + rcutree.rcu_fanout_leaf= [KNL] Increase the number of CPUs assigned to each leaf rcu_node structure. Useful for very large @@ -2996,11 +3002,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. value is one, and maximum value is HZ. rcutree.kthread_prio= [KNL,BOOT] - Set the SCHED_FIFO priority of the RCU - per-CPU kthreads (rcuc/N). This value is also - used for the priority of the RCU boost threads - (rcub/N). Valid values are 1-99 and the default - is 1 (the least-favored priority). + Set the SCHED_FIFO priority of the RCU per-CPU + kthreads (rcuc/N). This value is also used for + the priority of the RCU boost threads (rcub/N) + and for the RCU grace-period kthreads (rcu_bh, + rcu_preempt, and rcu_sched). If RCU_BOOST is + set, valid values are 1-99 and the default is 1 + (the least-favored priority). Otherwise, when + RCU_BOOST is not set, valid values are 0-99 and + the default is zero (non-realtime operation). rcutree.rcu_nocb_leader_stride= [KNL] Set the number of NOCB kthread groups, which diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt index f3cd299fcc41..f4cbfe0ba108 100644 --- a/Documentation/kernel-per-CPU-kthreads.txt +++ b/Documentation/kernel-per-CPU-kthreads.txt @@ -190,20 +190,24 @@ To reduce its OS jitter, do any of the following: on each CPU, including cs_dbs_timer() and od_dbs_timer(). WARNING: Please check your CPU specifications to make sure that this is safe on your particular system. - d. It is not possible to entirely get rid of OS jitter - from vmstat_update() on CONFIG_SMP=y systems, but you - can decrease its frequency by writing a large value - to /proc/sys/vm/stat_interval. The default value is - HZ, for an interval of one second. Of course, larger - values will make your virtual-memory statistics update - more slowly. Of course, you can also run your workload - at a real-time priority, thus preempting vmstat_update(), + d. As of v3.18, Christoph Lameter's on-demand vmstat workers + commit prevents OS jitter due to vmstat_update() on + CONFIG_SMP=y systems. Before v3.18, is not possible + to entirely get rid of the OS jitter, but you can + decrease its frequency by writing a large value to + /proc/sys/vm/stat_interval. The default value is HZ, + for an interval of one second. Of course, larger values + will make your virtual-memory statistics update more + slowly. Of course, you can also run your workload at + a real-time priority, thus preempting vmstat_update(), but if your workload is CPU-bound, this is a bad idea. However, there is an RFC patch from Christoph Lameter (based on an earlier one from Gilad Ben-Yossef) that reduces or even eliminates vmstat overhead for some workloads at https://lkml.org/lkml/2013/9/4/379. - e. If running on high-end powerpc servers, build with + e. Boot with "elevator=noop" to avoid workqueue use by + the block layer. + f. If running on high-end powerpc servers, build with CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS daemon from running on each CPU every second or so. (This will require editing Kconfig files and will defeat @@ -211,12 +215,12 @@ To reduce its OS jitter, do any of the following: due to the rtas_event_scan() function. WARNING: Please check your CPU specifications to make sure that this is safe on your particular system. - f. If running on Cell Processor, build your kernel with + g. If running on Cell Processor, build your kernel with CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from spu_gov_work(). WARNING: Please check your CPU specifications to make sure that this is safe on your particular system. - g. If running on PowerMAC, build your kernel with + h. If running on PowerMAC, build your kernel with CONFIG_PMAC_RACKMETER=n to disable the CPU-meter, avoiding OS jitter from rackmeter_do_timer(). @@ -258,8 +262,12 @@ Purpose: Detect software lockups on each CPU. To reduce its OS jitter, do at least one of the following: 1. Build with CONFIG_LOCKUP_DETECTOR=n, which will prevent these kthreads from being created in the first place. -2. Echo a zero to /proc/sys/kernel/watchdog to disable the +2. Boot with "nosoftlockup=0", which will also prevent these kthreads + from being created. Other related watchdog and softlockup boot + parameters may be found in Documentation/kernel-parameters.txt + and Documentation/watchdog/watchdog-parameters.txt. +3. Echo a zero to /proc/sys/kernel/watchdog to disable the watchdog timer. -3. Echo a large number of /proc/sys/kernel/watchdog_thresh in +4. Echo a large number of /proc/sys/kernel/watchdog_thresh in order to reduce the frequency of OS jitter due to the watchdog timer down to a level that is acceptable for your workload. diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index ca2387ef27ab..6974f1c2b4e1 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -592,9 +592,9 @@ See also the subsection on "Cache Coherency" for a more thorough example. CONTROL DEPENDENCIES -------------------- -A control dependency requires a full read memory barrier, not simply a data -dependency barrier to make it work correctly. Consider the following bit of -code: +A load-load control dependency requires a full read memory barrier, not +simply a data dependency barrier to make it work correctly. Consider the +following bit of code: q = ACCESS_ONCE(a); if (q) { @@ -615,14 +615,15 @@ case what's actually required is: } However, stores are not speculated. This means that ordering -is- provided -in the following example: +for load-store control dependencies, as in the following example: q = ACCESS_ONCE(a); if (q) { ACCESS_ONCE(b) = p; } -Please note that ACCESS_ONCE() is not optional! Without the +Control dependencies pair normally with other types of barriers. +That said, please note that ACCESS_ONCE() is not optional! Without the ACCESS_ONCE(), might combine the load from 'a' with other loads from 'a', and the store to 'b' with other stores to 'b', with possible highly counterintuitive effects on ordering. @@ -813,6 +814,8 @@ In summary: barrier() can help to preserve your control dependency. Please see the Compiler Barrier section for more information. + (*) Control dependencies pair normally with other types of barriers. + (*) Control dependencies do -not- provide transitivity. If you need transitivity, use smp_mb(). @@ -823,14 +826,14 @@ SMP BARRIER PAIRING When dealing with CPU-CPU interactions, certain types of memory barrier should always be paired. A lack of appropriate pairing is almost certainly an error. -General barriers pair with each other, though they also pair with -most other types of barriers, albeit without transitivity. An acquire -barrier pairs with a release barrier, but both may also pair with other -barriers, including of course general barriers. A write barrier pairs -with a data dependency barrier, an acquire barrier, a release barrier, -a read barrier, or a general barrier. Similarly a read barrier or a -data dependency barrier pairs with a write barrier, an acquire barrier, -a release barrier, or a general barrier: +General barriers pair with each other, though they also pair with most +other types of barriers, albeit without transitivity. An acquire barrier +pairs with a release barrier, but both may also pair with other barriers, +including of course general barriers. A write barrier pairs with a data +dependency barrier, a control dependency, an acquire barrier, a release +barrier, a read barrier, or a general barrier. Similarly a read barrier, +control dependency, or a data dependency barrier pairs with a write +barrier, an acquire barrier, a release barrier, or a general barrier: CPU 1 CPU 2 =============== =============== @@ -850,6 +853,19 @@ Or: <data dependency barrier> y = *x; +Or even: + + CPU 1 CPU 2 + =============== =============================== + r1 = ACCESS_ONCE(y); + <general barrier> + ACCESS_ONCE(y) = 1; if (r2 = ACCESS_ONCE(x)) { + <implicit control dependency> + ACCESS_ONCE(y) = 1; + } + + assert(r1 == 0 || r2 == 0); + Basically, the read barrier always has to be there, even though it can be of the "weaker" type. diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt index cca122f25120..6eaf576294f3 100644 --- a/Documentation/timers/NO_HZ.txt +++ b/Documentation/timers/NO_HZ.txt @@ -158,13 +158,9 @@ not come for free: to the need to inform kernel subsystems (such as RCU) about the change in mode. -3. POSIX CPU timers on adaptive-tick CPUs may miss their deadlines - (perhaps indefinitely) because they currently rely on - scheduling-tick interrupts. This will likely be fixed in - one of two ways: (1) Prevent CPUs with POSIX CPU timers from - entering adaptive-tick mode, or (2) Use hrtimers or other - adaptive-ticks-immune mechanism to cause the POSIX CPU timer to - fire properly. +3. POSIX CPU timers prevent CPUs from entering adaptive-tick mode. + Real-time applications needing to take actions based on CPU time + consumption need to use other means of doing so. 4. If there are more perf events pending than the hardware can accommodate, they are normally round-robined so as to collect diff --git a/MAINTAINERS b/MAINTAINERS index 38579ac581ce..17631e6c0151 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4337,10 +4337,10 @@ F: scripts/get_maintainer.pl GFS2 FILE SYSTEM M: Steven Whitehouse <swhiteho@redhat.com> +M: Bob Peterson <rpeterso@redhat.com> L: cluster-devel@redhat.com W: http://sources.redhat.com/cluster/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git S: Supported F: Documentation/filesystems/gfs2*.txt F: fs/gfs2/ diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 98c00a2d4dd9..f46efd14059d 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -155,8 +155,6 @@ int copy_thread(unsigned long clone_flags, */ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp) { - set_fs(USER_DS); /* user space */ - regs->sp = usp; regs->ret = pc; diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index e550b117ec4f..93c6ea52b671 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -841,7 +841,7 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc, break; case DW_CFA_GNU_window_save: default: - unw_debug("UNKNOW OPCODE 0x%x\n", opcode); + unw_debug("UNKNOWN OPCODE 0x%x\n", opcode); result = 0; break; } diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 7fc70ae21185..dc7d0a95bd36 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -648,7 +648,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * Per-cpu breakpoints are not supported by our stepping * mechanism. */ - if (!bp->hw.bp_target) + if (!bp->hw.target) return -EINVAL; /* diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 98bbe06e469c..e7d934d3afe0 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -527,7 +527,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * Disallow per-task kernel breakpoints since these would * complicate the stepping code. */ - if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.bp_target) + if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target) return -EINVAL; return 0; diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 8ad3e90cc8fc..1c7259597395 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -413,16 +413,14 @@ int __cpu_disable(void) return 0; } -static DECLARE_COMPLETION(cpu_killed); - int __cpu_die(unsigned int cpu) { - return wait_for_completion_timeout(&cpu_killed, 5000); + return cpu_wait_death(cpu, 5); } void cpu_die(void) { - complete(&cpu_killed); + (void)cpu_report_death(); atomic_dec(&init_mm.mm_users); atomic_dec(&init_mm.mm_count); diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c index 57d2ea8d1977..3ae9f5a166a0 100644 --- a/arch/c6x/kernel/process.c +++ b/arch/c6x/kernel/process.c @@ -101,7 +101,6 @@ void start_thread(struct pt_regs *regs, unsigned int pc, unsigned long usp) */ usp -= 8; - set_fs(USER_DS); regs->pc = pc; regs->sp = usp; regs->tsr |= 0x40; /* set user mode */ diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index 336713ab4745..85ca6727ca07 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -176,8 +176,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set) struct sigframe __user *frame; int rsig, sig = ksig->sig; - set_fs(USER_DS); - frame = get_sigframe(ksig, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) @@ -257,8 +255,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set) struct rt_sigframe __user *frame; int rsig, sig = ksig->sig; - set_fs(USER_DS); - frame = get_sigframe(ksig, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 0a0dd5c05b46..a9ebd471823a 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -37,8 +37,6 @@ */ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { - /* Set to run with user-mode data segmentation */ - set_fs(USER_DS); /* We want to zero all data-containing registers. Is this overkill? */ memset(regs, 0, sizeof(*regs)); /* We might want to also zero all Processor registers here */ diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index 7736c6660a15..8c25e0c8f6a5 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -214,8 +214,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, regs->r2 = (unsigned long)&frame->uc; regs->bpc = (unsigned long)ksig->ka.sa.sa_handler; - set_fs(USER_DS); - #if DEBUG_SIG printk("SIG deliver (%s:%d): sp=%p pc=%p\n", current->comm, current->pid, frame, regs->pc); diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index 13272fd5a5ba..0838ca699764 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -111,7 +111,6 @@ struct thread_struct { */ #define start_thread(regs, pc, usp) do { \ unsigned int *argc = (unsigned int *) bprm->exec; \ - set_fs(USER_DS); \ current->thread.int_depth = 1; \ /* Force this process down to user land */ \ regs->ctx.SaveMask = TBICTX_PRIV_BIT; \ diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index f006d2276f40..ac3a199e33e7 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -261,7 +261,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) } #ifdef CONFIG_HOTPLUG_CPU -static DECLARE_COMPLETION(cpu_killed); /* * __cpu_disable runs on the processor to be shutdown. @@ -299,7 +298,7 @@ int __cpu_disable(void) */ void __cpu_die(unsigned int cpu) { - if (!wait_for_completion_timeout(&cpu_killed, msecs_to_jiffies(1))) + if (!cpu_wait_death(cpu, 1)) pr_err("CPU%u: unable to kill\n", cpu); } @@ -314,7 +313,7 @@ void cpu_die(void) local_irq_disable(); idle_task_exit(); - complete(&cpu_killed); + (void)cpu_report_death(); asm ("XOR TXENABLE, D0Re0,D0Re0\n"); } diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index a1cbaf90e2ea..20ccd4e2baa5 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -236,8 +236,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, /* Offset to handle microblaze rtid r14, 0 */ regs->pc = (unsigned long)ksig->ka.sa.sa_handler; - set_fs(USER_DS); - #ifdef DEBUG_SIG pr_info("SIG deliver (%s:%d): sp=%p pc=%08lx\n", current->comm, current->pid, frame, regs->pc); diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 0e075b5ad2a5..2f8c74f93e70 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -94,7 +94,6 @@ void show_regs(struct pt_regs *regs) void flush_thread(void) { - set_fs(USER_DS); } int copy_thread(unsigned long clone_flags, diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 386af258591d..7095dfe7666b 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -197,7 +197,6 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { unsigned long sr = mfspr(SPR_SR) & ~SPR_SR_SM; - set_fs(USER_DS); memset(regs, 0, sizeof(struct pt_regs)); regs->pc = pc; diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 942c7b1678e3..993090422690 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -106,10 +106,6 @@ struct kvmppc_vcpu_book3s { spinlock_t mmu_lock; }; -#define CONTEXT_HOST 0 -#define CONTEXT_GUEST 1 -#define CONTEXT_GUEST_END 2 - #define VSID_REAL 0x07ffffffffc00000ULL #define VSID_BAT 0x07ffffffffb00000ULL #define VSID_64K 0x0800000000000000ULL diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h index 6e909f3e6a46..37d2da6feabf 100644 --- a/arch/powerpc/include/asm/smu.h +++ b/arch/powerpc/include/asm/smu.h @@ -478,7 +478,7 @@ extern unsigned long smu_cmdbuf_abs; /* - * Kenrel asynchronous i2c interface + * Kernel asynchronous i2c interface */ #define SMU_I2C_READ_MAX 0x1d diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index b8e15c678960..308c5e15676b 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -721,7 +721,7 @@ void __init early_init_devtree(void *params) */ of_scan_flat_dt(early_init_dt_scan_cpus, NULL); if (boot_cpuid < 0) { - printk("Failed to indentify boot CPU !\n"); + printk("Failed to identify boot CPU !\n"); BUG(); } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 7c4f6690533a..7fd60dcb2cb0 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -124,7 +124,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} -static void power_pmu_flush_branch_stack(void) {} +static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {} static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } #endif /* CONFIG_PPC32 */ @@ -350,6 +350,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event) cpuhw->bhrb_context = event->ctx; } cpuhw->bhrb_users++; + perf_sched_cb_inc(event->ctx->pmu); } static void power_pmu_bhrb_disable(struct perf_event *event) @@ -361,6 +362,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event) cpuhw->bhrb_users--; WARN_ON_ONCE(cpuhw->bhrb_users < 0); + perf_sched_cb_dec(event->ctx->pmu); if (!cpuhw->disabled && !cpuhw->bhrb_users) { /* BHRB cannot be turned off when other @@ -375,9 +377,12 @@ static void power_pmu_bhrb_disable(struct perf_event *event) /* Called from ctxsw to prevent one process's branch entries to * mingle with the other process's entries during context switch. */ -static void power_pmu_flush_branch_stack(void) +static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) { - if (ppmu->bhrb_nr) + if (!ppmu->bhrb_nr) + return; + + if (sched_in) power_pmu_bhrb_reset(); } /* Calculate the to address for a branch */ @@ -1901,7 +1906,7 @@ static struct pmu power_pmu = { .cancel_txn = power_pmu_cancel_txn, .commit_txn = power_pmu_commit_txn, .event_idx = power_pmu_event_idx, - .flush_branch_stack = power_pmu_flush_branch_stack, + .sched_task = power_pmu_sched_task, }; /* diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 9445a824819e..abeb9ec0d117 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1126,7 +1126,7 @@ static int h_24x7_event_init(struct perf_event *event) /* Physical domains & other lpars require extra capabilities */ if (!caps.collect_privileged && (is_physical_domain(domain) || (event_get_lpar(event) != event_get_lpar_max()))) { - pr_devel("hv permisions disallow: is_physical_domain:%d, lpar=0x%llx\n", + pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n", is_physical_domain(domain), event_get_lpar(event)); return -EACCES; diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c index 7a180f0308d5..680232d6ba48 100644 --- a/arch/powerpc/platforms/85xx/p1022_rdk.c +++ b/arch/powerpc/platforms/85xx/p1022_rdk.c @@ -50,14 +50,14 @@ void p1022rdk_set_pixel_clock(unsigned int pixclock) /* Map the global utilities registers. */ guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts"); if (!guts_np) { - pr_err("p1022rdk: missing global utilties device node\n"); + pr_err("p1022rdk: missing global utilities device node\n"); return; } guts = of_iomap(guts_np, 0); of_node_put(guts_np); if (!guts) { - pr_err("p1022rdk: could not map global utilties device\n"); + pr_err("p1022rdk: could not map global utilities device\n"); return; } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ac2b75d74cd2..6321fd8bf813 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -134,6 +134,7 @@ config S390 select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_KVM if 64BIT + select HAVE_LIVEPATCH select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MEMBLOCK_PHYS_MAP @@ -165,6 +166,8 @@ source "init/Kconfig" source "kernel/Kconfig.freezer" +source "kernel/livepatch/Kconfig" + menu "Processor type and features" config HAVE_MARCH_Z900_FEATURES diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 99824ff8dd35..df7d8cbee377 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -21,7 +21,7 @@ #include <linux/module.h> #include <linux/seq_file.h> #include <linux/mount.h> -#include <linux/aio.h> +#include <linux/uio.h> #include <asm/ebcdic.h> #include "hypfs.h" diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h new file mode 100644 index 000000000000..7aa799134a11 --- /dev/null +++ b/arch/s390/include/asm/livepatch.h @@ -0,0 +1,43 @@ +/* + * livepatch.h - s390-specific Kernel Live Patching Core + * + * Copyright (c) 2013-2015 SUSE + * Authors: Jiri Kosina + * Vojtech Pavlik + * Jiri Slaby + */ + +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#ifndef ASM_LIVEPATCH_H +#define ASM_LIVEPATCH_H + +#include <linux/module.h> + +#ifdef CONFIG_LIVEPATCH +static inline int klp_check_compiler_support(void) +{ + return 0; +} + +static inline int klp_write_module_reloc(struct module *mod, unsigned long + type, unsigned long loc, unsigned long value) +{ + /* not supported yet */ + return -ENOSYS; +} + +static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +{ + regs->psw.addr = ip; +} +#else +#error Live patching support is disabled; check CONFIG_LIVEPATCH +#endif + +#endif diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 653a7ec09ef5..3208d33a48cb 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -10,6 +10,13 @@ #define TRACE_INCLUDE_FILE trace-s390 /* + * The TRACE_SYSTEM_VAR defaults to TRACE_SYSTEM, but must be a + * legitimate C variable. It is not exported to user space. + */ +#undef TRACE_SYSTEM_VAR +#define TRACE_SYSTEM_VAR kvm_s390 + +/* * Trace point for the creation of the kvm instance. */ TRACE_EVENT(kvm_s390_create_vm, diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 0b34f2a704fe..97292890b51b 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -329,8 +329,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, if (err) return -EFAULT; - set_fs(USER_DS); - pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n", current->comm, task_pid_nr(current), frame, regs->pc, regs->pr); @@ -408,8 +406,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, if (err) return -EFAULT; - set_fs(USER_DS); - pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n", current->comm, task_pid_nr(current), frame, regs->pc, regs->pr); diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index 71993c6a7d94..0462995d4d7f 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -457,8 +457,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs regs->pc = neff_sign_extend((unsigned long)ksig->ka.sa.sa_handler); - set_fs(USER_DS); - /* Broken %016Lx */ pr_debug("SIG deliver (#%d,%s:%d): sp=%p pc=%08Lx%08Lx link=%08Lx%08Lx\n", signal, current->comm, current->pid, frame, @@ -547,8 +545,6 @@ static int setup_rt_frame(struct ksignal *kig, sigset_t *set, regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->uc.uc_mcontext; regs->pc = neff_sign_extend((unsigned long)ksig->ka.sa.sa_handler); - set_fs(USER_DS); - pr_debug("SIG deliver (#%d,%s:%d): sp=%p pc=%08Lx%08Lx link=%08Lx%08Lx\n", signal, current->comm, current->pid, frame, regs->pc >> 32, regs->pc & 0xffffffff, diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index d2b12988d2ed..bf2caa1dedc5 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -34,8 +34,6 @@ extern int _debug_hotplug_cpu(int cpu, int action); #endif #endif -DECLARE_PER_CPU(int, cpu_state); - int mwait_usable(const struct cpuinfo_x86 *); #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 854c04b3c9c2..7ee9b94d9921 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -12,7 +12,7 @@ #include <asm/disabled-features.h> #endif -#define NCAPINTS 11 /* N 32-bit words worth of info */ +#define NCAPINTS 13 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -195,6 +195,7 @@ #define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */ #define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */ #define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */ +#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -226,6 +227,7 @@ #define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ #define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ @@ -244,6 +246,12 @@ #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ +#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ + +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ +#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index a455a53d789a..2d29197bd2fb 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -32,8 +32,8 @@ static inline int klp_check_compiler_support(void) #endif return 0; } -extern int klp_write_module_reloc(struct module *mod, unsigned long type, - unsigned long loc, unsigned long value); +int klp_write_module_reloc(struct module *mod, unsigned long type, + unsigned long loc, unsigned long value); static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d2203b5d9538..23ba6765b718 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -109,6 +109,9 @@ struct cpuinfo_x86 { /* in KB - valid for CPUS which support this call: */ int x86_cache_size; int x86_cache_alignment; /* In bytes */ + /* Cache QoS architectural values: */ + int x86_cache_max_rmid; /* max index */ + int x86_cache_occ_scale; /* scale to bytes */ int x86_power; unsigned long loops_per_jiffy; /* cpuid returned max cores value: */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 81d02fc7dafa..17a8dced12da 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -150,13 +150,13 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) } void cpu_disable_common(void); -void cpu_die_common(unsigned int cpu); void native_smp_prepare_boot_cpu(void); void native_smp_prepare_cpus(unsigned int max_cpus); void native_smp_cpus_done(unsigned int max_cpus); void common_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_disable(void); +int common_cpu_die(unsigned int cpu); void native_cpu_die(unsigned int cpu); void native_play_dead(void); void play_dead_common(void); diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 3ce079136c11..1a4eae695ca8 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -74,6 +74,24 @@ #define MSR_IA32_PERF_CAPABILITIES 0x00000345 #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 +#define MSR_IA32_RTIT_CTL 0x00000570 +#define RTIT_CTL_TRACEEN BIT(0) +#define RTIT_CTL_OS BIT(2) +#define RTIT_CTL_USR BIT(3) +#define RTIT_CTL_CR3EN BIT(7) +#define RTIT_CTL_TOPA BIT(8) +#define RTIT_CTL_TSC_EN BIT(10) +#define RTIT_CTL_DISRETC BIT(11) +#define RTIT_CTL_BRANCH_EN BIT(13) +#define MSR_IA32_RTIT_STATUS 0x00000571 +#define RTIT_STATUS_CONTEXTEN BIT(1) +#define RTIT_STATUS_TRIGGEREN BIT(2) +#define RTIT_STATUS_ERROR BIT(4) +#define RTIT_STATUS_STOPPED BIT(5) +#define MSR_IA32_RTIT_CR3_MATCH 0x00000572 +#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 +#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 + #define MSR_MTRRfix64K_00000 0x00000250 #define MSR_MTRRfix16K_80000 0x00000258 #define MSR_MTRRfix16K_A0000 0x00000259 diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 80091ae54c2b..9bff68798836 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -39,7 +39,8 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o endif obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ perf_event_intel_uncore_snb.o \ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3f70538012e2..a62cf04dac8a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -646,6 +646,30 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[10] = eax; } + /* Additional Intel-defined flags: level 0x0000000F */ + if (c->cpuid_level >= 0x0000000F) { + u32 eax, ebx, ecx, edx; + + /* QoS sub-leaf, EAX=0Fh, ECX=0 */ + cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx); + c->x86_capability[11] = edx; + if (cpu_has(c, X86_FEATURE_CQM_LLC)) { + /* will be overridden if occupancy monitoring exists */ + c->x86_cache_max_rmid = ebx; + + /* QoS sub-leaf, EAX=0Fh, ECX=1 */ + cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx); + c->x86_capability[12] = edx; + if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) { + c->x86_cache_max_rmid = ecx; + c->x86_cache_occ_scale = ebx; + } + } else { + c->x86_cache_max_rmid = -1; + c->x86_cache_occ_scale = -1; + } + } + /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); c->extended_cpuid_level = xlvl; @@ -834,6 +858,20 @@ static void generic_identify(struct cpuinfo_x86 *c) detect_nopl(c); } +static void x86_init_cache_qos(struct cpuinfo_x86 *c) +{ + /* + * The heavy lifting of max_rmid and cache_occ_scale are handled + * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu + * in case CQM bits really aren't there in this CPU. + */ + if (c != &boot_cpu_data) { + boot_cpu_data.x86_cache_max_rmid = + min(boot_cpu_data.x86_cache_max_rmid, + c->x86_cache_max_rmid); + } +} + /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -923,6 +961,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) init_hypervisor(c); x86_init_rdrand(c); + x86_init_cache_qos(c); /* * Clear/Set all flags overriden by options, need do it diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/kernel/cpu/intel_pt.h new file mode 100644 index 000000000000..1c338b0eba05 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_pt.h @@ -0,0 +1,131 @@ +/* + * Intel(R) Processor Trace PMU driver for perf + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * Intel PT is specified in the Intel Architecture Instruction Set Extensions + * Programming Reference: + * http://software.intel.com/en-us/intel-isa-extensions + */ + +#ifndef __INTEL_PT_H__ +#define __INTEL_PT_H__ + +/* + * Single-entry ToPA: when this close to region boundary, switch + * buffers to avoid losing data. + */ +#define TOPA_PMI_MARGIN 512 + +/* + * Table of Physical Addresses bits + */ +enum topa_sz { + TOPA_4K = 0, + TOPA_8K, + TOPA_16K, + TOPA_32K, + TOPA_64K, + TOPA_128K, + TOPA_256K, + TOPA_512K, + TOPA_1MB, + TOPA_2MB, + TOPA_4MB, + TOPA_8MB, + TOPA_16MB, + TOPA_32MB, + TOPA_64MB, + TOPA_128MB, + TOPA_SZ_END, +}; + +static inline unsigned int sizes(enum topa_sz tsz) +{ + return 1 << (tsz + 12); +}; + +struct topa_entry { + u64 end : 1; + u64 rsvd0 : 1; + u64 intr : 1; + u64 rsvd1 : 1; + u64 stop : 1; + u64 rsvd2 : 1; + u64 size : 4; + u64 rsvd3 : 2; + u64 base : 36; + u64 rsvd4 : 16; +}; + +#define TOPA_SHIFT 12 +#define PT_CPUID_LEAVES 2 + +enum pt_capabilities { + PT_CAP_max_subleaf = 0, + PT_CAP_cr3_filtering, + PT_CAP_topa_output, + PT_CAP_topa_multiple_entries, + PT_CAP_payloads_lip, +}; + +struct pt_pmu { + struct pmu pmu; + u32 caps[4 * PT_CPUID_LEAVES]; +}; + +/** + * struct pt_buffer - buffer configuration; one buffer per task_struct or + * cpu, depending on perf event configuration + * @cpu: cpu for per-cpu allocation + * @tables: list of ToPA tables in this buffer + * @first: shorthand for first topa table + * @last: shorthand for last topa table + * @cur: current topa table + * @nr_pages: buffer size in pages + * @cur_idx: current output region's index within @cur table + * @output_off: offset within the current output region + * @data_size: running total of the amount of data in this buffer + * @lost: if data was lost/truncated + * @head: logical write offset inside the buffer + * @snapshot: if this is for a snapshot/overwrite counter + * @stop_pos: STOP topa entry in the buffer + * @intr_pos: INT topa entry in the buffer + * @data_pages: array of pages from perf + * @topa_index: table of topa entries indexed by page offset + */ +struct pt_buffer { + int cpu; + struct list_head tables; + struct topa *first, *last, *cur; + unsigned int cur_idx; + size_t output_off; + unsigned long nr_pages; + local_t data_size; + local_t lost; + local64_t head; + bool snapshot; + unsigned long stop_pos, intr_pos; + void **data_pages; + struct topa_entry *topa_index[0]; +}; + +/** + * struct pt - per-cpu pt context + * @handle: perf output handle + * @handle_nmi: do handle PT PMI on this cpu, there's an active event + */ +struct pt { + struct perf_output_handle handle; + int handle_nmi; +}; + +#endif /* __INTEL_PT_H__ */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e2888a3ad1e3..87848ebe2bb7 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -263,6 +263,14 @@ static void hw_perf_event_destroy(struct perf_event *event) } } +void hw_perf_lbr_event_destroy(struct perf_event *event) +{ + hw_perf_event_destroy(event); + + /* undo the lbr/bts event accounting */ + x86_del_exclusive(x86_lbr_exclusive_lbr); +} + static inline int x86_pmu_initialized(void) { return x86_pmu.handle_irq != NULL; @@ -302,6 +310,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) return x86_pmu_extra_regs(val, event); } +/* + * Check if we can create event of a certain type (that no conflicting events + * are present). + */ +int x86_add_exclusive(unsigned int what) +{ + int ret = -EBUSY, i; + + if (atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) + return 0; + + mutex_lock(&pmc_reserve_mutex); + for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) + if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) + goto out; + + atomic_inc(&x86_pmu.lbr_exclusive[what]); + ret = 0; + +out: + mutex_unlock(&pmc_reserve_mutex); + return ret; +} + +void x86_del_exclusive(unsigned int what) +{ + atomic_dec(&x86_pmu.lbr_exclusive[what]); +} + int x86_setup_perfctr(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; @@ -346,6 +383,12 @@ int x86_setup_perfctr(struct perf_event *event) /* BTS is currently only allowed for user-mode. */ if (!attr->exclude_kernel) return -EOPNOTSUPP; + + /* disallow bts if conflicting events are present */ + if (x86_add_exclusive(x86_lbr_exclusive_lbr)) + return -EBUSY; + + event->destroy = hw_perf_lbr_event_destroy; } hwc->config |= config; @@ -399,39 +442,41 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.precise_ip > precise) return -EOPNOTSUPP; - /* - * check that PEBS LBR correction does not conflict with - * whatever the user is asking with attr->branch_sample_type - */ - if (event->attr.precise_ip > 1 && - x86_pmu.intel_cap.pebs_format < 2) { - u64 *br_type = &event->attr.branch_sample_type; - - if (has_branch_stack(event)) { - if (!precise_br_compat(event)) - return -EOPNOTSUPP; - - /* branch_sample_type is compatible */ - - } else { - /* - * user did not specify branch_sample_type - * - * For PEBS fixups, we capture all - * the branches at the priv level of the - * event. - */ - *br_type = PERF_SAMPLE_BRANCH_ANY; - - if (!event->attr.exclude_user) - *br_type |= PERF_SAMPLE_BRANCH_USER; - - if (!event->attr.exclude_kernel) - *br_type |= PERF_SAMPLE_BRANCH_KERNEL; - } + } + /* + * check that PEBS LBR correction does not conflict with + * whatever the user is asking with attr->branch_sample_type + */ + if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) { + u64 *br_type = &event->attr.branch_sample_type; + + if (has_branch_stack(event)) { + if (!precise_br_compat(event)) + return -EOPNOTSUPP; + + /* branch_sample_type is compatible */ + + } else { + /* + * user did not specify branch_sample_type + * + * For PEBS fixups, we capture all + * the branches at the priv level of the + * event. + */ + *br_type = PERF_SAMPLE_BRANCH_ANY; + + if (!event->attr.exclude_user) + *br_type |= PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_kernel) + *br_type |= PERF_SAMPLE_BRANCH_KERNEL; } } + if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK) + event->attach_state |= PERF_ATTACH_TASK_DATA; + /* * Generate PMC IRQs: * (keep 'enabled' bit clear for now) @@ -449,6 +494,12 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == PERF_TYPE_RAW) event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; + if (event->attr.sample_period && x86_pmu.limit_period) { + if (x86_pmu.limit_period(event, event->attr.sample_period) > + event->attr.sample_period) + return -EINVAL; + } + return x86_setup_perfctr(event); } @@ -728,14 +779,17 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) struct event_constraint *c; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; struct perf_event *e; - int i, wmin, wmax, num = 0; + int i, wmin, wmax, unsched = 0; struct hw_perf_event *hwc; bitmap_zero(used_mask, X86_PMC_IDX_MAX); + if (x86_pmu.start_scheduling) + x86_pmu.start_scheduling(cpuc); + for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { hwc = &cpuc->event_list[i]->hw; - c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); + c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); hwc->constraint = c; wmin = min(wmin, c->weight); @@ -768,24 +822,30 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) /* slow path */ if (i != n) - num = perf_assign_events(cpuc->event_list, n, wmin, - wmax, assign); + unsched = perf_assign_events(cpuc->event_list, n, wmin, + wmax, assign); /* - * Mark the event as committed, so we do not put_constraint() - * in case new events are added and fail scheduling. + * In case of success (unsched = 0), mark events as committed, + * so we do not put_constraint() in case new events are added + * and fail to be scheduled + * + * We invoke the lower level commit callback to lock the resource + * + * We do not need to do all of this in case we are called to + * validate an event group (assign == NULL) */ - if (!num && assign) { + if (!unsched && assign) { for (i = 0; i < n; i++) { e = cpuc->event_list[i]; e->hw.flags |= PERF_X86_EVENT_COMMITTED; + if (x86_pmu.commit_scheduling) + x86_pmu.commit_scheduling(cpuc, e, assign[i]); } } - /* - * scheduling failed or is just a simulation, - * free resources if necessary - */ - if (!assign || num) { + + if (!assign || unsched) { + for (i = 0; i < n; i++) { e = cpuc->event_list[i]; /* @@ -795,11 +855,18 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if ((e->hw.flags & PERF_X86_EVENT_COMMITTED)) continue; + /* + * release events that failed scheduling + */ if (x86_pmu.put_event_constraints) x86_pmu.put_event_constraints(cpuc, e); } } - return num ? -EINVAL : 0; + + if (x86_pmu.stop_scheduling) + x86_pmu.stop_scheduling(cpuc); + + return unsched ? -EINVAL : 0; } /* @@ -986,6 +1053,9 @@ int x86_perf_event_set_period(struct perf_event *event) if (left > x86_pmu.max_period) left = x86_pmu.max_period; + if (x86_pmu.limit_period) + left = x86_pmu.limit_period(event, left); + per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; /* @@ -1033,7 +1103,6 @@ static int x86_pmu_add(struct perf_event *event, int flags) hwc = &event->hw; - perf_pmu_disable(event->pmu); n0 = cpuc->n_events; ret = n = collect_events(cpuc, event, false); if (ret < 0) @@ -1071,7 +1140,6 @@ done_collect: ret = 0; out: - perf_pmu_enable(event->pmu); return ret; } @@ -1103,7 +1171,7 @@ static void x86_pmu_start(struct perf_event *event, int flags) void perf_event_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; - u64 pebs; + u64 pebs, debugctl; struct cpu_hw_events *cpuc; unsigned long flags; int cpu, idx; @@ -1121,14 +1189,20 @@ void perf_event_print_debug(void) rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); - rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); pr_info("\n"); pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); pr_info("CPU#%d: status: %016llx\n", cpu, status); pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); - pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); + if (x86_pmu.pebs_constraints) { + rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); + pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); + } + if (x86_pmu.lbr_nr) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl); + } } pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); @@ -1321,11 +1395,12 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - int ret = NOTIFY_OK; + int i, ret = NOTIFY_OK; switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - cpuc->kfree_on_online = NULL; + for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) + cpuc->kfree_on_online[i] = NULL; if (x86_pmu.cpu_prepare) ret = x86_pmu.cpu_prepare(cpu); break; @@ -1336,7 +1411,10 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) break; case CPU_ONLINE: - kfree(cpuc->kfree_on_online); + for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) { + kfree(cpuc->kfree_on_online[i]); + cpuc->kfree_on_online[i] = NULL; + } break; case CPU_DYING: @@ -1712,7 +1790,7 @@ static int validate_event(struct perf_event *event) if (IS_ERR(fake_cpuc)) return PTR_ERR(fake_cpuc); - c = x86_pmu.get_event_constraints(fake_cpuc, event); + c = x86_pmu.get_event_constraints(fake_cpuc, -1, event); if (!c || !c->weight) ret = -EINVAL; @@ -1914,10 +1992,10 @@ static const struct attribute_group *x86_pmu_attr_groups[] = { NULL, }; -static void x86_pmu_flush_branch_stack(void) +static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) { - if (x86_pmu.flush_branch_stack) - x86_pmu.flush_branch_stack(); + if (x86_pmu.sched_task) + x86_pmu.sched_task(ctx, sched_in); } void perf_check_microcode(void) @@ -1949,7 +2027,8 @@ static struct pmu pmu = { .commit_txn = x86_pmu_commit_txn, .event_idx = x86_pmu_event_idx, - .flush_branch_stack = x86_pmu_flush_branch_stack, + .sched_task = x86_pmu_sched_task, + .task_ctx_size = sizeof(struct x86_perf_task_context), }; void arch_perf_update_userpage(struct perf_event *event, @@ -1968,13 +2047,23 @@ void arch_perf_update_userpage(struct perf_event *event, data = cyc2ns_read_begin(); + /* + * Internal timekeeping for enabled/running/stopped times + * is always in the local_clock domain. + */ userpg->cap_user_time = 1; userpg->time_mult = data->cyc2ns_mul; userpg->time_shift = data->cyc2ns_shift; userpg->time_offset = data->cyc2ns_offset - now; - userpg->cap_user_time_zero = 1; - userpg->time_zero = data->cyc2ns_offset; + /* + * cap_user_time_zero doesn't make sense when we're using a different + * time base for the records. + */ + if (event->clock == &local_clock) { + userpg->cap_user_time_zero = 1; + userpg->time_zero = data->cyc2ns_offset; + } cyc2ns_read_end(data); } diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index df525d2be1e8..329f0356ad4a 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -71,6 +71,8 @@ struct event_constraint { #define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */ #define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */ #define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */ +#define PERF_X86_EVENT_EXCL 0x40 /* HT exclusivity on counter */ +#define PERF_X86_EVENT_DYNAMIC 0x80 /* dynamic alloc'd constraint */ #define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */ @@ -123,8 +125,37 @@ struct intel_shared_regs { unsigned core_id; /* per-core: core id */ }; +enum intel_excl_state_type { + INTEL_EXCL_UNUSED = 0, /* counter is unused */ + INTEL_EXCL_SHARED = 1, /* counter can be used by both threads */ + INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */ +}; + +struct intel_excl_states { + enum intel_excl_state_type init_state[X86_PMC_IDX_MAX]; + enum intel_excl_state_type state[X86_PMC_IDX_MAX]; + int num_alloc_cntrs;/* #counters allocated */ + int max_alloc_cntrs;/* max #counters allowed */ + bool sched_started; /* true if scheduling has started */ +}; + +struct intel_excl_cntrs { + raw_spinlock_t lock; + + struct intel_excl_states states[2]; + + int refcnt; /* per-core: #HT threads */ + unsigned core_id; /* per-core: core id */ +}; + #define MAX_LBR_ENTRIES 16 +enum { + X86_PERF_KFREE_SHARED = 0, + X86_PERF_KFREE_EXCL = 1, + X86_PERF_KFREE_MAX +}; + struct cpu_hw_events { /* * Generic x86 PMC bits @@ -179,6 +210,12 @@ struct cpu_hw_events { * used on Intel NHM/WSM/SNB */ struct intel_shared_regs *shared_regs; + /* + * manage exclusive counter access between hyperthread + */ + struct event_constraint *constraint_list; /* in enable order */ + struct intel_excl_cntrs *excl_cntrs; + int excl_thread_id; /* 0 or 1 */ /* * AMD specific bits @@ -187,7 +224,7 @@ struct cpu_hw_events { /* Inverted mask of bits to clear in the perf_ctr ctrl registers */ u64 perf_ctr_virt_mask; - void *kfree_on_online; + void *kfree_on_online[X86_PERF_KFREE_MAX]; }; #define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\ @@ -202,6 +239,10 @@ struct cpu_hw_events { #define EVENT_CONSTRAINT(c, n, m) \ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0) +#define INTEL_EXCLEVT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\ + 0, PERF_X86_EVENT_EXCL) + /* * The overlap flag marks event constraints with overlapping counter * masks. This is the case if the counter mask of such an event is not @@ -259,6 +300,10 @@ struct cpu_hw_events { #define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) +#define INTEL_EXCLUEVT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + HWEIGHT(n), 0, PERF_X86_EVENT_EXCL) + #define INTEL_PLD_CONSTRAINT(c, n) \ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) @@ -283,22 +328,40 @@ struct cpu_hw_events { /* Check flags and event code, and set the HSW load flag */ #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \ - __EVENT_CONSTRAINT(code, n, \ + __EVENT_CONSTRAINT(code, n, \ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, \ + PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL) + /* Check flags and event code/umask, and set the HSW store flag */ #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \ __EVENT_CONSTRAINT(code, n, \ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, \ + PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL) + /* Check flags and event code/umask, and set the HSW load flag */ #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \ __EVENT_CONSTRAINT(code, n, \ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \ + __EVENT_CONSTRAINT(code, n, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, \ + PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL) + /* Check flags and event code/umask, and set the HSW N/A flag */ #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \ __EVENT_CONSTRAINT(code, n, \ @@ -408,6 +471,13 @@ union x86_pmu_config { #define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value +enum { + x86_lbr_exclusive_lbr, + x86_lbr_exclusive_bts, + x86_lbr_exclusive_pt, + x86_lbr_exclusive_max, +}; + /* * struct x86_pmu - generic x86 pmu */ @@ -443,14 +513,25 @@ struct x86_pmu { u64 max_period; struct event_constraint * (*get_event_constraints)(struct cpu_hw_events *cpuc, + int idx, struct perf_event *event); void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); + + void (*commit_scheduling)(struct cpu_hw_events *cpuc, + struct perf_event *event, + int cntr); + + void (*start_scheduling)(struct cpu_hw_events *cpuc); + + void (*stop_scheduling)(struct cpu_hw_events *cpuc); + struct event_constraint *event_constraints; struct x86_pmu_quirk *quirks; int perfctr_second_write; bool late_ack; + unsigned (*limit_period)(struct perf_event *event, unsigned l); /* * sysfs attrs @@ -472,7 +553,8 @@ struct x86_pmu { void (*cpu_dead)(int cpu); void (*check_microcode)(void); - void (*flush_branch_stack)(void); + void (*sched_task)(struct perf_event_context *ctx, + bool sched_in); /* * Intel Arch Perfmon v2+ @@ -504,10 +586,15 @@ struct x86_pmu { bool lbr_double_abort; /* duplicated lbr aborts */ /* + * Intel PT/LBR/BTS are exclusive + */ + atomic_t lbr_exclusive[x86_lbr_exclusive_max]; + + /* * Extra registers for events */ struct extra_reg *extra_regs; - unsigned int er_flags; + unsigned int flags; /* * Intel host/guest support (KVM) @@ -515,6 +602,13 @@ struct x86_pmu { struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); }; +struct x86_perf_task_context { + u64 lbr_from[MAX_LBR_ENTRIES]; + u64 lbr_to[MAX_LBR_ENTRIES]; + int lbr_callstack_users; + int lbr_stack_state; +}; + #define x86_add_quirk(func_) \ do { \ static struct x86_pmu_quirk __quirk __initdata = { \ @@ -524,8 +618,13 @@ do { \ x86_pmu.quirks = &__quirk; \ } while (0) -#define ERF_NO_HT_SHARING 1 -#define ERF_HAS_RSP_1 2 +/* + * x86_pmu flags + */ +#define PMU_FL_NO_HT_SHARING 0x1 /* no hyper-threading resource sharing */ +#define PMU_FL_HAS_RSP_1 0x2 /* has 2 equivalent offcore_rsp regs */ +#define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */ +#define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr @@ -546,6 +645,12 @@ static struct perf_pmu_events_attr event_attr_##v = { \ extern struct x86_pmu x86_pmu __read_mostly; +static inline bool x86_pmu_has_lbr_callstack(void) +{ + return x86_pmu.lbr_sel_map && + x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0; +} + DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); int x86_perf_event_set_period(struct perf_event *event); @@ -588,6 +693,12 @@ static inline int x86_pmu_rdpmc_index(int index) return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index; } +int x86_add_exclusive(unsigned int what); + +void x86_del_exclusive(unsigned int what); + +void hw_perf_lbr_event_destroy(struct perf_event *event); + int x86_setup_perfctr(struct perf_event *event); int x86_pmu_hw_config(struct perf_event *event); @@ -674,10 +785,34 @@ static inline int amd_pmu_init(void) #ifdef CONFIG_CPU_SUP_INTEL +static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) +{ + /* user explicitly requested branch sampling */ + if (has_branch_stack(event)) + return true; + + /* implicit branch sampling to correct PEBS skid */ + if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 && + x86_pmu.intel_cap.pebs_format < 2) + return true; + + return false; +} + +static inline bool intel_pmu_has_bts(struct perf_event *event) +{ + if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && + !event->attr.freq && event->hw.sample_period == 1) + return true; + + return false; +} + int intel_pmu_save_and_restart(struct perf_event *event); struct event_constraint * -x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event); +x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event); struct intel_shared_regs *allocate_shared_regs(int cpu); @@ -727,13 +862,15 @@ void intel_pmu_pebs_disable_all(void); void intel_ds_init(void); +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); + void intel_pmu_lbr_reset(void); void intel_pmu_lbr_enable(struct perf_event *event); void intel_pmu_lbr_disable(struct perf_event *event); -void intel_pmu_lbr_enable_all(void); +void intel_pmu_lbr_enable_all(bool pmi); void intel_pmu_lbr_disable_all(void); @@ -747,8 +884,18 @@ void intel_pmu_lbr_init_atom(void); void intel_pmu_lbr_init_snb(void); +void intel_pmu_lbr_init_hsw(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); +void intel_pt_interrupt(void); + +int intel_bts_interrupt(void); + +void intel_bts_enable_local(void); + +void intel_bts_disable_local(void); + int p4_pmu_init(void); int p6_pmu_init(void); @@ -758,6 +905,10 @@ int knc_pmu_init(void); ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +static inline int is_ht_workaround_enabled(void) +{ + return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED); +} #else /* CONFIG_CPU_SUP_INTEL */ static inline void reserve_ds_buffers(void) diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 28926311aac1..1cee5d2d7ece 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -382,6 +382,7 @@ static int amd_pmu_cpu_prepare(int cpu) static void amd_pmu_cpu_starting(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED]; struct amd_nb *nb; int i, nb_id; @@ -399,7 +400,7 @@ static void amd_pmu_cpu_starting(int cpu) continue; if (nb->nb_id == nb_id) { - cpuc->kfree_on_online = cpuc->amd_nb; + *onln = cpuc->amd_nb; cpuc->amd_nb = nb; break; } @@ -429,7 +430,8 @@ static void amd_pmu_cpu_dead(int cpu) } static struct event_constraint * -amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) { /* * if not NB event or no NB, then no constraints @@ -537,7 +539,8 @@ static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); static struct event_constraint * -amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) +amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; unsigned int event_code = amd_get_event_code(hwc); diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index a61f5c6911da..989d3c215d2b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -796,7 +796,7 @@ static int setup_ibs_ctl(int ibs_eilvt_off) * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that * is using the new offset. */ -static int force_ibs_eilvt_setup(void) +static void force_ibs_eilvt_setup(void) { int offset; int ret; @@ -811,26 +811,24 @@ static int force_ibs_eilvt_setup(void) if (offset == APIC_EILVT_NR_MAX) { printk(KERN_DEBUG "No EILVT entry available\n"); - return -EBUSY; + return; } ret = setup_ibs_ctl(offset); if (ret) goto out; - if (!ibs_eilvt_valid()) { - ret = -EFAULT; + if (!ibs_eilvt_valid()) goto out; - } pr_info("IBS: LVT offset %d assigned\n", offset); - return 0; + return; out: preempt_disable(); put_eilvt(offset); preempt_enable(); - return ret; + return; } static void ibs_eilvt_setup(void) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 258990688a5e..9da2400c2ec3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/watchdog.h> #include <asm/cpufeature.h> #include <asm/hardirq.h> @@ -113,6 +114,12 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + + INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + EVENT_CONSTRAINT_END }; @@ -131,15 +138,12 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ - /* - * Errata BV98 -- MEM_*_RETIRED events can leak between counters of SMT - * siblings; disable these events because they can corrupt unrelated - * counters. - */ - INTEL_EVENT_CONSTRAINT(0xd0, 0x0), /* MEM_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd1, 0x0), /* MEM_LOAD_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd2, 0x0), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd3, 0x0), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + + INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + EVENT_CONSTRAINT_END }; @@ -217,6 +221,21 @@ static struct event_constraint intel_hsw_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), + + INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + + EVENT_CONSTRAINT_END +}; + +struct event_constraint intel_bdw_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ + INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */ EVENT_CONSTRAINT_END }; @@ -415,6 +434,202 @@ static __initconst const u64 snb_hw_cache_event_ids }; +/* + * Notes on the events: + * - data reads do not include code reads (comparable to earlier tables) + * - data counts include speculative execution (except L1 write, dtlb, bpu) + * - remote node access includes remote memory, remote cache, remote mmio. + * - prefetches are not included in the counts because they are not + * reliably counted. + */ + +#define HSW_DEMAND_DATA_RD BIT_ULL(0) +#define HSW_DEMAND_RFO BIT_ULL(1) +#define HSW_ANY_RESPONSE BIT_ULL(16) +#define HSW_SUPPLIER_NONE BIT_ULL(17) +#define HSW_L3_MISS_LOCAL_DRAM BIT_ULL(22) +#define HSW_L3_MISS_REMOTE_HOP0 BIT_ULL(27) +#define HSW_L3_MISS_REMOTE_HOP1 BIT_ULL(28) +#define HSW_L3_MISS_REMOTE_HOP2P BIT_ULL(29) +#define HSW_L3_MISS (HSW_L3_MISS_LOCAL_DRAM| \ + HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \ + HSW_L3_MISS_REMOTE_HOP2P) +#define HSW_SNOOP_NONE BIT_ULL(31) +#define HSW_SNOOP_NOT_NEEDED BIT_ULL(32) +#define HSW_SNOOP_MISS BIT_ULL(33) +#define HSW_SNOOP_HIT_NO_FWD BIT_ULL(34) +#define HSW_SNOOP_HIT_WITH_FWD BIT_ULL(35) +#define HSW_SNOOP_HITM BIT_ULL(36) +#define HSW_SNOOP_NON_DRAM BIT_ULL(37) +#define HSW_ANY_SNOOP (HSW_SNOOP_NONE| \ + HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \ + HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \ + HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM) +#define HSW_SNOOP_DRAM (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM) +#define HSW_DEMAND_READ HSW_DEMAND_DATA_RD +#define HSW_DEMAND_WRITE HSW_DEMAND_RFO +#define HSW_L3_MISS_REMOTE (HSW_L3_MISS_REMOTE_HOP0|\ + HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P) +#define HSW_LLC_ACCESS HSW_ANY_RESPONSE + +#define BDW_L3_MISS_LOCAL BIT(26) +#define BDW_L3_MISS (BDW_L3_MISS_LOCAL| \ + HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \ + HSW_L3_MISS_REMOTE_HOP2P) + + +static __initconst const u64 hsw_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [ C(RESULT_MISS) ] = 0x0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */ + [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, +}; + +static __initconst const u64 hsw_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ| + HSW_LLC_ACCESS, + [ C(RESULT_MISS) ] = HSW_DEMAND_READ| + HSW_L3_MISS|HSW_ANY_SNOOP, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE| + HSW_LLC_ACCESS, + [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE| + HSW_L3_MISS|HSW_ANY_SNOOP, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ| + HSW_L3_MISS_LOCAL_DRAM| + HSW_SNOOP_DRAM, + [ C(RESULT_MISS) ] = HSW_DEMAND_READ| + HSW_L3_MISS_REMOTE| + HSW_SNOOP_DRAM, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE| + HSW_L3_MISS_LOCAL_DRAM| + HSW_SNOOP_DRAM, + [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE| + HSW_L3_MISS_REMOTE| + HSW_SNOOP_DRAM, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, +}; + static __initconst const u64 westmere_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -1029,21 +1244,10 @@ static __initconst const u64 slm_hw_cache_event_ids }, }; -static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) -{ - /* user explicitly requested branch sampling */ - if (has_branch_stack(event)) - return true; - - /* implicit branch sampling to correct PEBS skid */ - if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 && - x86_pmu.intel_cap.pebs_format < 2) - return true; - - return false; -} - -static void intel_pmu_disable_all(void) +/* + * Use from PMIs where the LBRs are already disabled. + */ +static void __intel_pmu_disable_all(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1051,17 +1255,24 @@ static void intel_pmu_disable_all(void) if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); + else + intel_bts_disable_local(); intel_pmu_pebs_disable_all(); +} + +static void intel_pmu_disable_all(void) +{ + __intel_pmu_disable_all(); intel_pmu_lbr_disable_all(); } -static void intel_pmu_enable_all(int added) +static void __intel_pmu_enable_all(int added, bool pmi) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); intel_pmu_pebs_enable_all(); - intel_pmu_lbr_enable_all(); + intel_pmu_lbr_enable_all(pmi); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); @@ -1073,7 +1284,13 @@ static void intel_pmu_enable_all(int added) return; intel_pmu_enable_bts(event->hw.config); - } + } else + intel_bts_enable_local(); +} + +static void intel_pmu_enable_all(int added) +{ + __intel_pmu_enable_all(added, false); } /* @@ -1207,7 +1424,7 @@ static void intel_pmu_disable_event(struct perf_event *event) * must disable before any actual event * because any event may be combined with LBR */ - if (intel_pmu_needs_lbr_smpl(event)) + if (needs_branch_stack(event)) intel_pmu_lbr_disable(event); if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { @@ -1268,7 +1485,7 @@ static void intel_pmu_enable_event(struct perf_event *event) * must enabled before any actual event * because any event may be combined with LBR */ - if (intel_pmu_needs_lbr_smpl(event)) + if (needs_branch_stack(event)) intel_pmu_lbr_enable(event); if (event->attr.exclude_host) @@ -1334,6 +1551,18 @@ static void intel_pmu_reset(void) if (ds) ds->bts_index = ds->bts_buffer_base; + /* Ack all overflows and disable fixed counters */ + if (x86_pmu.version >= 2) { + intel_pmu_ack_status(intel_pmu_get_status()); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + } + + /* Reset LBRs and LBR freezing */ + if (x86_pmu.lbr_nr) { + update_debugctlmsr(get_debugctlmsr() & + ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR)); + } + local_irq_restore(flags); } @@ -1357,8 +1586,9 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) */ if (!x86_pmu.late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); - intel_pmu_disable_all(); + __intel_pmu_disable_all(); handled = intel_pmu_drain_bts_buffer(); + handled += intel_bts_interrupt(); status = intel_pmu_get_status(); if (!status) goto done; @@ -1399,6 +1629,14 @@ again: } /* + * Intel PT + */ + if (__test_and_clear_bit(55, (unsigned long *)&status)) { + handled++; + intel_pt_interrupt(); + } + + /* * Checkpointed counters can lead to 'spurious' PMIs because the * rollback caused by the PMI will have cleared the overflow status * bit. Therefore always force probe these counters. @@ -1433,7 +1671,7 @@ again: goto again; done: - intel_pmu_enable_all(0); + __intel_pmu_enable_all(0, true); /* * Only unmask the NMI after the overflow counters * have been reset. This avoids spurious NMIs on @@ -1464,7 +1702,7 @@ intel_bts_constraints(struct perf_event *event) static int intel_alt_er(int idx) { - if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) + if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1)) return idx; if (idx == EXTRA_REG_RSP_0) @@ -1624,7 +1862,8 @@ intel_shared_regs_constraints(struct cpu_hw_events *cpuc, } struct event_constraint * -x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) { struct event_constraint *c; @@ -1641,7 +1880,8 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) } static struct event_constraint * -intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) { struct event_constraint *c; @@ -1657,7 +1897,278 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event if (c) return c; - return x86_get_event_constraints(cpuc, event); + return x86_get_event_constraints(cpuc, idx, event); +} + +static void +intel_start_scheduling(struct cpu_hw_events *cpuc) +{ + struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; + struct intel_excl_states *xl, *xlo; + int tid = cpuc->excl_thread_id; + int o_tid = 1 - tid; /* sibling thread */ + + /* + * nothing needed if in group validation mode + */ + if (cpuc->is_fake || !is_ht_workaround_enabled()) + return; + + /* + * no exclusion needed + */ + if (!excl_cntrs) + return; + + xlo = &excl_cntrs->states[o_tid]; + xl = &excl_cntrs->states[tid]; + + xl->sched_started = true; + xl->num_alloc_cntrs = 0; + /* + * lock shared state until we are done scheduling + * in stop_event_scheduling() + * makes scheduling appear as a transaction + */ + WARN_ON_ONCE(!irqs_disabled()); + raw_spin_lock(&excl_cntrs->lock); + + /* + * save initial state of sibling thread + */ + memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state)); +} + +static void +intel_stop_scheduling(struct cpu_hw_events *cpuc) +{ + struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; + struct intel_excl_states *xl, *xlo; + int tid = cpuc->excl_thread_id; + int o_tid = 1 - tid; /* sibling thread */ + + /* + * nothing needed if in group validation mode + */ + if (cpuc->is_fake || !is_ht_workaround_enabled()) + return; + /* + * no exclusion needed + */ + if (!excl_cntrs) + return; + + xlo = &excl_cntrs->states[o_tid]; + xl = &excl_cntrs->states[tid]; + + /* + * make new sibling thread state visible + */ + memcpy(xlo->state, xlo->init_state, sizeof(xlo->state)); + + xl->sched_started = false; + /* + * release shared state lock (acquired in intel_start_scheduling()) + */ + raw_spin_unlock(&excl_cntrs->lock); +} + +static struct event_constraint * +intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, + int idx, struct event_constraint *c) +{ + struct event_constraint *cx; + struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; + struct intel_excl_states *xl, *xlo; + int is_excl, i; + int tid = cpuc->excl_thread_id; + int o_tid = 1 - tid; /* alternate */ + + /* + * validating a group does not require + * enforcing cross-thread exclusion + */ + if (cpuc->is_fake || !is_ht_workaround_enabled()) + return c; + + /* + * no exclusion needed + */ + if (!excl_cntrs) + return c; + /* + * event requires exclusive counter access + * across HT threads + */ + is_excl = c->flags & PERF_X86_EVENT_EXCL; + + /* + * xl = state of current HT + * xlo = state of sibling HT + */ + xl = &excl_cntrs->states[tid]; + xlo = &excl_cntrs->states[o_tid]; + + /* + * do not allow scheduling of more than max_alloc_cntrs + * which is set to half the available generic counters. + * this helps avoid counter starvation of sibling thread + * by ensuring at most half the counters cannot be in + * exclusive mode. There is not designated counters for the + * limits. Any N/2 counters can be used. This helps with + * events with specifix counter constraints + */ + if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs) + return &emptyconstraint; + + cx = c; + + /* + * because we modify the constraint, we need + * to make a copy. Static constraints come + * from static const tables. + * + * only needed when constraint has not yet + * been cloned (marked dynamic) + */ + if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { + + /* sanity check */ + if (idx < 0) + return &emptyconstraint; + + /* + * grab pre-allocated constraint entry + */ + cx = &cpuc->constraint_list[idx]; + + /* + * initialize dynamic constraint + * with static constraint + */ + memcpy(cx, c, sizeof(*cx)); + + /* + * mark constraint as dynamic, so we + * can free it later on + */ + cx->flags |= PERF_X86_EVENT_DYNAMIC; + } + + /* + * From here on, the constraint is dynamic. + * Either it was just allocated above, or it + * was allocated during a earlier invocation + * of this function + */ + + /* + * Modify static constraint with current dynamic + * state of thread + * + * EXCLUSIVE: sibling counter measuring exclusive event + * SHARED : sibling counter measuring non-exclusive event + * UNUSED : sibling counter unused + */ + for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) { + /* + * exclusive event in sibling counter + * our corresponding counter cannot be used + * regardless of our event + */ + if (xl->state[i] == INTEL_EXCL_EXCLUSIVE) + __clear_bit(i, cx->idxmsk); + /* + * if measuring an exclusive event, sibling + * measuring non-exclusive, then counter cannot + * be used + */ + if (is_excl && xl->state[i] == INTEL_EXCL_SHARED) + __clear_bit(i, cx->idxmsk); + } + + /* + * recompute actual bit weight for scheduling algorithm + */ + cx->weight = hweight64(cx->idxmsk64); + + /* + * if we return an empty mask, then switch + * back to static empty constraint to avoid + * the cost of freeing later on + */ + if (cx->weight == 0) + cx = &emptyconstraint; + + return cx; +} + +static struct event_constraint * +intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c1 = event->hw.constraint; + struct event_constraint *c2; + + /* + * first time only + * - static constraint: no change across incremental scheduling calls + * - dynamic constraint: handled by intel_get_excl_constraints() + */ + c2 = __intel_get_event_constraints(cpuc, idx, event); + if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) { + bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX); + c1->weight = c2->weight; + c2 = c1; + } + + if (cpuc->excl_cntrs) + return intel_get_excl_constraints(cpuc, event, idx, c2); + + return c2; +} + +static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; + struct intel_excl_states *xlo, *xl; + unsigned long flags = 0; /* keep compiler happy */ + int tid = cpuc->excl_thread_id; + int o_tid = 1 - tid; + + /* + * nothing needed if in group validation mode + */ + if (cpuc->is_fake) + return; + + WARN_ON_ONCE(!excl_cntrs); + + if (!excl_cntrs) + return; + + xl = &excl_cntrs->states[tid]; + xlo = &excl_cntrs->states[o_tid]; + + /* + * put_constraint may be called from x86_schedule_events() + * which already has the lock held so here make locking + * conditional + */ + if (!xl->sched_started) + raw_spin_lock_irqsave(&excl_cntrs->lock, flags); + + /* + * if event was actually assigned, then mark the + * counter state as unused now + */ + if (hwc->idx >= 0) + xlo->state[hwc->idx] = INTEL_EXCL_UNUSED; + + if (!xl->sched_started) + raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags); } static void @@ -1678,7 +2189,57 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, static void intel_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { + struct event_constraint *c = event->hw.constraint; + intel_put_shared_regs_event_constraints(cpuc, event); + + /* + * is PMU has exclusive counter restrictions, then + * all events are subject to and must call the + * put_excl_constraints() routine + */ + if (c && cpuc->excl_cntrs) + intel_put_excl_constraints(cpuc, event); + + /* cleanup dynamic constraint */ + if (c && (c->flags & PERF_X86_EVENT_DYNAMIC)) + event->hw.constraint = NULL; +} + +static void intel_commit_scheduling(struct cpu_hw_events *cpuc, + struct perf_event *event, int cntr) +{ + struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; + struct event_constraint *c = event->hw.constraint; + struct intel_excl_states *xlo, *xl; + int tid = cpuc->excl_thread_id; + int o_tid = 1 - tid; + int is_excl; + + if (cpuc->is_fake || !c) + return; + + is_excl = c->flags & PERF_X86_EVENT_EXCL; + + if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) + return; + + WARN_ON_ONCE(!excl_cntrs); + + if (!excl_cntrs) + return; + + xl = &excl_cntrs->states[tid]; + xlo = &excl_cntrs->states[o_tid]; + + WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock)); + + if (cntr >= 0) { + if (is_excl) + xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE; + else + xlo->init_state[cntr] = INTEL_EXCL_SHARED; + } } static void intel_pebs_aliases_core2(struct perf_event *event) @@ -1747,10 +2308,21 @@ static int intel_pmu_hw_config(struct perf_event *event) if (event->attr.precise_ip && x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); - if (intel_pmu_needs_lbr_smpl(event)) { + if (needs_branch_stack(event)) { ret = intel_pmu_setup_lbr_filter(event); if (ret) return ret; + + /* + * BTS is set up earlier in this path, so don't account twice + */ + if (!intel_pmu_has_bts(event)) { + /* disallow lbr if conflicting events are present */ + if (x86_add_exclusive(x86_lbr_exclusive_lbr)) + return -EBUSY; + + event->destroy = hw_perf_lbr_event_destroy; + } } if (event->attr.type != PERF_TYPE_RAW) @@ -1891,9 +2463,12 @@ static struct event_constraint counter2_constraint = EVENT_CONSTRAINT(0, 0x4, 0); static struct event_constraint * -hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) { - struct event_constraint *c = intel_get_event_constraints(cpuc, event); + struct event_constraint *c; + + c = intel_get_event_constraints(cpuc, idx, event); /* Handle special quirk on in_tx_checkpointed only in counter 2 */ if (event->hw.config & HSW_IN_TX_CHECKPOINTED) { @@ -1905,6 +2480,32 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) return c; } +/* + * Broadwell: + * + * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared + * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine + * the two to enforce a minimum period of 128 (the smallest value that has bits + * 0-5 cleared and >= 100). + * + * Because of how the code in x86_perf_event_set_period() works, the truncation + * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period + * to make up for the 'lost' events due to carrying the 'error' in period_left. + * + * Therefore the effective (average) period matches the requested period, + * despite coarser hardware granularity. + */ +static unsigned bdw_limit_period(struct perf_event *event, unsigned left) +{ + if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == + X86_CONFIG(.event=0xc0, .umask=0x01)) { + if (left < 128) + left = 128; + left &= ~0x3fu; + } + return left; +} + PMU_FORMAT_ATTR(event, "config:0-7" ); PMU_FORMAT_ATTR(umask, "config:8-15" ); PMU_FORMAT_ATTR(edge, "config:18" ); @@ -1979,16 +2580,52 @@ struct intel_shared_regs *allocate_shared_regs(int cpu) return regs; } +static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) +{ + struct intel_excl_cntrs *c; + int i; + + c = kzalloc_node(sizeof(struct intel_excl_cntrs), + GFP_KERNEL, cpu_to_node(cpu)); + if (c) { + raw_spin_lock_init(&c->lock); + for (i = 0; i < X86_PMC_IDX_MAX; i++) { + c->states[0].state[i] = INTEL_EXCL_UNUSED; + c->states[0].init_state[i] = INTEL_EXCL_UNUSED; + + c->states[1].state[i] = INTEL_EXCL_UNUSED; + c->states[1].init_state[i] = INTEL_EXCL_UNUSED; + } + c->core_id = -1; + } + return c; +} + static int intel_pmu_cpu_prepare(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map)) - return NOTIFY_OK; + if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { + cpuc->shared_regs = allocate_shared_regs(cpu); + if (!cpuc->shared_regs) + return NOTIFY_BAD; + } - cpuc->shared_regs = allocate_shared_regs(cpu); - if (!cpuc->shared_regs) - return NOTIFY_BAD; + if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { + size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); + + cpuc->constraint_list = kzalloc(sz, GFP_KERNEL); + if (!cpuc->constraint_list) + return NOTIFY_BAD; + + cpuc->excl_cntrs = allocate_excl_cntrs(cpu); + if (!cpuc->excl_cntrs) { + kfree(cpuc->constraint_list); + kfree(cpuc->shared_regs); + return NOTIFY_BAD; + } + cpuc->excl_thread_id = 0; + } return NOTIFY_OK; } @@ -2010,13 +2647,15 @@ static void intel_pmu_cpu_starting(int cpu) if (!cpuc->shared_regs) return; - if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) { + if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) { + void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED]; + for_each_cpu(i, topology_thread_cpumask(cpu)) { struct intel_shared_regs *pc; pc = per_cpu(cpu_hw_events, i).shared_regs; if (pc && pc->core_id == core_id) { - cpuc->kfree_on_online = cpuc->shared_regs; + *onln = cpuc->shared_regs; cpuc->shared_regs = pc; break; } @@ -2027,6 +2666,44 @@ static void intel_pmu_cpu_starting(int cpu) if (x86_pmu.lbr_sel_map) cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; + + if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { + int h = x86_pmu.num_counters >> 1; + + for_each_cpu(i, topology_thread_cpumask(cpu)) { + struct intel_excl_cntrs *c; + + c = per_cpu(cpu_hw_events, i).excl_cntrs; + if (c && c->core_id == core_id) { + cpuc->kfree_on_online[1] = cpuc->excl_cntrs; + cpuc->excl_cntrs = c; + cpuc->excl_thread_id = 1; + break; + } + } + cpuc->excl_cntrs->core_id = core_id; + cpuc->excl_cntrs->refcnt++; + /* + * set hard limit to half the number of generic counters + */ + cpuc->excl_cntrs->states[0].max_alloc_cntrs = h; + cpuc->excl_cntrs->states[1].max_alloc_cntrs = h; + } +} + +static void free_excl_cntrs(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct intel_excl_cntrs *c; + + c = cpuc->excl_cntrs; + if (c) { + if (c->core_id == -1 || --c->refcnt == 0) + kfree(c); + cpuc->excl_cntrs = NULL; + kfree(cpuc->constraint_list); + cpuc->constraint_list = NULL; + } } static void intel_pmu_cpu_dying(int cpu) @@ -2041,19 +2718,9 @@ static void intel_pmu_cpu_dying(int cpu) cpuc->shared_regs = NULL; } - fini_debug_store_on_cpu(cpu); -} + free_excl_cntrs(cpu); -static void intel_pmu_flush_branch_stack(void) -{ - /* - * Intel LBR does not tag entries with the - * PID of the current task, then we need to - * flush it on ctxsw - * For now, we simply reset it - */ - if (x86_pmu.lbr_nr) - intel_pmu_lbr_reset(); + fini_debug_store_on_cpu(cpu); } PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); @@ -2107,7 +2774,7 @@ static __initconst const struct x86_pmu intel_pmu = { .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, .guest_get_msrs = intel_guest_get_msrs, - .flush_branch_stack = intel_pmu_flush_branch_stack, + .sched_task = intel_pmu_lbr_sched_task, }; static __init void intel_clovertown_quirk(void) @@ -2264,6 +2931,27 @@ static __init void intel_nehalem_quirk(void) } } +/* + * enable software workaround for errata: + * SNB: BJ122 + * IVB: BV98 + * HSW: HSD29 + * + * Only needed when HT is enabled. However detecting + * if HT is enabled is difficult (model specific). So instead, + * we enable the workaround in the early boot, and verify if + * it is needed in a later initcall phase once we have valid + * topology information to check if HT is actually enabled + */ +static __init void intel_ht_bug(void) +{ + x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED; + + x86_pmu.commit_scheduling = intel_commit_scheduling; + x86_pmu.start_scheduling = intel_start_scheduling; + x86_pmu.stop_scheduling = intel_stop_scheduling; +} + EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") @@ -2443,7 +3131,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_slm_event_constraints; x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; - x86_pmu.er_flags |= ERF_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; pr_cont("Silvermont events, "); break; @@ -2461,7 +3149,7 @@ __init int intel_pmu_init(void) x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; x86_pmu.extra_regs = intel_westmere_extra_regs; - x86_pmu.er_flags |= ERF_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.cpu_events = nhm_events_attrs; @@ -2478,6 +3166,7 @@ __init int intel_pmu_init(void) case 42: /* 32nm SandyBridge */ case 45: /* 32nm SandyBridge-E/EN/EP */ x86_add_quirk(intel_sandybridge_quirk); + x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, @@ -2492,9 +3181,11 @@ __init int intel_pmu_init(void) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; + + /* all extra regs are per-cpu when HT is on */ - x86_pmu.er_flags |= ERF_HAS_RSP_1; - x86_pmu.er_flags |= ERF_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; x86_pmu.cpu_events = snb_events_attrs; @@ -2510,6 +3201,7 @@ __init int intel_pmu_init(void) case 58: /* 22nm IvyBridge */ case 62: /* 22nm IvyBridge-EP/EX */ + x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); /* dTLB-load-misses on IVB is different than SNB */ @@ -2528,8 +3220,8 @@ __init int intel_pmu_init(void) else x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ - x86_pmu.er_flags |= ERF_HAS_RSP_1; - x86_pmu.er_flags |= ERF_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; x86_pmu.cpu_events = snb_events_attrs; @@ -2545,19 +3237,20 @@ __init int intel_pmu_init(void) case 63: /* 22nm Haswell Server */ case 69: /* 22nm Haswell ULT */ case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + x86_add_quirk(intel_ht_bug); x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_snb(); + intel_pmu_lbr_init_hsw(); x86_pmu.event_constraints = intel_hsw_event_constraints; x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; x86_pmu.extra_regs = intel_snbep_extra_regs; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; /* all extra regs are per-cpu when HT is on */ - x86_pmu.er_flags |= ERF_HAS_RSP_1; - x86_pmu.er_flags |= ERF_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; @@ -2566,6 +3259,39 @@ __init int intel_pmu_init(void) pr_cont("Haswell events, "); break; + case 61: /* 14nm Broadwell Core-M */ + case 86: /* 14nm Broadwell Xeon D */ + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + + /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */ + hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ | + BDW_L3_MISS|HSW_SNOOP_DRAM; + hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS| + HSW_SNOOP_DRAM; + hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ| + BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; + hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE| + BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; + + intel_pmu_lbr_init_snb(); + + x86_pmu.event_constraints = intel_bdw_event_constraints; + x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; + x86_pmu.extra_regs = intel_snbep_extra_regs; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + /* all extra regs are per-cpu when HT is on */ + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = hsw_get_event_constraints; + x86_pmu.cpu_events = hsw_events_attrs; + x86_pmu.limit_period = bdw_limit_period; + pr_cont("Broadwell events, "); + break; + default: switch (x86_pmu.version) { case 1: @@ -2651,3 +3377,47 @@ __init int intel_pmu_init(void) return 0; } + +/* + * HT bug: phase 2 init + * Called once we have valid topology information to check + * whether or not HT is enabled + * If HT is off, then we disable the workaround + */ +static __init int fixup_ht_bug(void) +{ + int cpu = smp_processor_id(); + int w, c; + /* + * problem not present on this CPU model, nothing to do + */ + if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) + return 0; + + w = cpumask_weight(topology_thread_cpumask(cpu)); + if (w > 1) { + pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); + return 0; + } + + watchdog_nmi_disable_all(); + + x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); + + x86_pmu.commit_scheduling = NULL; + x86_pmu.start_scheduling = NULL; + x86_pmu.stop_scheduling = NULL; + + watchdog_nmi_enable_all(); + + get_online_cpus(); + + for_each_online_cpu(c) { + free_excl_cntrs(c); + } + + put_online_cpus(); + pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n"); + return 0; +} +subsys_initcall(fixup_ht_bug) diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c new file mode 100644 index 000000000000..ac1f0c55f379 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -0,0 +1,525 @@ +/* + * BTS PMU driver for perf + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#undef DEBUG + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/bitops.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/coredump.h> + +#include <asm-generic/sizes.h> +#include <asm/perf_event.h> + +#include "perf_event.h" + +struct bts_ctx { + struct perf_output_handle handle; + struct debug_store ds_back; + int started; +}; + +static DEFINE_PER_CPU(struct bts_ctx, bts_ctx); + +#define BTS_RECORD_SIZE 24 +#define BTS_SAFETY_MARGIN 4080 + +struct bts_phys { + struct page *page; + unsigned long size; + unsigned long offset; + unsigned long displacement; +}; + +struct bts_buffer { + size_t real_size; /* multiple of BTS_RECORD_SIZE */ + unsigned int nr_pages; + unsigned int nr_bufs; + unsigned int cur_buf; + bool snapshot; + local_t data_size; + local_t lost; + local_t head; + unsigned long end; + void **data_pages; + struct bts_phys buf[0]; +}; + +struct pmu bts_pmu; + +void intel_pmu_enable_bts(u64 config); +void intel_pmu_disable_bts(void); + +static size_t buf_size(struct page *page) +{ + return 1 << (PAGE_SHIFT + page_private(page)); +} + +static void * +bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) +{ + struct bts_buffer *buf; + struct page *page; + int node = (cpu == -1) ? cpu : cpu_to_node(cpu); + unsigned long offset; + size_t size = nr_pages << PAGE_SHIFT; + int pg, nbuf, pad; + + /* count all the high order buffers */ + for (pg = 0, nbuf = 0; pg < nr_pages;) { + page = virt_to_page(pages[pg]); + if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1)) + return NULL; + pg += 1 << page_private(page); + nbuf++; + } + + /* + * to avoid interrupts in overwrite mode, only allow one physical + */ + if (overwrite && nbuf > 1) + return NULL; + + buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node); + if (!buf) + return NULL; + + buf->nr_pages = nr_pages; + buf->nr_bufs = nbuf; + buf->snapshot = overwrite; + buf->data_pages = pages; + buf->real_size = size - size % BTS_RECORD_SIZE; + + for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { + unsigned int __nr_pages; + + page = virt_to_page(pages[pg]); + __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; + buf->buf[nbuf].page = page; + buf->buf[nbuf].offset = offset; + buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); + buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; + pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; + buf->buf[nbuf].size -= pad; + + pg += __nr_pages; + offset += __nr_pages << PAGE_SHIFT; + } + + return buf; +} + +static void bts_buffer_free_aux(void *data) +{ + kfree(data); +} + +static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) +{ + return buf->buf[idx].offset + buf->buf[idx].displacement; +} + +static void +bts_config_buffer(struct bts_buffer *buf) +{ + int cpu = raw_smp_processor_id(); + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct bts_phys *phys = &buf->buf[buf->cur_buf]; + unsigned long index, thresh = 0, end = phys->size; + struct page *page = phys->page; + + index = local_read(&buf->head); + + if (!buf->snapshot) { + if (buf->end < phys->offset + buf_size(page)) + end = buf->end - phys->offset - phys->displacement; + + index -= phys->offset + phys->displacement; + + if (end - index > BTS_SAFETY_MARGIN) + thresh = end - BTS_SAFETY_MARGIN; + else if (end - index > BTS_RECORD_SIZE) + thresh = end - BTS_RECORD_SIZE; + else + thresh = end; + } + + ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement; + ds->bts_index = ds->bts_buffer_base + index; + ds->bts_absolute_maximum = ds->bts_buffer_base + end; + ds->bts_interrupt_threshold = !buf->snapshot + ? ds->bts_buffer_base + thresh + : ds->bts_absolute_maximum + BTS_RECORD_SIZE; +} + +static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head) +{ + unsigned long index = head - phys->offset; + + memset(page_address(phys->page) + index, 0, phys->size - index); +} + +static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts) +{ + if (buf->snapshot) + return false; + + if (local_read(&buf->data_size) >= bts->handle.size || + bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE) + return true; + + return false; +} + +static void bts_update(struct bts_ctx *bts) +{ + int cpu = raw_smp_processor_id(); + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct bts_buffer *buf = perf_get_aux(&bts->handle); + unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head; + + if (!buf) + return; + + head = index + bts_buffer_offset(buf, buf->cur_buf); + old = local_xchg(&buf->head, head); + + if (!buf->snapshot) { + if (old == head) + return; + + if (ds->bts_index >= ds->bts_absolute_maximum) + local_inc(&buf->lost); + + /* + * old and head are always in the same physical buffer, so we + * can subtract them to get the data size. + */ + local_add(head - old, &buf->data_size); + } else { + local_set(&buf->data_size, head); + } +} + +static void __bts_event_start(struct perf_event *event) +{ + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_buffer *buf = perf_get_aux(&bts->handle); + u64 config = 0; + + if (!buf || bts_buffer_is_full(buf, bts)) + return; + + event->hw.state = 0; + + if (!buf->snapshot) + config |= ARCH_PERFMON_EVENTSEL_INT; + if (!event->attr.exclude_kernel) + config |= ARCH_PERFMON_EVENTSEL_OS; + if (!event->attr.exclude_user) + config |= ARCH_PERFMON_EVENTSEL_USR; + + bts_config_buffer(buf); + + /* + * local barrier to make sure that ds configuration made it + * before we enable BTS + */ + wmb(); + + intel_pmu_enable_bts(config); +} + +static void bts_event_start(struct perf_event *event, int flags) +{ + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + + __bts_event_start(event); + + /* PMI handler: this counter is running and likely generating PMIs */ + ACCESS_ONCE(bts->started) = 1; +} + +static void __bts_event_stop(struct perf_event *event) +{ + /* + * No extra synchronization is mandated by the documentation to have + * BTS data stores globally visible. + */ + intel_pmu_disable_bts(); + + if (event->hw.state & PERF_HES_STOPPED) + return; + + ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED; +} + +static void bts_event_stop(struct perf_event *event, int flags) +{ + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + + /* PMI handler: don't restart this counter */ + ACCESS_ONCE(bts->started) = 0; + + __bts_event_stop(event); + + if (flags & PERF_EF_UPDATE) + bts_update(bts); +} + +void intel_bts_enable_local(void) +{ + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + + if (bts->handle.event && bts->started) + __bts_event_start(bts->handle.event); +} + +void intel_bts_disable_local(void) +{ + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + + if (bts->handle.event) + __bts_event_stop(bts->handle.event); +} + +static int +bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) +{ + unsigned long head, space, next_space, pad, gap, skip, wakeup; + unsigned int next_buf; + struct bts_phys *phys, *next_phys; + int ret; + + if (buf->snapshot) + return 0; + + head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); + if (WARN_ON_ONCE(head != local_read(&buf->head))) + return -EINVAL; + + phys = &buf->buf[buf->cur_buf]; + space = phys->offset + phys->displacement + phys->size - head; + pad = space; + if (space > handle->size) { + space = handle->size; + space -= space % BTS_RECORD_SIZE; + } + if (space <= BTS_SAFETY_MARGIN) { + /* See if next phys buffer has more space */ + next_buf = buf->cur_buf + 1; + if (next_buf >= buf->nr_bufs) + next_buf = 0; + next_phys = &buf->buf[next_buf]; + gap = buf_size(phys->page) - phys->displacement - phys->size + + next_phys->displacement; + skip = pad + gap; + if (handle->size >= skip) { + next_space = next_phys->size; + if (next_space + skip > handle->size) { + next_space = handle->size - skip; + next_space -= next_space % BTS_RECORD_SIZE; + } + if (next_space > space || !space) { + if (pad) + bts_buffer_pad_out(phys, head); + ret = perf_aux_output_skip(handle, skip); + if (ret) + return ret; + /* Advance to next phys buffer */ + phys = next_phys; + space = next_space; + head = phys->offset + phys->displacement; + /* + * After this, cur_buf and head won't match ds + * anymore, so we must not be racing with + * bts_update(). + */ + buf->cur_buf = next_buf; + local_set(&buf->head, head); + } + } + } + + /* Don't go far beyond wakeup watermark */ + wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup - + handle->head; + if (space > wakeup) { + space = wakeup; + space -= space % BTS_RECORD_SIZE; + } + + buf->end = head + space; + + /* + * If we have no space, the lost notification would have been sent when + * we hit absolute_maximum - see bts_update() + */ + if (!space) + return -ENOSPC; + + return 0; +} + +int intel_bts_interrupt(void) +{ + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct perf_event *event = bts->handle.event; + struct bts_buffer *buf; + s64 old_head; + int err; + + if (!event || !bts->started) + return 0; + + buf = perf_get_aux(&bts->handle); + /* + * Skip snapshot counters: they don't use the interrupt, but + * there's no other way of telling, because the pointer will + * keep moving + */ + if (!buf || buf->snapshot) + return 0; + + old_head = local_read(&buf->head); + bts_update(bts); + + /* no new data */ + if (old_head == local_read(&buf->head)) + return 0; + + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), + !!local_xchg(&buf->lost, 0)); + + buf = perf_aux_output_begin(&bts->handle, event); + if (!buf) + return 1; + + err = bts_buffer_reset(buf, &bts->handle); + if (err) + perf_aux_output_end(&bts->handle, 0, false); + + return 1; +} + +static void bts_event_del(struct perf_event *event, int mode) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_buffer *buf = perf_get_aux(&bts->handle); + + bts_event_stop(event, PERF_EF_UPDATE); + + if (buf) { + if (buf->snapshot) + bts->handle.head = + local_xchg(&buf->data_size, + buf->nr_pages << PAGE_SHIFT); + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), + !!local_xchg(&buf->lost, 0)); + } + + cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; + cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base; + cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum; + cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold; +} + +static int bts_event_add(struct perf_event *event, int mode) +{ + struct bts_buffer *buf; + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int ret = -EBUSY; + + event->hw.state = PERF_HES_STOPPED; + + if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + return -EBUSY; + + if (bts->handle.event) + return -EBUSY; + + buf = perf_aux_output_begin(&bts->handle, event); + if (!buf) + return -EINVAL; + + ret = bts_buffer_reset(buf, &bts->handle); + if (ret) { + perf_aux_output_end(&bts->handle, 0, false); + return ret; + } + + bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base; + bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum; + bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold; + + if (mode & PERF_EF_START) { + bts_event_start(event, 0); + if (hwc->state & PERF_HES_STOPPED) { + bts_event_del(event, 0); + return -EBUSY; + } + } + + return 0; +} + +static void bts_event_destroy(struct perf_event *event) +{ + x86_del_exclusive(x86_lbr_exclusive_bts); +} + +static int bts_event_init(struct perf_event *event) +{ + if (event->attr.type != bts_pmu.type) + return -ENOENT; + + if (x86_add_exclusive(x86_lbr_exclusive_bts)) + return -EBUSY; + + event->destroy = bts_event_destroy; + + return 0; +} + +static void bts_event_read(struct perf_event *event) +{ +} + +static __init int bts_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) + return -ENODEV; + + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE; + bts_pmu.task_ctx_nr = perf_sw_context; + bts_pmu.event_init = bts_event_init; + bts_pmu.add = bts_event_add; + bts_pmu.del = bts_event_del; + bts_pmu.start = bts_event_start; + bts_pmu.stop = bts_event_stop; + bts_pmu.read = bts_event_read; + bts_pmu.setup_aux = bts_buffer_setup_aux; + bts_pmu.free_aux = bts_buffer_free_aux; + + return perf_pmu_register(&bts_pmu, "intel_bts", -1); +} + +module_init(bts_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c new file mode 100644 index 000000000000..e4d1b8b738fa --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -0,0 +1,1379 @@ +/* + * Intel Cache Quality-of-Service Monitoring (CQM) support. + * + * Based very, very heavily on work by Peter Zijlstra. + */ + +#include <linux/perf_event.h> +#include <linux/slab.h> +#include <asm/cpu_device_id.h> +#include "perf_event.h" + +#define MSR_IA32_PQR_ASSOC 0x0c8f +#define MSR_IA32_QM_CTR 0x0c8e +#define MSR_IA32_QM_EVTSEL 0x0c8d + +static unsigned int cqm_max_rmid = -1; +static unsigned int cqm_l3_scale; /* supposedly cacheline size */ + +struct intel_cqm_state { + raw_spinlock_t lock; + int rmid; + int cnt; +}; + +static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state); + +/* + * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru. + * Also protects event->hw.cqm_rmid + * + * Hold either for stability, both for modification of ->hw.cqm_rmid. + */ +static DEFINE_MUTEX(cache_mutex); +static DEFINE_RAW_SPINLOCK(cache_lock); + +/* + * Groups of events that have the same target(s), one RMID per group. + */ +static LIST_HEAD(cache_groups); + +/* + * Mask of CPUs for reading CQM values. We only need one per-socket. + */ +static cpumask_t cqm_cpumask; + +#define RMID_VAL_ERROR (1ULL << 63) +#define RMID_VAL_UNAVAIL (1ULL << 62) + +#define QOS_L3_OCCUP_EVENT_ID (1 << 0) + +#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID + +/* + * This is central to the rotation algorithm in __intel_cqm_rmid_rotate(). + * + * This rmid is always free and is guaranteed to have an associated + * near-zero occupancy value, i.e. no cachelines are tagged with this + * RMID, once __intel_cqm_rmid_rotate() returns. + */ +static unsigned int intel_cqm_rotation_rmid; + +#define INVALID_RMID (-1) + +/* + * Is @rmid valid for programming the hardware? + * + * rmid 0 is reserved by the hardware for all non-monitored tasks, which + * means that we should never come across an rmid with that value. + * Likewise, an rmid value of -1 is used to indicate "no rmid currently + * assigned" and is used as part of the rotation code. + */ +static inline bool __rmid_valid(unsigned int rmid) +{ + if (!rmid || rmid == INVALID_RMID) + return false; + + return true; +} + +static u64 __rmid_read(unsigned int rmid) +{ + u64 val; + + /* + * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt, + * it just says that to increase confusion. + */ + wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid); + rdmsrl(MSR_IA32_QM_CTR, val); + + /* + * Aside from the ERROR and UNAVAIL bits, assume this thing returns + * the number of cachelines tagged with @rmid. + */ + return val; +} + +enum rmid_recycle_state { + RMID_YOUNG = 0, + RMID_AVAILABLE, + RMID_DIRTY, +}; + +struct cqm_rmid_entry { + unsigned int rmid; + enum rmid_recycle_state state; + struct list_head list; + unsigned long queue_time; +}; + +/* + * cqm_rmid_free_lru - A least recently used list of RMIDs. + * + * Oldest entry at the head, newest (most recently used) entry at the + * tail. This list is never traversed, it's only used to keep track of + * the lru order. That is, we only pick entries of the head or insert + * them on the tail. + * + * All entries on the list are 'free', and their RMIDs are not currently + * in use. To mark an RMID as in use, remove its entry from the lru + * list. + * + * + * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs. + * + * This list is contains RMIDs that no one is currently using but that + * may have a non-zero occupancy value associated with them. The + * rotation worker moves RMIDs from the limbo list to the free list once + * the occupancy value drops below __intel_cqm_threshold. + * + * Both lists are protected by cache_mutex. + */ +static LIST_HEAD(cqm_rmid_free_lru); +static LIST_HEAD(cqm_rmid_limbo_lru); + +/* + * We use a simple array of pointers so that we can lookup a struct + * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid() + * and __put_rmid() from having to worry about dealing with struct + * cqm_rmid_entry - they just deal with rmids, i.e. integers. + * + * Once this array is initialized it is read-only. No locks are required + * to access it. + * + * All entries for all RMIDs can be looked up in the this array at all + * times. + */ +static struct cqm_rmid_entry **cqm_rmid_ptrs; + +static inline struct cqm_rmid_entry *__rmid_entry(int rmid) +{ + struct cqm_rmid_entry *entry; + + entry = cqm_rmid_ptrs[rmid]; + WARN_ON(entry->rmid != rmid); + + return entry; +} + +/* + * Returns < 0 on fail. + * + * We expect to be called with cache_mutex held. + */ +static int __get_rmid(void) +{ + struct cqm_rmid_entry *entry; + + lockdep_assert_held(&cache_mutex); + + if (list_empty(&cqm_rmid_free_lru)) + return INVALID_RMID; + + entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list); + list_del(&entry->list); + + return entry->rmid; +} + +static void __put_rmid(unsigned int rmid) +{ + struct cqm_rmid_entry *entry; + + lockdep_assert_held(&cache_mutex); + + WARN_ON(!__rmid_valid(rmid)); + entry = __rmid_entry(rmid); + + entry->queue_time = jiffies; + entry->state = RMID_YOUNG; + + list_add_tail(&entry->list, &cqm_rmid_limbo_lru); +} + +static int intel_cqm_setup_rmid_cache(void) +{ + struct cqm_rmid_entry *entry; + unsigned int nr_rmids; + int r = 0; + + nr_rmids = cqm_max_rmid + 1; + cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) * + nr_rmids, GFP_KERNEL); + if (!cqm_rmid_ptrs) + return -ENOMEM; + + for (; r <= cqm_max_rmid; r++) { + struct cqm_rmid_entry *entry; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto fail; + + INIT_LIST_HEAD(&entry->list); + entry->rmid = r; + cqm_rmid_ptrs[r] = entry; + + list_add_tail(&entry->list, &cqm_rmid_free_lru); + } + + /* + * RMID 0 is special and is always allocated. It's used for all + * tasks that are not monitored. + */ + entry = __rmid_entry(0); + list_del(&entry->list); + + mutex_lock(&cache_mutex); + intel_cqm_rotation_rmid = __get_rmid(); + mutex_unlock(&cache_mutex); + + return 0; +fail: + while (r--) + kfree(cqm_rmid_ptrs[r]); + + kfree(cqm_rmid_ptrs); + return -ENOMEM; +} + +/* + * Determine if @a and @b measure the same set of tasks. + * + * If @a and @b measure the same set of tasks then we want to share a + * single RMID. + */ +static bool __match_event(struct perf_event *a, struct perf_event *b) +{ + /* Per-cpu and task events don't mix */ + if ((a->attach_state & PERF_ATTACH_TASK) != + (b->attach_state & PERF_ATTACH_TASK)) + return false; + +#ifdef CONFIG_CGROUP_PERF + if (a->cgrp != b->cgrp) + return false; +#endif + + /* If not task event, we're machine wide */ + if (!(b->attach_state & PERF_ATTACH_TASK)) + return true; + + /* + * Events that target same task are placed into the same cache group. + */ + if (a->hw.target == b->hw.target) + return true; + + /* + * Are we an inherited event? + */ + if (b->parent == a) + return true; + + return false; +} + +#ifdef CONFIG_CGROUP_PERF +static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event) +{ + if (event->attach_state & PERF_ATTACH_TASK) + return perf_cgroup_from_task(event->hw.target); + + return event->cgrp; +} +#endif + +/* + * Determine if @a's tasks intersect with @b's tasks + * + * There are combinations of events that we explicitly prohibit, + * + * PROHIBITS + * system-wide -> cgroup and task + * cgroup -> system-wide + * -> task in cgroup + * task -> system-wide + * -> task in cgroup + * + * Call this function before allocating an RMID. + */ +static bool __conflict_event(struct perf_event *a, struct perf_event *b) +{ +#ifdef CONFIG_CGROUP_PERF + /* + * We can have any number of cgroups but only one system-wide + * event at a time. + */ + if (a->cgrp && b->cgrp) { + struct perf_cgroup *ac = a->cgrp; + struct perf_cgroup *bc = b->cgrp; + + /* + * This condition should have been caught in + * __match_event() and we should be sharing an RMID. + */ + WARN_ON_ONCE(ac == bc); + + if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) || + cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup)) + return true; + + return false; + } + + if (a->cgrp || b->cgrp) { + struct perf_cgroup *ac, *bc; + + /* + * cgroup and system-wide events are mutually exclusive + */ + if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) || + (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK))) + return true; + + /* + * Ensure neither event is part of the other's cgroup + */ + ac = event_to_cgroup(a); + bc = event_to_cgroup(b); + if (ac == bc) + return true; + + /* + * Must have cgroup and non-intersecting task events. + */ + if (!ac || !bc) + return false; + + /* + * We have cgroup and task events, and the task belongs + * to a cgroup. Check for for overlap. + */ + if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) || + cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup)) + return true; + + return false; + } +#endif + /* + * If one of them is not a task, same story as above with cgroups. + */ + if (!(a->attach_state & PERF_ATTACH_TASK) || + !(b->attach_state & PERF_ATTACH_TASK)) + return true; + + /* + * Must be non-overlapping. + */ + return false; +} + +struct rmid_read { + unsigned int rmid; + atomic64_t value; +}; + +static void __intel_cqm_event_count(void *info); + +/* + * Exchange the RMID of a group of events. + */ +static unsigned int +intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid) +{ + struct perf_event *event; + unsigned int old_rmid = group->hw.cqm_rmid; + struct list_head *head = &group->hw.cqm_group_entry; + + lockdep_assert_held(&cache_mutex); + + /* + * If our RMID is being deallocated, perform a read now. + */ + if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) { + struct rmid_read rr = { + .value = ATOMIC64_INIT(0), + .rmid = old_rmid, + }; + + on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, + &rr, 1); + local64_set(&group->count, atomic64_read(&rr.value)); + } + + raw_spin_lock_irq(&cache_lock); + + group->hw.cqm_rmid = rmid; + list_for_each_entry(event, head, hw.cqm_group_entry) + event->hw.cqm_rmid = rmid; + + raw_spin_unlock_irq(&cache_lock); + + return old_rmid; +} + +/* + * If we fail to assign a new RMID for intel_cqm_rotation_rmid because + * cachelines are still tagged with RMIDs in limbo, we progressively + * increment the threshold until we find an RMID in limbo with <= + * __intel_cqm_threshold lines tagged. This is designed to mitigate the + * problem where cachelines tagged with an RMID are not steadily being + * evicted. + * + * On successful rotations we decrease the threshold back towards zero. + * + * __intel_cqm_max_threshold provides an upper bound on the threshold, + * and is measured in bytes because it's exposed to userland. + */ +static unsigned int __intel_cqm_threshold; +static unsigned int __intel_cqm_max_threshold; + +/* + * Test whether an RMID has a zero occupancy value on this cpu. + */ +static void intel_cqm_stable(void *arg) +{ + struct cqm_rmid_entry *entry; + + list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) { + if (entry->state != RMID_AVAILABLE) + break; + + if (__rmid_read(entry->rmid) > __intel_cqm_threshold) + entry->state = RMID_DIRTY; + } +} + +/* + * If we have group events waiting for an RMID that don't conflict with + * events already running, assign @rmid. + */ +static bool intel_cqm_sched_in_event(unsigned int rmid) +{ + struct perf_event *leader, *event; + + lockdep_assert_held(&cache_mutex); + + leader = list_first_entry(&cache_groups, struct perf_event, + hw.cqm_groups_entry); + event = leader; + + list_for_each_entry_continue(event, &cache_groups, + hw.cqm_groups_entry) { + if (__rmid_valid(event->hw.cqm_rmid)) + continue; + + if (__conflict_event(event, leader)) + continue; + + intel_cqm_xchg_rmid(event, rmid); + return true; + } + + return false; +} + +/* + * Initially use this constant for both the limbo queue time and the + * rotation timer interval, pmu::hrtimer_interval_ms. + * + * They don't need to be the same, but the two are related since if you + * rotate faster than you recycle RMIDs, you may run out of available + * RMIDs. + */ +#define RMID_DEFAULT_QUEUE_TIME 250 /* ms */ + +static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME; + +/* + * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list + * @nr_available: number of freeable RMIDs on the limbo list + * + * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no + * cachelines are tagged with those RMIDs. After this we can reuse them + * and know that the current set of active RMIDs is stable. + * + * Return %true or %false depending on whether stabilization needs to be + * reattempted. + * + * If we return %true then @nr_available is updated to indicate the + * number of RMIDs on the limbo list that have been queued for the + * minimum queue time (RMID_AVAILABLE), but whose data occupancy values + * are above __intel_cqm_threshold. + */ +static bool intel_cqm_rmid_stabilize(unsigned int *available) +{ + struct cqm_rmid_entry *entry, *tmp; + + lockdep_assert_held(&cache_mutex); + + *available = 0; + list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) { + unsigned long min_queue_time; + unsigned long now = jiffies; + + /* + * We hold RMIDs placed into limbo for a minimum queue + * time. Before the minimum queue time has elapsed we do + * not recycle RMIDs. + * + * The reasoning is that until a sufficient time has + * passed since we stopped using an RMID, any RMID + * placed onto the limbo list will likely still have + * data tagged in the cache, which means we'll probably + * fail to recycle it anyway. + * + * We can save ourselves an expensive IPI by skipping + * any RMIDs that have not been queued for the minimum + * time. + */ + min_queue_time = entry->queue_time + + msecs_to_jiffies(__rmid_queue_time_ms); + + if (time_after(min_queue_time, now)) + break; + + entry->state = RMID_AVAILABLE; + (*available)++; + } + + /* + * Fast return if none of the RMIDs on the limbo list have been + * sitting on the queue for the minimum queue time. + */ + if (!*available) + return false; + + /* + * Test whether an RMID is free for each package. + */ + on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true); + + list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) { + /* + * Exhausted all RMIDs that have waited min queue time. + */ + if (entry->state == RMID_YOUNG) + break; + + if (entry->state == RMID_DIRTY) + continue; + + list_del(&entry->list); /* remove from limbo */ + + /* + * The rotation RMID gets priority if it's + * currently invalid. In which case, skip adding + * the RMID to the the free lru. + */ + if (!__rmid_valid(intel_cqm_rotation_rmid)) { + intel_cqm_rotation_rmid = entry->rmid; + continue; + } + + /* + * If we have groups waiting for RMIDs, hand + * them one now provided they don't conflict. + */ + if (intel_cqm_sched_in_event(entry->rmid)) + continue; + + /* + * Otherwise place it onto the free list. + */ + list_add_tail(&entry->list, &cqm_rmid_free_lru); + } + + + return __rmid_valid(intel_cqm_rotation_rmid); +} + +/* + * Pick a victim group and move it to the tail of the group list. + * @next: The first group without an RMID + */ +static void __intel_cqm_pick_and_rotate(struct perf_event *next) +{ + struct perf_event *rotor; + unsigned int rmid; + + lockdep_assert_held(&cache_mutex); + + rotor = list_first_entry(&cache_groups, struct perf_event, + hw.cqm_groups_entry); + + /* + * The group at the front of the list should always have a valid + * RMID. If it doesn't then no groups have RMIDs assigned and we + * don't need to rotate the list. + */ + if (next == rotor) + return; + + rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID); + __put_rmid(rmid); + + list_rotate_left(&cache_groups); +} + +/* + * Deallocate the RMIDs from any events that conflict with @event, and + * place them on the back of the group list. + */ +static void intel_cqm_sched_out_conflicting_events(struct perf_event *event) +{ + struct perf_event *group, *g; + unsigned int rmid; + + lockdep_assert_held(&cache_mutex); + + list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) { + if (group == event) + continue; + + rmid = group->hw.cqm_rmid; + + /* + * Skip events that don't have a valid RMID. + */ + if (!__rmid_valid(rmid)) + continue; + + /* + * No conflict? No problem! Leave the event alone. + */ + if (!__conflict_event(group, event)) + continue; + + intel_cqm_xchg_rmid(group, INVALID_RMID); + __put_rmid(rmid); + } +} + +/* + * Attempt to rotate the groups and assign new RMIDs. + * + * We rotate for two reasons, + * 1. To handle the scheduling of conflicting events + * 2. To recycle RMIDs + * + * Rotating RMIDs is complicated because the hardware doesn't give us + * any clues. + * + * There's problems with the hardware interface; when you change the + * task:RMID map cachelines retain their 'old' tags, giving a skewed + * picture. In order to work around this, we must always keep one free + * RMID - intel_cqm_rotation_rmid. + * + * Rotation works by taking away an RMID from a group (the old RMID), + * and assigning the free RMID to another group (the new RMID). We must + * then wait for the old RMID to not be used (no cachelines tagged). + * This ensure that all cachelines are tagged with 'active' RMIDs. At + * this point we can start reading values for the new RMID and treat the + * old RMID as the free RMID for the next rotation. + * + * Return %true or %false depending on whether we did any rotating. + */ +static bool __intel_cqm_rmid_rotate(void) +{ + struct perf_event *group, *start = NULL; + unsigned int threshold_limit; + unsigned int nr_needed = 0; + unsigned int nr_available; + bool rotated = false; + + mutex_lock(&cache_mutex); + +again: + /* + * Fast path through this function if there are no groups and no + * RMIDs that need cleaning. + */ + if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru)) + goto out; + + list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) { + if (!__rmid_valid(group->hw.cqm_rmid)) { + if (!start) + start = group; + nr_needed++; + } + } + + /* + * We have some event groups, but they all have RMIDs assigned + * and no RMIDs need cleaning. + */ + if (!nr_needed && list_empty(&cqm_rmid_limbo_lru)) + goto out; + + if (!nr_needed) + goto stabilize; + + /* + * We have more event groups without RMIDs than available RMIDs, + * or we have event groups that conflict with the ones currently + * scheduled. + * + * We force deallocate the rmid of the group at the head of + * cache_groups. The first event group without an RMID then gets + * assigned intel_cqm_rotation_rmid. This ensures we always make + * forward progress. + * + * Rotate the cache_groups list so the previous head is now the + * tail. + */ + __intel_cqm_pick_and_rotate(start); + + /* + * If the rotation is going to succeed, reduce the threshold so + * that we don't needlessly reuse dirty RMIDs. + */ + if (__rmid_valid(intel_cqm_rotation_rmid)) { + intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid); + intel_cqm_rotation_rmid = __get_rmid(); + + intel_cqm_sched_out_conflicting_events(start); + + if (__intel_cqm_threshold) + __intel_cqm_threshold--; + } + + rotated = true; + +stabilize: + /* + * We now need to stablize the RMID we freed above (if any) to + * ensure that the next time we rotate we have an RMID with zero + * occupancy value. + * + * Alternatively, if we didn't need to perform any rotation, + * we'll have a bunch of RMIDs in limbo that need stabilizing. + */ + threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale; + + while (intel_cqm_rmid_stabilize(&nr_available) && + __intel_cqm_threshold < threshold_limit) { + unsigned int steal_limit; + + /* + * Don't spin if nobody is actively waiting for an RMID, + * the rotation worker will be kicked as soon as an + * event needs an RMID anyway. + */ + if (!nr_needed) + break; + + /* Allow max 25% of RMIDs to be in limbo. */ + steal_limit = (cqm_max_rmid + 1) / 4; + + /* + * We failed to stabilize any RMIDs so our rotation + * logic is now stuck. In order to make forward progress + * we have a few options: + * + * 1. rotate ("steal") another RMID + * 2. increase the threshold + * 3. do nothing + * + * We do both of 1. and 2. until we hit the steal limit. + * + * The steal limit prevents all RMIDs ending up on the + * limbo list. This can happen if every RMID has a + * non-zero occupancy above threshold_limit, and the + * occupancy values aren't dropping fast enough. + * + * Note that there is prioritisation at work here - we'd + * rather increase the number of RMIDs on the limbo list + * than increase the threshold, because increasing the + * threshold skews the event data (because we reuse + * dirty RMIDs) - threshold bumps are a last resort. + */ + if (nr_available < steal_limit) + goto again; + + __intel_cqm_threshold++; + } + +out: + mutex_unlock(&cache_mutex); + return rotated; +} + +static void intel_cqm_rmid_rotate(struct work_struct *work); + +static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate); + +static struct pmu intel_cqm_pmu; + +static void intel_cqm_rmid_rotate(struct work_struct *work) +{ + unsigned long delay; + + __intel_cqm_rmid_rotate(); + + delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms); + schedule_delayed_work(&intel_cqm_rmid_work, delay); +} + +/* + * Find a group and setup RMID. + * + * If we're part of a group, we use the group's RMID. + */ +static void intel_cqm_setup_event(struct perf_event *event, + struct perf_event **group) +{ + struct perf_event *iter; + unsigned int rmid; + bool conflict = false; + + list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) { + rmid = iter->hw.cqm_rmid; + + if (__match_event(iter, event)) { + /* All tasks in a group share an RMID */ + event->hw.cqm_rmid = rmid; + *group = iter; + return; + } + + /* + * We only care about conflicts for events that are + * actually scheduled in (and hence have a valid RMID). + */ + if (__conflict_event(iter, event) && __rmid_valid(rmid)) + conflict = true; + } + + if (conflict) + rmid = INVALID_RMID; + else + rmid = __get_rmid(); + + event->hw.cqm_rmid = rmid; +} + +static void intel_cqm_event_read(struct perf_event *event) +{ + unsigned long flags; + unsigned int rmid; + u64 val; + + /* + * Task events are handled by intel_cqm_event_count(). + */ + if (event->cpu == -1) + return; + + raw_spin_lock_irqsave(&cache_lock, flags); + rmid = event->hw.cqm_rmid; + + if (!__rmid_valid(rmid)) + goto out; + + val = __rmid_read(rmid); + + /* + * Ignore this reading on error states and do not update the value. + */ + if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) + goto out; + + local64_set(&event->count, val); +out: + raw_spin_unlock_irqrestore(&cache_lock, flags); +} + +static void __intel_cqm_event_count(void *info) +{ + struct rmid_read *rr = info; + u64 val; + + val = __rmid_read(rr->rmid); + + if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) + return; + + atomic64_add(val, &rr->value); +} + +static inline bool cqm_group_leader(struct perf_event *event) +{ + return !list_empty(&event->hw.cqm_groups_entry); +} + +static u64 intel_cqm_event_count(struct perf_event *event) +{ + unsigned long flags; + struct rmid_read rr = { + .value = ATOMIC64_INIT(0), + }; + + /* + * We only need to worry about task events. System-wide events + * are handled like usual, i.e. entirely with + * intel_cqm_event_read(). + */ + if (event->cpu != -1) + return __perf_event_count(event); + + /* + * Only the group leader gets to report values. This stops us + * reporting duplicate values to userspace, and gives us a clear + * rule for which task gets to report the values. + * + * Note that it is impossible to attribute these values to + * specific packages - we forfeit that ability when we create + * task events. + */ + if (!cqm_group_leader(event)) + return 0; + + /* + * Notice that we don't perform the reading of an RMID + * atomically, because we can't hold a spin lock across the + * IPIs. + * + * Speculatively perform the read, since @event might be + * assigned a different (possibly invalid) RMID while we're + * busying performing the IPI calls. It's therefore necessary to + * check @event's RMID afterwards, and if it has changed, + * discard the result of the read. + */ + rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid); + + if (!__rmid_valid(rr.rmid)) + goto out; + + on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1); + + raw_spin_lock_irqsave(&cache_lock, flags); + if (event->hw.cqm_rmid == rr.rmid) + local64_set(&event->count, atomic64_read(&rr.value)); + raw_spin_unlock_irqrestore(&cache_lock, flags); +out: + return __perf_event_count(event); +} + +static void intel_cqm_event_start(struct perf_event *event, int mode) +{ + struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); + unsigned int rmid = event->hw.cqm_rmid; + unsigned long flags; + + if (!(event->hw.cqm_state & PERF_HES_STOPPED)) + return; + + event->hw.cqm_state &= ~PERF_HES_STOPPED; + + raw_spin_lock_irqsave(&state->lock, flags); + + if (state->cnt++) + WARN_ON_ONCE(state->rmid != rmid); + else + WARN_ON_ONCE(state->rmid); + + state->rmid = rmid; + wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid); + + raw_spin_unlock_irqrestore(&state->lock, flags); +} + +static void intel_cqm_event_stop(struct perf_event *event, int mode) +{ + struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); + unsigned long flags; + + if (event->hw.cqm_state & PERF_HES_STOPPED) + return; + + event->hw.cqm_state |= PERF_HES_STOPPED; + + raw_spin_lock_irqsave(&state->lock, flags); + intel_cqm_event_read(event); + + if (!--state->cnt) { + state->rmid = 0; + wrmsrl(MSR_IA32_PQR_ASSOC, 0); + } else { + WARN_ON_ONCE(!state->rmid); + } + + raw_spin_unlock_irqrestore(&state->lock, flags); +} + +static int intel_cqm_event_add(struct perf_event *event, int mode) +{ + unsigned long flags; + unsigned int rmid; + + raw_spin_lock_irqsave(&cache_lock, flags); + + event->hw.cqm_state = PERF_HES_STOPPED; + rmid = event->hw.cqm_rmid; + + if (__rmid_valid(rmid) && (mode & PERF_EF_START)) + intel_cqm_event_start(event, mode); + + raw_spin_unlock_irqrestore(&cache_lock, flags); + + return 0; +} + +static void intel_cqm_event_del(struct perf_event *event, int mode) +{ + intel_cqm_event_stop(event, mode); +} + +static void intel_cqm_event_destroy(struct perf_event *event) +{ + struct perf_event *group_other = NULL; + + mutex_lock(&cache_mutex); + + /* + * If there's another event in this group... + */ + if (!list_empty(&event->hw.cqm_group_entry)) { + group_other = list_first_entry(&event->hw.cqm_group_entry, + struct perf_event, + hw.cqm_group_entry); + list_del(&event->hw.cqm_group_entry); + } + + /* + * And we're the group leader.. + */ + if (cqm_group_leader(event)) { + /* + * If there was a group_other, make that leader, otherwise + * destroy the group and return the RMID. + */ + if (group_other) { + list_replace(&event->hw.cqm_groups_entry, + &group_other->hw.cqm_groups_entry); + } else { + unsigned int rmid = event->hw.cqm_rmid; + + if (__rmid_valid(rmid)) + __put_rmid(rmid); + list_del(&event->hw.cqm_groups_entry); + } + } + + mutex_unlock(&cache_mutex); +} + +static int intel_cqm_event_init(struct perf_event *event) +{ + struct perf_event *group = NULL; + bool rotate = false; + + if (event->attr.type != intel_cqm_pmu.type) + return -ENOENT; + + if (event->attr.config & ~QOS_EVENT_MASK) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest || + event->attr.sample_period) /* no sampling */ + return -EINVAL; + + INIT_LIST_HEAD(&event->hw.cqm_group_entry); + INIT_LIST_HEAD(&event->hw.cqm_groups_entry); + + event->destroy = intel_cqm_event_destroy; + + mutex_lock(&cache_mutex); + + /* Will also set rmid */ + intel_cqm_setup_event(event, &group); + + if (group) { + list_add_tail(&event->hw.cqm_group_entry, + &group->hw.cqm_group_entry); + } else { + list_add_tail(&event->hw.cqm_groups_entry, + &cache_groups); + + /* + * All RMIDs are either in use or have recently been + * used. Kick the rotation worker to clean/free some. + * + * We only do this for the group leader, rather than for + * every event in a group to save on needless work. + */ + if (!__rmid_valid(event->hw.cqm_rmid)) + rotate = true; + } + + mutex_unlock(&cache_mutex); + + if (rotate) + schedule_delayed_work(&intel_cqm_rmid_work, 0); + + return 0; +} + +EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01"); +EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1"); +EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes"); +EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL); +EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1"); + +static struct attribute *intel_cqm_events_attr[] = { + EVENT_PTR(intel_cqm_llc), + EVENT_PTR(intel_cqm_llc_pkg), + EVENT_PTR(intel_cqm_llc_unit), + EVENT_PTR(intel_cqm_llc_scale), + EVENT_PTR(intel_cqm_llc_snapshot), + NULL, +}; + +static struct attribute_group intel_cqm_events_group = { + .name = "events", + .attrs = intel_cqm_events_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); +static struct attribute *intel_cqm_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group intel_cqm_format_group = { + .name = "format", + .attrs = intel_cqm_formats_attr, +}; + +static ssize_t +max_recycle_threshold_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + ssize_t rv; + + mutex_lock(&cache_mutex); + rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold); + mutex_unlock(&cache_mutex); + + return rv; +} + +static ssize_t +max_recycle_threshold_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int bytes, cachelines; + int ret; + + ret = kstrtouint(buf, 0, &bytes); + if (ret) + return ret; + + mutex_lock(&cache_mutex); + + __intel_cqm_max_threshold = bytes; + cachelines = bytes / cqm_l3_scale; + + /* + * The new maximum takes effect immediately. + */ + if (__intel_cqm_threshold > cachelines) + __intel_cqm_threshold = cachelines; + + mutex_unlock(&cache_mutex); + + return count; +} + +static DEVICE_ATTR_RW(max_recycle_threshold); + +static struct attribute *intel_cqm_attrs[] = { + &dev_attr_max_recycle_threshold.attr, + NULL, +}; + +static const struct attribute_group intel_cqm_group = { + .attrs = intel_cqm_attrs, +}; + +static const struct attribute_group *intel_cqm_attr_groups[] = { + &intel_cqm_events_group, + &intel_cqm_format_group, + &intel_cqm_group, + NULL, +}; + +static struct pmu intel_cqm_pmu = { + .hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME, + .attr_groups = intel_cqm_attr_groups, + .task_ctx_nr = perf_sw_context, + .event_init = intel_cqm_event_init, + .add = intel_cqm_event_add, + .del = intel_cqm_event_del, + .start = intel_cqm_event_start, + .stop = intel_cqm_event_stop, + .read = intel_cqm_event_read, + .count = intel_cqm_event_count, +}; + +static inline void cqm_pick_event_reader(int cpu) +{ + int phys_id = topology_physical_package_id(cpu); + int i; + + for_each_cpu(i, &cqm_cpumask) { + if (phys_id == topology_physical_package_id(i)) + return; /* already got reader for this socket */ + } + + cpumask_set_cpu(cpu, &cqm_cpumask); +} + +static void intel_cqm_cpu_prepare(unsigned int cpu) +{ + struct intel_cqm_state *state = &per_cpu(cqm_state, cpu); + struct cpuinfo_x86 *c = &cpu_data(cpu); + + raw_spin_lock_init(&state->lock); + state->rmid = 0; + state->cnt = 0; + + WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid); + WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale); +} + +static void intel_cqm_cpu_exit(unsigned int cpu) +{ + int phys_id = topology_physical_package_id(cpu); + int i; + + /* + * Is @cpu a designated cqm reader? + */ + if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask)) + return; + + for_each_online_cpu(i) { + if (i == cpu) + continue; + + if (phys_id == topology_physical_package_id(i)) { + cpumask_set_cpu(i, &cqm_cpumask); + break; + } + } +} + +static int intel_cqm_cpu_notifier(struct notifier_block *nb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + intel_cqm_cpu_prepare(cpu); + break; + case CPU_DOWN_PREPARE: + intel_cqm_cpu_exit(cpu); + break; + case CPU_STARTING: + cqm_pick_event_reader(cpu); + break; + } + + return NOTIFY_OK; +} + +static const struct x86_cpu_id intel_cqm_match[] = { + { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC }, + {} +}; + +static int __init intel_cqm_init(void) +{ + char *str, scale[20]; + int i, cpu, ret; + + if (!x86_match_cpu(intel_cqm_match)) + return -ENODEV; + + cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale; + + /* + * It's possible that not all resources support the same number + * of RMIDs. Instead of making scheduling much more complicated + * (where we have to match a task's RMID to a cpu that supports + * that many RMIDs) just find the minimum RMIDs supported across + * all cpus. + * + * Also, check that the scales match on all cpus. + */ + cpu_notifier_register_begin(); + + for_each_online_cpu(cpu) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->x86_cache_max_rmid < cqm_max_rmid) + cqm_max_rmid = c->x86_cache_max_rmid; + + if (c->x86_cache_occ_scale != cqm_l3_scale) { + pr_err("Multiple LLC scale values, disabling\n"); + ret = -EINVAL; + goto out; + } + } + + /* + * A reasonable upper limit on the max threshold is the number + * of lines tagged per RMID if all RMIDs have the same number of + * lines tagged in the LLC. + * + * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC. + */ + __intel_cqm_max_threshold = + boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1); + + snprintf(scale, sizeof(scale), "%u", cqm_l3_scale); + str = kstrdup(scale, GFP_KERNEL); + if (!str) { + ret = -ENOMEM; + goto out; + } + + event_attr_intel_cqm_llc_scale.event_str = str; + + ret = intel_cqm_setup_rmid_cache(); + if (ret) + goto out; + + for_each_online_cpu(i) { + intel_cqm_cpu_prepare(i); + cqm_pick_event_reader(i); + } + + __perf_cpu_notifier(intel_cqm_cpu_notifier); + + ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1); + if (ret) + pr_err("Intel CQM perf registration failed: %d\n", ret); + else + pr_info("Intel CQM monitoring enabled\n"); + +out: + cpu_notifier_register_done(); + + return ret; +} +device_initcall(intel_cqm_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 073983398364..ca69ea56c712 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -461,7 +461,8 @@ void intel_pmu_enable_bts(u64 config) debugctlmsr |= DEBUGCTLMSR_TR; debugctlmsr |= DEBUGCTLMSR_BTS; - debugctlmsr |= DEBUGCTLMSR_BTINT; + if (config & ARCH_PERFMON_EVENTSEL_INT) + debugctlmsr |= DEBUGCTLMSR_BTINT; if (!(config & ARCH_PERFMON_EVENTSEL_OS)) debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS; @@ -611,6 +612,10 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ /* Allow all events as PEBS with no flags */ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), EVENT_CONSTRAINT_END @@ -622,6 +627,10 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = { INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ /* Allow all events as PEBS with no flags */ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), EVENT_CONSTRAINT_END @@ -633,16 +642,16 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = { /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ - INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ - INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */ - INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */ /* Allow all events as PEBS with no flags */ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), EVENT_CONSTRAINT_END diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 58f1a94beaf0..94e5b506caa6 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -39,6 +39,7 @@ static enum { #define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ #define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ #define LBR_FAR_BIT 8 /* do not capture far branches */ +#define LBR_CALL_STACK_BIT 9 /* enable call stack */ #define LBR_KERNEL (1 << LBR_KERNEL_BIT) #define LBR_USER (1 << LBR_USER_BIT) @@ -49,6 +50,7 @@ static enum { #define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) #define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) #define LBR_FAR (1 << LBR_FAR_BIT) +#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT) #define LBR_PLM (LBR_KERNEL | LBR_USER) @@ -69,33 +71,31 @@ static enum { #define LBR_FROM_FLAG_IN_TX (1ULL << 62) #define LBR_FROM_FLAG_ABORT (1ULL << 61) -#define for_each_branch_sample_type(x) \ - for ((x) = PERF_SAMPLE_BRANCH_USER; \ - (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1) - /* * x86control flow change classification * x86control flow changes include branches, interrupts, traps, faults */ enum { - X86_BR_NONE = 0, /* unknown */ - - X86_BR_USER = 1 << 0, /* branch target is user */ - X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ - - X86_BR_CALL = 1 << 2, /* call */ - X86_BR_RET = 1 << 3, /* return */ - X86_BR_SYSCALL = 1 << 4, /* syscall */ - X86_BR_SYSRET = 1 << 5, /* syscall return */ - X86_BR_INT = 1 << 6, /* sw interrupt */ - X86_BR_IRET = 1 << 7, /* return from interrupt */ - X86_BR_JCC = 1 << 8, /* conditional */ - X86_BR_JMP = 1 << 9, /* jump */ - X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ - X86_BR_IND_CALL = 1 << 11,/* indirect calls */ - X86_BR_ABORT = 1 << 12,/* transaction abort */ - X86_BR_IN_TX = 1 << 13,/* in transaction */ - X86_BR_NO_TX = 1 << 14,/* not in transaction */ + X86_BR_NONE = 0, /* unknown */ + + X86_BR_USER = 1 << 0, /* branch target is user */ + X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ + + X86_BR_CALL = 1 << 2, /* call */ + X86_BR_RET = 1 << 3, /* return */ + X86_BR_SYSCALL = 1 << 4, /* syscall */ + X86_BR_SYSRET = 1 << 5, /* syscall return */ + X86_BR_INT = 1 << 6, /* sw interrupt */ + X86_BR_IRET = 1 << 7, /* return from interrupt */ + X86_BR_JCC = 1 << 8, /* conditional */ + X86_BR_JMP = 1 << 9, /* jump */ + X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ + X86_BR_IND_CALL = 1 << 11,/* indirect calls */ + X86_BR_ABORT = 1 << 12,/* transaction abort */ + X86_BR_IN_TX = 1 << 13,/* in transaction */ + X86_BR_NO_TX = 1 << 14,/* not in transaction */ + X86_BR_ZERO_CALL = 1 << 15,/* zero length call */ + X86_BR_CALL_STACK = 1 << 16,/* call stack */ }; #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) @@ -112,13 +112,15 @@ enum { X86_BR_JMP |\ X86_BR_IRQ |\ X86_BR_ABORT |\ - X86_BR_IND_CALL) + X86_BR_IND_CALL |\ + X86_BR_ZERO_CALL) #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) #define X86_BR_ANY_CALL \ (X86_BR_CALL |\ X86_BR_IND_CALL |\ + X86_BR_ZERO_CALL |\ X86_BR_SYSCALL |\ X86_BR_IRQ |\ X86_BR_INT) @@ -130,17 +132,32 @@ static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); * otherwise it becomes near impossible to get a reliable stack. */ -static void __intel_pmu_lbr_enable(void) +static void __intel_pmu_lbr_enable(bool pmi) { - u64 debugctl; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 debugctl, lbr_select = 0, orig_debugctl; - if (cpuc->lbr_sel) - wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config); + /* + * No need to reprogram LBR_SELECT in a PMI, as it + * did not change. + */ + if (cpuc->lbr_sel && !pmi) { + lbr_select = cpuc->lbr_sel->config; + wrmsrl(MSR_LBR_SELECT, lbr_select); + } rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + orig_debugctl = debugctl; + debugctl |= DEBUGCTLMSR_LBR; + /* + * LBR callstack does not work well with FREEZE_LBRS_ON_PMI. + * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions + * may cause superfluous increase/decrease of LBR_TOS. + */ + if (!(lbr_select & LBR_CALL_STACK)) + debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; + if (orig_debugctl != debugctl) + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); } static void __intel_pmu_lbr_disable(void) @@ -181,9 +198,116 @@ void intel_pmu_lbr_reset(void) intel_pmu_lbr_reset_64(); } +/* + * TOS = most recently recorded branch + */ +static inline u64 intel_pmu_lbr_tos(void) +{ + u64 tos; + + rdmsrl(x86_pmu.lbr_tos, tos); + return tos; +} + +enum { + LBR_NONE, + LBR_VALID, +}; + +static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) +{ + int i; + unsigned lbr_idx, mask; + u64 tos; + + if (task_ctx->lbr_callstack_users == 0 || + task_ctx->lbr_stack_state == LBR_NONE) { + intel_pmu_lbr_reset(); + return; + } + + mask = x86_pmu.lbr_nr - 1; + tos = intel_pmu_lbr_tos(); + for (i = 0; i < x86_pmu.lbr_nr; i++) { + lbr_idx = (tos - i) & mask; + wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); + wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + } + task_ctx->lbr_stack_state = LBR_NONE; +} + +static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) +{ + int i; + unsigned lbr_idx, mask; + u64 tos; + + if (task_ctx->lbr_callstack_users == 0) { + task_ctx->lbr_stack_state = LBR_NONE; + return; + } + + mask = x86_pmu.lbr_nr - 1; + tos = intel_pmu_lbr_tos(); + for (i = 0; i < x86_pmu.lbr_nr; i++) { + lbr_idx = (tos - i) & mask; + rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); + rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + } + task_ctx->lbr_stack_state = LBR_VALID; +} + +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_task_context *task_ctx; + + if (!x86_pmu.lbr_nr) + return; + + /* + * If LBR callstack feature is enabled and the stack was saved when + * the task was scheduled out, restore the stack. Otherwise flush + * the LBR stack. + */ + task_ctx = ctx ? ctx->task_ctx_data : NULL; + if (task_ctx) { + if (sched_in) { + __intel_pmu_lbr_restore(task_ctx); + cpuc->lbr_context = ctx; + } else { + __intel_pmu_lbr_save(task_ctx); + } + return; + } + + /* + * When sampling the branck stack in system-wide, it may be + * necessary to flush the stack on context switch. This happens + * when the branch stack does not tag its entries with the pid + * of the current task. Otherwise it becomes impossible to + * associate a branch entry with a task. This ambiguity is more + * likely to appear when the branch stack supports priv level + * filtering and the user sets it to monitor only at the user + * level (which could be a useful measurement in system-wide + * mode). In that case, the risk is high of having a branch + * stack with branch from multiple tasks. + */ + if (sched_in) { + intel_pmu_lbr_reset(); + cpuc->lbr_context = ctx; + } +} + +static inline bool branch_user_callstack(unsigned br_sel) +{ + return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK); +} + void intel_pmu_lbr_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_task_context *task_ctx; if (!x86_pmu.lbr_nr) return; @@ -198,18 +322,33 @@ void intel_pmu_lbr_enable(struct perf_event *event) } cpuc->br_sel = event->hw.branch_reg.reg; + if (branch_user_callstack(cpuc->br_sel) && event->ctx && + event->ctx->task_ctx_data) { + task_ctx = event->ctx->task_ctx_data; + task_ctx->lbr_callstack_users++; + } + cpuc->lbr_users++; + perf_sched_cb_inc(event->ctx->pmu); } void intel_pmu_lbr_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_task_context *task_ctx; if (!x86_pmu.lbr_nr) return; + if (branch_user_callstack(cpuc->br_sel) && event->ctx && + event->ctx->task_ctx_data) { + task_ctx = event->ctx->task_ctx_data; + task_ctx->lbr_callstack_users--; + } + cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); + perf_sched_cb_dec(event->ctx->pmu); if (cpuc->enabled && !cpuc->lbr_users) { __intel_pmu_lbr_disable(); @@ -218,12 +357,12 @@ void intel_pmu_lbr_disable(struct perf_event *event) } } -void intel_pmu_lbr_enable_all(void) +void intel_pmu_lbr_enable_all(bool pmi) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->lbr_users) - __intel_pmu_lbr_enable(); + __intel_pmu_lbr_enable(pmi); } void intel_pmu_lbr_disable_all(void) @@ -234,18 +373,6 @@ void intel_pmu_lbr_disable_all(void) __intel_pmu_lbr_disable(); } -/* - * TOS = most recently recorded branch - */ -static inline u64 intel_pmu_lbr_tos(void) -{ - u64 tos; - - rdmsrl(x86_pmu.lbr_tos, tos); - - return tos; -} - static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) { unsigned long mask = x86_pmu.lbr_nr - 1; @@ -350,7 +477,7 @@ void intel_pmu_lbr_read(void) * - in case there is no HW filter * - in case the HW filter has errata or limitations */ -static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) +static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) { u64 br_type = event->attr.branch_sample_type; int mask = 0; @@ -387,11 +514,21 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type & PERF_SAMPLE_BRANCH_COND) mask |= X86_BR_JCC; + if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) { + if (!x86_pmu_has_lbr_callstack()) + return -EOPNOTSUPP; + if (mask & ~(X86_BR_USER | X86_BR_KERNEL)) + return -EINVAL; + mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET | + X86_BR_CALL_STACK; + } + /* * stash actual user request into reg, it may * be used by fixup code for some CPU */ event->hw.branch_reg.reg = mask; + return 0; } /* @@ -403,14 +540,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) { struct hw_perf_event_extra *reg; u64 br_type = event->attr.branch_sample_type; - u64 mask = 0, m; - u64 v; + u64 mask = 0, v; + int i; - for_each_branch_sample_type(m) { - if (!(br_type & m)) + for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { + if (!(br_type & (1ULL << i))) continue; - v = x86_pmu.lbr_sel_map[m]; + v = x86_pmu.lbr_sel_map[i]; if (v == LBR_NOT_SUPP) return -EOPNOTSUPP; @@ -420,8 +557,12 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) reg = &event->hw.branch_reg; reg->idx = EXTRA_REG_LBR; - /* LBR_SELECT operates in suppress mode so invert mask */ - reg->config = ~mask & x86_pmu.lbr_sel_mask; + /* + * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate + * in suppress mode. So LBR_SELECT should be set to + * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) + */ + reg->config = mask ^ x86_pmu.lbr_sel_mask; return 0; } @@ -439,7 +580,9 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) /* * setup SW LBR filter */ - intel_pmu_setup_sw_lbr_filter(event); + ret = intel_pmu_setup_sw_lbr_filter(event); + if (ret) + return ret; /* * setup HW LBR filter, if any @@ -568,6 +711,12 @@ static int branch_type(unsigned long from, unsigned long to, int abort) ret = X86_BR_INT; break; case 0xe8: /* call near rel */ + insn_get_immediate(&insn); + if (insn.immediate1.value == 0) { + /* zero length call */ + ret = X86_BR_ZERO_CALL; + break; + } case 0x9a: /* call far absolute */ ret = X86_BR_CALL; break; @@ -678,35 +827,49 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) /* * Map interface branch filters onto LBR filters */ -static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { - [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, - [PERF_SAMPLE_BRANCH_USER] = LBR_USER, - [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, - [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, - [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP - | LBR_IND_JMP | LBR_FAR, +static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_REL_JMP + | LBR_IND_JMP | LBR_FAR, /* * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches */ - [PERF_SAMPLE_BRANCH_ANY_CALL] = + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, /* * NHM/WSM erratum: must include IND_JMP to capture IND_CALL */ - [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, - [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, }; -static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { - [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, - [PERF_SAMPLE_BRANCH_USER] = LBR_USER, - [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, - [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, - [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR, - [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL - | LBR_FAR, - [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, - [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, +static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL + | LBR_FAR, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, +}; + +static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL + | LBR_FAR, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, + [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL + | LBR_RETURN | LBR_CALL_STACK, }; /* core */ @@ -765,6 +928,20 @@ void __init intel_pmu_lbr_init_snb(void) pr_cont("16-deep LBR, "); } +/* haswell */ +void intel_pmu_lbr_init_hsw(void) +{ + x86_pmu.lbr_nr = 16; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = hsw_lbr_sel_map; + + pr_cont("16-deep LBR, "); +} + /* atom */ void __init intel_pmu_lbr_init_atom(void) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c new file mode 100644 index 000000000000..f2770641c0fd --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -0,0 +1,1103 @@ +/* + * Intel(R) Processor Trace PMU driver for perf + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * Intel PT is specified in the Intel Architecture Instruction Set Extensions + * Programming Reference: + * http://software.intel.com/en-us/intel-isa-extensions + */ + +#undef DEBUG + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/device.h> + +#include <asm/perf_event.h> +#include <asm/insn.h> +#include <asm/io.h> + +#include "perf_event.h" +#include "intel_pt.h" + +static DEFINE_PER_CPU(struct pt, pt_ctx); + +static struct pt_pmu pt_pmu; + +enum cpuid_regs { + CR_EAX = 0, + CR_ECX, + CR_EDX, + CR_EBX +}; + +/* + * Capabilities of Intel PT hardware, such as number of address bits or + * supported output schemes, are cached and exported to userspace as "caps" + * attribute group of pt pmu device + * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store + * relevant bits together with intel_pt traces. + * + * These are necessary for both trace decoding (payloads_lip, contains address + * width encoded in IP-related packets), and event configuration (bitmasks with + * permitted values for certain bit fields). + */ +#define PT_CAP(_n, _l, _r, _m) \ + [PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l, \ + .reg = _r, .mask = _m } + +static struct pt_cap_desc { + const char *name; + u32 leaf; + u8 reg; + u32 mask; +} pt_caps[] = { + PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff), + PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)), + PT_CAP(topa_output, 0, CR_ECX, BIT(0)), + PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)), + PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)), +}; + +static u32 pt_cap_get(enum pt_capabilities cap) +{ + struct pt_cap_desc *cd = &pt_caps[cap]; + u32 c = pt_pmu.caps[cd->leaf * 4 + cd->reg]; + unsigned int shift = __ffs(cd->mask); + + return (c & cd->mask) >> shift; +} + +static ssize_t pt_cap_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + struct dev_ext_attribute *ea = + container_of(attr, struct dev_ext_attribute, attr); + enum pt_capabilities cap = (long)ea->var; + + return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap)); +} + +static struct attribute_group pt_cap_group = { + .name = "caps", +}; + +PMU_FORMAT_ATTR(tsc, "config:10" ); +PMU_FORMAT_ATTR(noretcomp, "config:11" ); + +static struct attribute *pt_formats_attr[] = { + &format_attr_tsc.attr, + &format_attr_noretcomp.attr, + NULL, +}; + +static struct attribute_group pt_format_group = { + .name = "format", + .attrs = pt_formats_attr, +}; + +static const struct attribute_group *pt_attr_groups[] = { + &pt_cap_group, + &pt_format_group, + NULL, +}; + +static int __init pt_pmu_hw_init(void) +{ + struct dev_ext_attribute *de_attrs; + struct attribute **attrs; + size_t size; + int ret; + long i; + + attrs = NULL; + ret = -ENODEV; + if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT)) + goto fail; + + for (i = 0; i < PT_CPUID_LEAVES; i++) { + cpuid_count(20, i, + &pt_pmu.caps[CR_EAX + i*4], + &pt_pmu.caps[CR_EBX + i*4], + &pt_pmu.caps[CR_ECX + i*4], + &pt_pmu.caps[CR_EDX + i*4]); + } + + ret = -ENOMEM; + size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1); + attrs = kzalloc(size, GFP_KERNEL); + if (!attrs) + goto fail; + + size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1); + de_attrs = kzalloc(size, GFP_KERNEL); + if (!de_attrs) + goto fail; + + for (i = 0; i < ARRAY_SIZE(pt_caps); i++) { + struct dev_ext_attribute *de_attr = de_attrs + i; + + de_attr->attr.attr.name = pt_caps[i].name; + + sysfs_attr_init(&de_attrs->attr.attr); + + de_attr->attr.attr.mode = S_IRUGO; + de_attr->attr.show = pt_cap_show; + de_attr->var = (void *)i; + + attrs[i] = &de_attr->attr.attr; + } + + pt_cap_group.attrs = attrs; + + return 0; + +fail: + kfree(attrs); + + return ret; +} + +#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC) + +static bool pt_event_valid(struct perf_event *event) +{ + u64 config = event->attr.config; + + if ((config & PT_CONFIG_MASK) != config) + return false; + + return true; +} + +/* + * PT configuration helpers + * These all are cpu affine and operate on a local PT + */ + +static bool pt_is_running(void) +{ + u64 ctl; + + rdmsrl(MSR_IA32_RTIT_CTL, ctl); + + return !!(ctl & RTIT_CTL_TRACEEN); +} + +static void pt_config(struct perf_event *event) +{ + u64 reg; + + reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN; + + if (!event->attr.exclude_kernel) + reg |= RTIT_CTL_OS; + if (!event->attr.exclude_user) + reg |= RTIT_CTL_USR; + + reg |= (event->attr.config & PT_CONFIG_MASK); + + wrmsrl(MSR_IA32_RTIT_CTL, reg); +} + +static void pt_config_start(bool start) +{ + u64 ctl; + + rdmsrl(MSR_IA32_RTIT_CTL, ctl); + if (start) + ctl |= RTIT_CTL_TRACEEN; + else + ctl &= ~RTIT_CTL_TRACEEN; + wrmsrl(MSR_IA32_RTIT_CTL, ctl); + + /* + * A wrmsr that disables trace generation serializes other PT + * registers and causes all data packets to be written to memory, + * but a fence is required for the data to become globally visible. + * + * The below WMB, separating data store and aux_head store matches + * the consumer's RMB that separates aux_head load and data load. + */ + if (!start) + wmb(); +} + +static void pt_config_buffer(void *buf, unsigned int topa_idx, + unsigned int output_off) +{ + u64 reg; + + wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf)); + + reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32); + + wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg); +} + +/* + * Keep ToPA table-related metadata on the same page as the actual table, + * taking up a few words from the top + */ + +#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1) + +/** + * struct topa - page-sized ToPA table with metadata at the top + * @table: actual ToPA table entries, as understood by PT hardware + * @list: linkage to struct pt_buffer's list of tables + * @phys: physical address of this page + * @offset: offset of the first entry in this table in the buffer + * @size: total size of all entries in this table + * @last: index of the last initialized entry in this table + */ +struct topa { + struct topa_entry table[TENTS_PER_PAGE]; + struct list_head list; + u64 phys; + u64 offset; + size_t size; + int last; +}; + +/* make -1 stand for the last table entry */ +#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)]) + +/** + * topa_alloc() - allocate page-sized ToPA table + * @cpu: CPU on which to allocate. + * @gfp: Allocation flags. + * + * Return: On success, return the pointer to ToPA table page. + */ +static struct topa *topa_alloc(int cpu, gfp_t gfp) +{ + int node = cpu_to_node(cpu); + struct topa *topa; + struct page *p; + + p = alloc_pages_node(node, gfp | __GFP_ZERO, 0); + if (!p) + return NULL; + + topa = page_address(p); + topa->last = 0; + topa->phys = page_to_phys(p); + + /* + * In case of singe-entry ToPA, always put the self-referencing END + * link as the 2nd entry in the table + */ + if (!pt_cap_get(PT_CAP_topa_multiple_entries)) { + TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT; + TOPA_ENTRY(topa, 1)->end = 1; + } + + return topa; +} + +/** + * topa_free() - free a page-sized ToPA table + * @topa: Table to deallocate. + */ +static void topa_free(struct topa *topa) +{ + free_page((unsigned long)topa); +} + +/** + * topa_insert_table() - insert a ToPA table into a buffer + * @buf: PT buffer that's being extended. + * @topa: New topa table to be inserted. + * + * If it's the first table in this buffer, set up buffer's pointers + * accordingly; otherwise, add a END=1 link entry to @topa to the current + * "last" table and adjust the last table pointer to @topa. + */ +static void topa_insert_table(struct pt_buffer *buf, struct topa *topa) +{ + struct topa *last = buf->last; + + list_add_tail(&topa->list, &buf->tables); + + if (!buf->first) { + buf->first = buf->last = buf->cur = topa; + return; + } + + topa->offset = last->offset + last->size; + buf->last = topa; + + if (!pt_cap_get(PT_CAP_topa_multiple_entries)) + return; + + BUG_ON(last->last != TENTS_PER_PAGE - 1); + + TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT; + TOPA_ENTRY(last, -1)->end = 1; +} + +/** + * topa_table_full() - check if a ToPA table is filled up + * @topa: ToPA table. + */ +static bool topa_table_full(struct topa *topa) +{ + /* single-entry ToPA is a special case */ + if (!pt_cap_get(PT_CAP_topa_multiple_entries)) + return !!topa->last; + + return topa->last == TENTS_PER_PAGE - 1; +} + +/** + * topa_insert_pages() - create a list of ToPA tables + * @buf: PT buffer being initialized. + * @gfp: Allocation flags. + * + * This initializes a list of ToPA tables with entries from + * the data_pages provided by rb_alloc_aux(). + * + * Return: 0 on success or error code. + */ +static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp) +{ + struct topa *topa = buf->last; + int order = 0; + struct page *p; + + p = virt_to_page(buf->data_pages[buf->nr_pages]); + if (PagePrivate(p)) + order = page_private(p); + + if (topa_table_full(topa)) { + topa = topa_alloc(buf->cpu, gfp); + if (!topa) + return -ENOMEM; + + topa_insert_table(buf, topa); + } + + TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT; + TOPA_ENTRY(topa, -1)->size = order; + if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) { + TOPA_ENTRY(topa, -1)->intr = 1; + TOPA_ENTRY(topa, -1)->stop = 1; + } + + topa->last++; + topa->size += sizes(order); + + buf->nr_pages += 1ul << order; + + return 0; +} + +/** + * pt_topa_dump() - print ToPA tables and their entries + * @buf: PT buffer. + */ +static void pt_topa_dump(struct pt_buffer *buf) +{ + struct topa *topa; + + list_for_each_entry(topa, &buf->tables, list) { + int i; + + pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table, + topa->phys, topa->offset, topa->size); + for (i = 0; i < TENTS_PER_PAGE; i++) { + pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n", + &topa->table[i], + (unsigned long)topa->table[i].base << TOPA_SHIFT, + sizes(topa->table[i].size), + topa->table[i].end ? 'E' : ' ', + topa->table[i].intr ? 'I' : ' ', + topa->table[i].stop ? 'S' : ' ', + *(u64 *)&topa->table[i]); + if ((pt_cap_get(PT_CAP_topa_multiple_entries) && + topa->table[i].stop) || + topa->table[i].end) + break; + } + } +} + +/** + * pt_buffer_advance() - advance to the next output region + * @buf: PT buffer. + * + * Advance the current pointers in the buffer to the next ToPA entry. + */ +static void pt_buffer_advance(struct pt_buffer *buf) +{ + buf->output_off = 0; + buf->cur_idx++; + + if (buf->cur_idx == buf->cur->last) { + if (buf->cur == buf->last) + buf->cur = buf->first; + else + buf->cur = list_entry(buf->cur->list.next, struct topa, + list); + buf->cur_idx = 0; + } +} + +/** + * pt_update_head() - calculate current offsets and sizes + * @pt: Per-cpu pt context. + * + * Update buffer's current write pointer position and data size. + */ +static void pt_update_head(struct pt *pt) +{ + struct pt_buffer *buf = perf_get_aux(&pt->handle); + u64 topa_idx, base, old; + + /* offset of the first region in this table from the beginning of buf */ + base = buf->cur->offset + buf->output_off; + + /* offset of the current output region within this table */ + for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++) + base += sizes(buf->cur->table[topa_idx].size); + + if (buf->snapshot) { + local_set(&buf->data_size, base); + } else { + old = (local64_xchg(&buf->head, base) & + ((buf->nr_pages << PAGE_SHIFT) - 1)); + if (base < old) + base += buf->nr_pages << PAGE_SHIFT; + + local_add(base - old, &buf->data_size); + } +} + +/** + * pt_buffer_region() - obtain current output region's address + * @buf: PT buffer. + */ +static void *pt_buffer_region(struct pt_buffer *buf) +{ + return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT); +} + +/** + * pt_buffer_region_size() - obtain current output region's size + * @buf: PT buffer. + */ +static size_t pt_buffer_region_size(struct pt_buffer *buf) +{ + return sizes(buf->cur->table[buf->cur_idx].size); +} + +/** + * pt_handle_status() - take care of possible status conditions + * @pt: Per-cpu pt context. + */ +static void pt_handle_status(struct pt *pt) +{ + struct pt_buffer *buf = perf_get_aux(&pt->handle); + int advance = 0; + u64 status; + + rdmsrl(MSR_IA32_RTIT_STATUS, status); + + if (status & RTIT_STATUS_ERROR) { + pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n"); + pt_topa_dump(buf); + status &= ~RTIT_STATUS_ERROR; + } + + if (status & RTIT_STATUS_STOPPED) { + status &= ~RTIT_STATUS_STOPPED; + + /* + * On systems that only do single-entry ToPA, hitting STOP + * means we are already losing data; need to let the decoder + * know. + */ + if (!pt_cap_get(PT_CAP_topa_multiple_entries) || + buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) { + local_inc(&buf->lost); + advance++; + } + } + + /* + * Also on single-entry ToPA implementations, interrupt will come + * before the output reaches its output region's boundary. + */ + if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot && + pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) { + void *head = pt_buffer_region(buf); + + /* everything within this margin needs to be zeroed out */ + memset(head + buf->output_off, 0, + pt_buffer_region_size(buf) - + buf->output_off); + advance++; + } + + if (advance) + pt_buffer_advance(buf); + + wrmsrl(MSR_IA32_RTIT_STATUS, status); +} + +/** + * pt_read_offset() - translate registers into buffer pointers + * @buf: PT buffer. + * + * Set buffer's output pointers from MSR values. + */ +static void pt_read_offset(struct pt_buffer *buf) +{ + u64 offset, base_topa; + + rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa); + buf->cur = phys_to_virt(base_topa); + + rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset); + /* offset within current output region */ + buf->output_off = offset >> 32; + /* index of current output region within this table */ + buf->cur_idx = (offset & 0xffffff80) >> 7; +} + +/** + * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry + * @buf: PT buffer. + * @pg: Page offset in the buffer. + * + * When advancing to the next output region (ToPA entry), given a page offset + * into the buffer, we need to find the offset of the first page in the next + * region. + */ +static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg) +{ + struct topa_entry *te = buf->topa_index[pg]; + + /* one region */ + if (buf->first == buf->last && buf->first->last == 1) + return pg; + + do { + pg++; + pg &= buf->nr_pages - 1; + } while (buf->topa_index[pg] == te); + + return pg; +} + +/** + * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer + * @buf: PT buffer. + * @handle: Current output handle. + * + * Place INT and STOP marks to prevent overwriting old data that the consumer + * hasn't yet collected. + */ +static int pt_buffer_reset_markers(struct pt_buffer *buf, + struct perf_output_handle *handle) + +{ + unsigned long idx, npages, end; + + if (buf->snapshot) + return 0; + + /* can't stop in the middle of an output region */ + if (buf->output_off + handle->size + 1 < + sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) + return -EINVAL; + + + /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */ + if (!pt_cap_get(PT_CAP_topa_multiple_entries)) + return 0; + + /* clear STOP and INT from current entry */ + buf->topa_index[buf->stop_pos]->stop = 0; + buf->topa_index[buf->intr_pos]->intr = 0; + + if (pt_cap_get(PT_CAP_topa_multiple_entries)) { + npages = (handle->size + 1) >> PAGE_SHIFT; + end = (local64_read(&buf->head) >> PAGE_SHIFT) + npages; + /*if (end > handle->wakeup >> PAGE_SHIFT) + end = handle->wakeup >> PAGE_SHIFT;*/ + idx = end & (buf->nr_pages - 1); + buf->stop_pos = idx; + idx = (local64_read(&buf->head) >> PAGE_SHIFT) + npages - 1; + idx &= buf->nr_pages - 1; + buf->intr_pos = idx; + } + + buf->topa_index[buf->stop_pos]->stop = 1; + buf->topa_index[buf->intr_pos]->intr = 1; + + return 0; +} + +/** + * pt_buffer_setup_topa_index() - build topa_index[] table of regions + * @buf: PT buffer. + * + * topa_index[] references output regions indexed by offset into the + * buffer for purposes of quick reverse lookup. + */ +static void pt_buffer_setup_topa_index(struct pt_buffer *buf) +{ + struct topa *cur = buf->first, *prev = buf->last; + struct topa_entry *te_cur = TOPA_ENTRY(cur, 0), + *te_prev = TOPA_ENTRY(prev, prev->last - 1); + int pg = 0, idx = 0, ntopa = 0; + + while (pg < buf->nr_pages) { + int tidx; + + /* pages within one topa entry */ + for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++) + buf->topa_index[pg] = te_prev; + + te_prev = te_cur; + + if (idx == cur->last - 1) { + /* advance to next topa table */ + idx = 0; + cur = list_entry(cur->list.next, struct topa, list); + ntopa++; + } else + idx++; + te_cur = TOPA_ENTRY(cur, idx); + } + +} + +/** + * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head + * @buf: PT buffer. + * @head: Write pointer (aux_head) from AUX buffer. + * + * Find the ToPA table and entry corresponding to given @head and set buffer's + * "current" pointers accordingly. + */ +static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head) +{ + int pg; + + if (buf->snapshot) + head &= (buf->nr_pages << PAGE_SHIFT) - 1; + + pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1); + pg = pt_topa_next_entry(buf, pg); + + buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK); + buf->cur_idx = ((unsigned long)buf->topa_index[pg] - + (unsigned long)buf->cur) / sizeof(struct topa_entry); + buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1); + + local64_set(&buf->head, head); + local_set(&buf->data_size, 0); +} + +/** + * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer + * @buf: PT buffer. + */ +static void pt_buffer_fini_topa(struct pt_buffer *buf) +{ + struct topa *topa, *iter; + + list_for_each_entry_safe(topa, iter, &buf->tables, list) { + /* + * right now, this is in free_aux() path only, so + * no need to unlink this table from the list + */ + topa_free(topa); + } +} + +/** + * pt_buffer_init_topa() - initialize ToPA table for pt buffer + * @buf: PT buffer. + * @size: Total size of all regions within this ToPA. + * @gfp: Allocation flags. + */ +static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages, + gfp_t gfp) +{ + struct topa *topa; + int err; + + topa = topa_alloc(buf->cpu, gfp); + if (!topa) + return -ENOMEM; + + topa_insert_table(buf, topa); + + while (buf->nr_pages < nr_pages) { + err = topa_insert_pages(buf, gfp); + if (err) { + pt_buffer_fini_topa(buf); + return -ENOMEM; + } + } + + pt_buffer_setup_topa_index(buf); + + /* link last table to the first one, unless we're double buffering */ + if (pt_cap_get(PT_CAP_topa_multiple_entries)) { + TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT; + TOPA_ENTRY(buf->last, -1)->end = 1; + } + + pt_topa_dump(buf); + return 0; +} + +/** + * pt_buffer_setup_aux() - set up topa tables for a PT buffer + * @cpu: Cpu on which to allocate, -1 means current. + * @pages: Array of pointers to buffer pages passed from perf core. + * @nr_pages: Number of pages in the buffer. + * @snapshot: If this is a snapshot/overwrite counter. + * + * This is a pmu::setup_aux callback that sets up ToPA tables and all the + * bookkeeping for an AUX buffer. + * + * Return: Our private PT buffer structure. + */ +static void * +pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot) +{ + struct pt_buffer *buf; + int node, ret; + + if (!nr_pages) + return NULL; + + if (cpu == -1) + cpu = raw_smp_processor_id(); + node = cpu_to_node(cpu); + + buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]), + GFP_KERNEL, node); + if (!buf) + return NULL; + + buf->cpu = cpu; + buf->snapshot = snapshot; + buf->data_pages = pages; + + INIT_LIST_HEAD(&buf->tables); + + ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL); + if (ret) { + kfree(buf); + return NULL; + } + + return buf; +} + +/** + * pt_buffer_free_aux() - perf AUX deallocation path callback + * @data: PT buffer. + */ +static void pt_buffer_free_aux(void *data) +{ + struct pt_buffer *buf = data; + + pt_buffer_fini_topa(buf); + kfree(buf); +} + +/** + * pt_buffer_is_full() - check if the buffer is full + * @buf: PT buffer. + * @pt: Per-cpu pt handle. + * + * If the user hasn't read data from the output region that aux_head + * points to, the buffer is considered full: the user needs to read at + * least this region and update aux_tail to point past it. + */ +static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt) +{ + if (buf->snapshot) + return false; + + if (local_read(&buf->data_size) >= pt->handle.size) + return true; + + return false; +} + +/** + * intel_pt_interrupt() - PT PMI handler + */ +void intel_pt_interrupt(void) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct pt_buffer *buf; + struct perf_event *event = pt->handle.event; + + /* + * There may be a dangling PT bit in the interrupt status register + * after PT has been disabled by pt_event_stop(). Make sure we don't + * do anything (particularly, re-enable) for this event here. + */ + if (!ACCESS_ONCE(pt->handle_nmi)) + return; + + pt_config_start(false); + + if (!event) + return; + + buf = perf_get_aux(&pt->handle); + if (!buf) + return; + + pt_read_offset(buf); + + pt_handle_status(pt); + + pt_update_head(pt); + + perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0), + local_xchg(&buf->lost, 0)); + + if (!event->hw.state) { + int ret; + + buf = perf_aux_output_begin(&pt->handle, event); + if (!buf) { + event->hw.state = PERF_HES_STOPPED; + return; + } + + pt_buffer_reset_offsets(buf, pt->handle.head); + ret = pt_buffer_reset_markers(buf, &pt->handle); + if (ret) { + perf_aux_output_end(&pt->handle, 0, true); + return; + } + + pt_config_buffer(buf->cur->table, buf->cur_idx, + buf->output_off); + wrmsrl(MSR_IA32_RTIT_STATUS, 0); + pt_config(event); + } +} + +/* + * PMU callbacks + */ + +static void pt_event_start(struct perf_event *event, int mode) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct pt_buffer *buf = perf_get_aux(&pt->handle); + + if (pt_is_running() || !buf || pt_buffer_is_full(buf, pt)) { + event->hw.state = PERF_HES_STOPPED; + return; + } + + ACCESS_ONCE(pt->handle_nmi) = 1; + event->hw.state = 0; + + pt_config_buffer(buf->cur->table, buf->cur_idx, + buf->output_off); + wrmsrl(MSR_IA32_RTIT_STATUS, 0); + pt_config(event); +} + +static void pt_event_stop(struct perf_event *event, int mode) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + + /* + * Protect against the PMI racing with disabling wrmsr, + * see comment in intel_pt_interrupt(). + */ + ACCESS_ONCE(pt->handle_nmi) = 0; + pt_config_start(false); + + if (event->hw.state == PERF_HES_STOPPED) + return; + + event->hw.state = PERF_HES_STOPPED; + + if (mode & PERF_EF_UPDATE) { + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct pt_buffer *buf = perf_get_aux(&pt->handle); + + if (!buf) + return; + + if (WARN_ON_ONCE(pt->handle.event != event)) + return; + + pt_read_offset(buf); + + pt_handle_status(pt); + + pt_update_head(pt); + } +} + +static void pt_event_del(struct perf_event *event, int mode) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct pt_buffer *buf; + + pt_event_stop(event, PERF_EF_UPDATE); + + buf = perf_get_aux(&pt->handle); + + if (buf) { + if (buf->snapshot) + pt->handle.head = + local_xchg(&buf->data_size, + buf->nr_pages << PAGE_SHIFT); + perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0), + local_xchg(&buf->lost, 0)); + } +} + +static int pt_event_add(struct perf_event *event, int mode) +{ + struct pt_buffer *buf; + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct hw_perf_event *hwc = &event->hw; + int ret = -EBUSY; + + if (pt->handle.event) + goto out; + + buf = perf_aux_output_begin(&pt->handle, event); + if (!buf) { + ret = -EINVAL; + goto out; + } + + pt_buffer_reset_offsets(buf, pt->handle.head); + if (!buf->snapshot) { + ret = pt_buffer_reset_markers(buf, &pt->handle); + if (ret) { + perf_aux_output_end(&pt->handle, 0, true); + goto out; + } + } + + if (mode & PERF_EF_START) { + pt_event_start(event, 0); + if (hwc->state == PERF_HES_STOPPED) { + pt_event_del(event, 0); + ret = -EBUSY; + } + } else { + hwc->state = PERF_HES_STOPPED; + } + + ret = 0; +out: + + if (ret) + hwc->state = PERF_HES_STOPPED; + + return ret; +} + +static void pt_event_read(struct perf_event *event) +{ +} + +static void pt_event_destroy(struct perf_event *event) +{ + x86_del_exclusive(x86_lbr_exclusive_pt); +} + +static int pt_event_init(struct perf_event *event) +{ + if (event->attr.type != pt_pmu.pmu.type) + return -ENOENT; + + if (!pt_event_valid(event)) + return -EINVAL; + + if (x86_add_exclusive(x86_lbr_exclusive_pt)) + return -EBUSY; + + event->destroy = pt_event_destroy; + + return 0; +} + +static __init int pt_init(void) +{ + int ret, cpu, prior_warn = 0; + + BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE); + get_online_cpus(); + for_each_online_cpu(cpu) { + u64 ctl; + + ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl); + if (!ret && (ctl & RTIT_CTL_TRACEEN)) + prior_warn++; + } + put_online_cpus(); + + if (prior_warn) { + x86_add_exclusive(x86_lbr_exclusive_pt); + pr_warn("PT is enabled at boot time, doing nothing\n"); + + return -EBUSY; + } + + ret = pt_pmu_hw_init(); + if (ret) + return ret; + + if (!pt_cap_get(PT_CAP_topa_output)) { + pr_warn("ToPA output is not supported on this CPU\n"); + return -ENODEV; + } + + if (!pt_cap_get(PT_CAP_topa_multiple_entries)) + pt_pmu.pmu.capabilities = + PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF; + + pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE; + pt_pmu.pmu.attr_groups = pt_attr_groups; + pt_pmu.pmu.task_ctx_nr = perf_sw_context; + pt_pmu.pmu.event_init = pt_event_init; + pt_pmu.pmu.add = pt_event_add; + pt_pmu.pmu.del = pt_event_del; + pt_pmu.pmu.start = pt_event_start; + pt_pmu.pmu.stop = pt_event_stop; + pt_pmu.pmu.read = pt_event_read; + pt_pmu.pmu.setup_aux = pt_buffer_setup_aux; + pt_pmu.pmu.free_aux = pt_buffer_free_aux; + ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1); + + return ret; +} + +module_init(pt_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index 21af6149edf2..12d9548457e7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -1132,8 +1132,7 @@ static int snbep_pci2phy_map_init(int devid) } } - if (ubox_dev) - pci_dev_put(ubox_dev); + pci_dev_put(ubox_dev); return err ? pcibios_err_to_errno(err) : 0; } diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 60639093d536..3d423a101fae 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -41,6 +41,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { X86_FEATURE_HWP_ACT_WINDOW, CR_EAX, 9, 0x00000006, 0 }, { X86_FEATURE_HWP_EPP, CR_EAX,10, 0x00000006, 0 }, { X86_FEATURE_HWP_PKG_REQ, CR_EAX,11, 0x00000006, 0 }, + { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 24d079604fd5..1deffe6cc873 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -354,6 +354,7 @@ int __copy_instruction(u8 *dest, u8 *src) { struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; + int length; unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src); @@ -361,16 +362,18 @@ int __copy_instruction(u8 *dest, u8 *src) return 0; kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); insn_get_length(&insn); + length = insn.length; + /* Another subsystem puts a breakpoint, failed to recover */ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) return 0; - memcpy(dest, insn.kaddr, insn.length); + memcpy(dest, insn.kaddr, length); #ifdef CONFIG_X86_64 if (insn_rip_relative(&insn)) { s64 newdisp; u8 *disp; - kernel_insn_init(&insn, dest, insn.length); + kernel_insn_init(&insn, dest, length); insn_get_displacement(&insn); /* * The copied instruction uses the %rip-relative addressing @@ -394,7 +397,7 @@ int __copy_instruction(u8 *dest, u8 *src) *(s32 *) disp = (s32) newdisp; } #endif - return insn.length; + return length; } static int arch_copy_kprobe(struct kprobe *p) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7035f6b21c3f..50e547eac8cd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -77,9 +77,6 @@ #include <asm/realmode.h> #include <asm/misc.h> -/* State of each CPU */ -DEFINE_PER_CPU(int, cpu_state) = { 0 }; - /* Number of siblings per CPU package */ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); @@ -257,7 +254,7 @@ static void notrace start_secondary(void *unused) lock_vector_lock(); set_cpu_online(smp_processor_id(), true); unlock_vector_lock(); - per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; + cpu_set_state_online(smp_processor_id()); x86_platform.nmi_init(); /* enable local interrupts */ @@ -954,7 +951,10 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) */ mtrr_save_state(); - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* x86 CPUs take themselves offline, so delayed offline is OK. */ + err = cpu_check_up_prepare(cpu); + if (err && err != -EBUSY) + return err; /* the FPU context is blank, nobody can own it */ __cpu_disable_lazy_restore(cpu); @@ -1197,7 +1197,7 @@ void __init native_smp_prepare_boot_cpu(void) switch_to_new_gdt(me); /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); - per_cpu(cpu_state, me) = CPU_ONLINE; + cpu_set_state_online(me); } void __init native_smp_cpus_done(unsigned int max_cpus) @@ -1324,14 +1324,10 @@ static void __ref remove_cpu_from_maps(int cpu) numa_remove_cpu(cpu); } -static DEFINE_PER_CPU(struct completion, die_complete); - void cpu_disable_common(void) { int cpu = smp_processor_id(); - init_completion(&per_cpu(die_complete, smp_processor_id())); - remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ @@ -1355,24 +1351,27 @@ int native_cpu_disable(void) return 0; } -void cpu_die_common(unsigned int cpu) +int common_cpu_die(unsigned int cpu) { - wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ); -} + int ret = 0; -void native_cpu_die(unsigned int cpu) -{ /* We don't do anything here: idle task is faking death itself. */ - cpu_die_common(cpu); - /* They ack this in play_dead() by setting CPU_DEAD */ - if (per_cpu(cpu_state, cpu) == CPU_DEAD) { + if (cpu_wait_death(cpu, 5)) { if (system_state == SYSTEM_RUNNING) pr_info("CPU %u is now offline\n", cpu); } else { pr_err("CPU %u didn't die...\n", cpu); + ret = -1; } + + return ret; +} + +void native_cpu_die(unsigned int cpu) +{ + common_cpu_die(cpu); } void play_dead_common(void) @@ -1381,10 +1380,8 @@ void play_dead_common(void) reset_lazy_tlbstate(); amd_e400_remove_cpu(raw_smp_processor_id()); - mb(); /* Ack it */ - __this_cpu_write(cpu_state, CPU_DEAD); - complete(&per_cpu(die_complete, smp_processor_id())); + (void)cpu_report_death(); /* * With physical CPU hotplug, we should halt the cpu diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c index b79133abda48..5ecbfe5099da 100644 --- a/arch/x86/kernel/test_rodata.c +++ b/arch/x86/kernel/test_rodata.c @@ -57,7 +57,7 @@ int rodata_test(void) /* test 3: check the value hasn't changed */ /* If this test fails, we managed to overwrite the data */ if (!rodata_test_data) { - printk(KERN_ERR "rodata_test: Test 3 failes (end data)\n"); + printk(KERN_ERR "rodata_test: Test 3 fails (end data)\n"); return -ENODEV; } /* test 4: check if the rodata section is 4Kb aligned */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f4fa991406cd..324ab5247687 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -123,7 +123,7 @@ enum ctx_state ist_enter(struct pt_regs *regs) * but we need to notify RCU. */ rcu_nmi_enter(); - prev_state = IN_KERNEL; /* the value is irrelevant. */ + prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */ } /* diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 7413ee3706d0..86484384492e 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -90,14 +90,10 @@ static void cpu_bringup(void) set_cpu_online(cpu, true); - this_cpu_write(cpu_state, CPU_ONLINE); - - wmb(); + cpu_set_state_online(cpu); /* Implies full memory barrier. */ /* We can take interrupts now: we're officially "up". */ local_irq_enable(); - - wmb(); /* make sure everything is out */ } /* @@ -451,7 +447,13 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) xen_setup_timer(cpu); xen_init_lock_cpu(cpu); - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* + * PV VCPUs are always successfully taken down (see 'while' loop + * in xen_cpu_die()), so -EBUSY is an error. + */ + rc = cpu_check_up_prepare(cpu); + if (rc) + return rc; /* make sure interrupts start blocked */ per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; @@ -467,10 +469,8 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); BUG_ON(rc); - while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { + while (cpu_report_state(cpu) != CPU_ONLINE) HYPERVISOR_sched_op(SCHEDOP_yield, NULL); - barrier(); - } return 0; } @@ -499,11 +499,11 @@ static void xen_cpu_die(unsigned int cpu) schedule_timeout(HZ/10); } - cpu_die_common(cpu); - - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); + if (common_cpu_die(cpu) == 0) { + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + xen_teardown_timer(cpu); + } } static void xen_play_dead(void) /* used only with HOTPLUG_CPU */ @@ -735,6 +735,16 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) { int rc; + + /* + * This can happen if CPU was offlined earlier and + * offlining timed out in common_cpu_die(). + */ + if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) { + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + } + /* * xen_smp_intr_init() needs to run before native_cpu_up() * so that IPI vectors are set up on the booting CPU before @@ -756,12 +766,6 @@ static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) return rc; } -static void xen_hvm_cpu_die(unsigned int cpu) -{ - xen_cpu_die(cpu); - native_cpu_die(cpu); -} - void __init xen_hvm_smp_init(void) { if (!xen_have_vector_callback) @@ -769,7 +773,7 @@ void __init xen_hvm_smp_init(void) smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; smp_ops.smp_send_reschedule = xen_smp_send_reschedule; smp_ops.cpu_up = xen_hvm_cpu_up; - smp_ops.cpu_die = xen_hvm_cpu_die; + smp_ops.cpu_die = xen_cpu_die; smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu; diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index 3d733ba16f28..6b3790445cbe 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -405,11 +405,6 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, regs->areg[8] = (unsigned long) &frame->uc; regs->threadptr = tp; - /* Set access mode to USER_DS. Nomenclature is outdated, but - * functionality is used in uaccess.h - */ - set_fs(USER_DS); - #if DEBUG_SIG printk("SIG rt deliver (%s:%d): signal=%d sp=%p pc=%08x\n", current->comm, current->pid, signal, frame, regs->pc); diff --git a/block/blk-map.c b/block/blk-map.c index b8d2725324a6..da310a105429 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -124,10 +124,10 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, { struct iovec iov; struct iov_iter i; + int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i); - iov.iov_base = ubuf; - iov.iov_len = len; - iov_iter_init(&i, rq_data_dir(rq), &iov, 1, len); + if (unlikely(ret < 0)) + return ret; return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask); } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index e1f71c396193..55b6f15dac90 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -335,16 +335,14 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, struct iov_iter i; struct iovec *iov = NULL; - ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count, - 0, NULL, &iov); - if (ret < 0) { - kfree(iov); + ret = import_iovec(rq_data_dir(rq), + hdr->dxferp, hdr->iovec_count, + 0, &iov, &i); + if (ret < 0) goto out_free_cdb; - } /* SG_IO howto says that the shorter of the two wins */ - iov_iter_init(&i, rq_data_dir(rq), iov, hdr->iovec_count, - min_t(unsigned, ret, hdr->dxfer_len)); + iov_iter_truncate(&i, hdr->dxfer_len); ret = blk_rq_map_user_iov(q, rq, NULL, &i, GFP_KERNEL); kfree(iov); diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 297110c12635..9c4fd7a8e2e5 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -26,7 +26,7 @@ #include <linux/pfn.h> #include <linux/export.h> #include <linux/io.h> -#include <linux/aio.h> +#include <linux/uio.h> #include <linux/uaccess.h> diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c index 02e76ac6d282..69f6b4acc377 100644 --- a/drivers/char/tile-srom.c +++ b/drivers/char/tile-srom.c @@ -27,7 +27,6 @@ #include <linux/types.h> /* size_t */ #include <linux/proc_fs.h> #include <linux/fcntl.h> /* O_ACCMODE */ -#include <linux/aio.h> #include <linux/pagemap.h> #include <linux/hugetlb.h> #include <linux/uaccess.h> diff --git a/drivers/gpu/drm/drm_trace.h b/drivers/gpu/drm/drm_trace.h index 27cc95f36381..ce3c42813fbb 100644 --- a/drivers/gpu/drm/drm_trace.h +++ b/drivers/gpu/drm/drm_trace.h @@ -7,7 +7,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM drm -#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) #define TRACE_INCLUDE_FILE drm_trace TRACE_EVENT(drm_vblank_event, diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 6058a01b4443..d776621c8521 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -12,7 +12,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM i915 -#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) #define TRACE_INCLUDE_FILE i915_trace /* pipe updates */ diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h index ce075cb08cb2..fdce4062901f 100644 --- a/drivers/gpu/drm/radeon/radeon_trace.h +++ b/drivers/gpu/drm/radeon/radeon_trace.h @@ -9,7 +9,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM radeon -#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) #define TRACE_INCLUDE_FILE radeon_trace TRACE_EVENT(radeon_bo_create, diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 152b006833cd..15338afdf7f9 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -92,7 +92,7 @@ menu "Special HID drivers" depends on HID config HID_A4TECH - tristate "A4 tech mice" if EXPERT + tristate "A4 tech mice" depends on HID default !EXPERT ---help--- @@ -113,7 +113,7 @@ config HID_ACRUX_FF game controllers. config HID_APPLE - tristate "Apple {i,Power,Mac}Books" if EXPERT + tristate "Apple {i,Power,Mac}Books" depends on HID default !EXPERT ---help--- @@ -141,7 +141,7 @@ config HID_AUREAL Support for Aureal Cy se W-01RN Remote Controller and other Aureal derived remotes. config HID_BELKIN - tristate "Belkin Flip KVM and Wireless keyboard" if EXPERT + tristate "Belkin Flip KVM and Wireless keyboard" depends on HID default !EXPERT ---help--- @@ -158,14 +158,14 @@ config HID_BETOP_FF - BETOP 2185 PC & BFM MODE config HID_CHERRY - tristate "Cherry Cymotion keyboard" if EXPERT + tristate "Cherry Cymotion keyboard" depends on HID default !EXPERT ---help--- Support for Cherry Cymotion keyboard. config HID_CHICONY - tristate "Chicony Tactical pad" if EXPERT + tristate "Chicony Tactical pad" depends on HID default !EXPERT ---help--- @@ -196,7 +196,7 @@ config HID_CP2112 customizable USB descriptor fields are exposed as sysfs attributes. config HID_CYPRESS - tristate "Cypress mouse and barcode readers" if EXPERT + tristate "Cypress mouse and barcode readers" depends on HID default !EXPERT ---help--- @@ -245,7 +245,7 @@ config HID_ELO different devices than those handled by CONFIG_TOUCHSCREEN_USB_ELO. config HID_EZKEY - tristate "Ezkey BTC 8193 keyboard" if EXPERT + tristate "Ezkey BTC 8193 keyboard" depends on HID default !EXPERT ---help--- @@ -286,12 +286,6 @@ config HID_GT683R Currently the following devices are know to be supported: - MSI GT683R -config HID_HUION - tristate "Huion tablets" - depends on USB_HID - ---help--- - Support for Huion 580 tablet. - config HID_KEYTOUCH tristate "Keytouch HID devices" depends on HID @@ -312,9 +306,9 @@ config HID_KYE config HID_UCLOGIC tristate "UC-Logic" - depends on HID + depends on USB_HID ---help--- - Support for UC-Logic tablets. + Support for UC-Logic and Huion tablets. config HID_WALTOP tristate "Waltop" @@ -344,7 +338,7 @@ config HID_TWINHAN Support for Twinhan IR remote control. config HID_KENSINGTON - tristate "Kensington Slimblade Trackball" if EXPERT + tristate "Kensington Slimblade Trackball" depends on HID default !EXPERT ---help--- @@ -372,7 +366,7 @@ config HID_LENOVO - ThinkPad Compact USB Keyboard with TrackPoint (supports Fn keys) config HID_LOGITECH - tristate "Logitech devices" if EXPERT + tristate "Logitech devices" depends on HID default !EXPERT ---help--- @@ -461,14 +455,14 @@ config HID_MAGICMOUSE Apple Wireless "Magic" Mouse and the Apple Wireless "Magic" Trackpad. config HID_MICROSOFT - tristate "Microsoft non-fully HID-compliant devices" if EXPERT + tristate "Microsoft non-fully HID-compliant devices" depends on HID default !EXPERT ---help--- Support for Microsoft devices that are not fully compliant with HID standard. config HID_MONTEREY - tristate "Monterey Genius KB29E keyboard" if EXPERT + tristate "Monterey Genius KB29E keyboard" depends on HID default !EXPERT ---help--- @@ -638,7 +632,6 @@ config HID_PICOLCD_CIR config HID_PLANTRONICS tristate "Plantronics USB HID Driver" - default !EXPERT depends on HID ---help--- Provides HID support for Plantronics telephony devices. @@ -885,6 +878,21 @@ config HID_SENSOR_HUB for events and handle data streams. Each sensor driver can format data and present to user mode using input or IIO interface. +config HID_SENSOR_CUSTOM_SENSOR + tristate "HID Sensors hub custom sensor support" + depends on HID_SENSOR_HUB + default n + ---help--- + HID Sensor hub specification allows definition of some custom and + generic sensors. Unlike other HID sensors, they can't be exported + via Linux IIO because of custom fields. This is up to the manufacturer + to decide how to interpret these special sensor ids and process in + the user space. Currently some manufacturers are using these ids for + sensor calibration and debugging other sensors. Manufacturers + should't use these special custom sensor ids to export any of the + standard sensors. + Select this config option for custom/generic sensor support. + endmenu endif # HID diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile index 6f19958dfc38..e4a21dfd7ef3 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -41,7 +41,6 @@ obj-$(CONFIG_HID_GYRATION) += hid-gyration.o obj-$(CONFIG_HID_HOLTEK) += hid-holtek-kbd.o obj-$(CONFIG_HID_HOLTEK) += hid-holtek-mouse.o obj-$(CONFIG_HID_HOLTEK) += hid-holtekff.o -obj-$(CONFIG_HID_HUION) += hid-huion.o obj-$(CONFIG_HID_HYPERV_MOUSE) += hid-hyperv.o obj-$(CONFIG_HID_ICADE) += hid-icade.o obj-$(CONFIG_HID_KENSINGTON) += hid-kensington.o @@ -101,6 +100,7 @@ obj-$(CONFIG_HID_WACOM) += wacom.o obj-$(CONFIG_HID_WALTOP) += hid-waltop.o obj-$(CONFIG_HID_WIIMOTE) += hid-wiimote.o obj-$(CONFIG_HID_SENSOR_HUB) += hid-sensor-hub.o +obj-$(CONFIG_HID_SENSOR_CUSTOM_SENSOR) += hid-sensor-custom.o obj-$(CONFIG_USB_HID) += usbhid/ obj-$(CONFIG_USB_MOUSE) += usbhid/ diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 56ce8c2b5530..722a925795a2 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1562,12 +1562,26 @@ read_report_descriptor(struct file *filp, struct kobject *kobj, return count; } +static ssize_t +show_country(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + + return sprintf(buf, "%02x\n", hdev->country & 0xff); +} + static struct bin_attribute dev_bin_attr_report_desc = { .attr = { .name = "report_descriptor", .mode = 0444 }, .read = read_report_descriptor, .size = HID_MAX_DESCRIPTOR_SIZE, }; +static struct device_attribute dev_attr_country = { + .attr = { .name = "country", .mode = 0444 }, + .show = show_country, +}; + int hid_connect(struct hid_device *hdev, unsigned int connect_mask) { static const char *types[] = { "Device", "Pointer", "Mouse", "Device", @@ -1646,6 +1660,11 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask) bus = "<UNKNOWN>"; } + ret = device_create_file(&hdev->dev, &dev_attr_country); + if (ret) + hid_warn(hdev, + "can't create sysfs country code attribute err: %d\n", ret); + ret = device_create_bin_file(&hdev->dev, &dev_bin_attr_report_desc); if (ret) hid_warn(hdev, @@ -1661,6 +1680,7 @@ EXPORT_SYMBOL_GPL(hid_connect); void hid_disconnect(struct hid_device *hdev) { + device_remove_file(&hdev->dev, &dev_attr_country); device_remove_bin_file(&hdev->dev, &dev_bin_attr_report_desc); if (hdev->claimed & HID_CLAIMED_INPUT) hidinput_disconnect(hdev); @@ -1824,6 +1844,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912) }, { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) }, { HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000 ) }, #if IS_ENABLED(CONFIG_HID_LENOVO) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 8bf61d295ffd..c785095f9f2c 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -165,6 +165,7 @@ static const struct hid_usage_entry hid_usage_table[] = { {0, 0x53, "DeviceIndex"}, {0, 0x54, "ContactCount"}, {0, 0x55, "ContactMaximumNumber"}, + {0, 0x59, "ButtonType"}, {0, 0x5A, "SecondaryBarrelSwitch"}, {0, 0x5B, "TransducerSerialNumber"}, { 15, 0, "PhysicalInterfaceDevice" }, @@ -1127,7 +1128,8 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, if (!list->hdev || !list->hdev->debug) { ret = -EIO; - break; + set_current_state(TASK_RUNNING); + goto out; } /* allow O_NONBLOCK from other threads */ diff --git a/drivers/hid/hid-huion.c b/drivers/hid/hid-huion.c deleted file mode 100644 index 61b68ca27790..000000000000 --- a/drivers/hid/hid-huion.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * HID driver for Huion devices not fully compliant with HID standard - * - * Copyright (c) 2013 Martin Rusko - * Copyright (c) 2014 Nikolai Kondrashov - */ - -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - -#include <linux/device.h> -#include <linux/hid.h> -#include <linux/module.h> -#include <linux/usb.h> -#include <asm/unaligned.h> -#include "usbhid/usbhid.h" - -#include "hid-ids.h" - -/* Report descriptor template placeholder head */ -#define HUION_PH_HEAD 0xFE, 0xED, 0x1D - -/* Report descriptor template placeholder IDs */ -enum huion_ph_id { - HUION_PH_ID_X_LM, - HUION_PH_ID_X_PM, - HUION_PH_ID_Y_LM, - HUION_PH_ID_Y_PM, - HUION_PH_ID_PRESSURE_LM, - HUION_PH_ID_NUM -}; - -/* Report descriptor template placeholder */ -#define HUION_PH(_ID) HUION_PH_HEAD, HUION_PH_ID_##_ID - -/* Fixed report descriptor template */ -static const __u8 huion_tablet_rdesc_template[] = { - 0x05, 0x0D, /* Usage Page (Digitizer), */ - 0x09, 0x02, /* Usage (Pen), */ - 0xA1, 0x01, /* Collection (Application), */ - 0x85, 0x07, /* Report ID (7), */ - 0x09, 0x20, /* Usage (Stylus), */ - 0xA0, /* Collection (Physical), */ - 0x14, /* Logical Minimum (0), */ - 0x25, 0x01, /* Logical Maximum (1), */ - 0x75, 0x01, /* Report Size (1), */ - 0x09, 0x42, /* Usage (Tip Switch), */ - 0x09, 0x44, /* Usage (Barrel Switch), */ - 0x09, 0x46, /* Usage (Tablet Pick), */ - 0x95, 0x03, /* Report Count (3), */ - 0x81, 0x02, /* Input (Variable), */ - 0x95, 0x03, /* Report Count (3), */ - 0x81, 0x03, /* Input (Constant, Variable), */ - 0x09, 0x32, /* Usage (In Range), */ - 0x95, 0x01, /* Report Count (1), */ - 0x81, 0x02, /* Input (Variable), */ - 0x95, 0x01, /* Report Count (1), */ - 0x81, 0x03, /* Input (Constant, Variable), */ - 0x75, 0x10, /* Report Size (16), */ - 0x95, 0x01, /* Report Count (1), */ - 0xA4, /* Push, */ - 0x05, 0x01, /* Usage Page (Desktop), */ - 0x65, 0x13, /* Unit (Inch), */ - 0x55, 0xFD, /* Unit Exponent (-3), */ - 0x34, /* Physical Minimum (0), */ - 0x09, 0x30, /* Usage (X), */ - 0x27, HUION_PH(X_LM), /* Logical Maximum (PLACEHOLDER), */ - 0x47, HUION_PH(X_PM), /* Physical Maximum (PLACEHOLDER), */ - 0x81, 0x02, /* Input (Variable), */ - 0x09, 0x31, /* Usage (Y), */ - 0x27, HUION_PH(Y_LM), /* Logical Maximum (PLACEHOLDER), */ - 0x47, HUION_PH(Y_PM), /* Physical Maximum (PLACEHOLDER), */ - 0x81, 0x02, /* Input (Variable), */ - 0xB4, /* Pop, */ - 0x09, 0x30, /* Usage (Tip Pressure), */ - 0x27, - HUION_PH(PRESSURE_LM), /* Logical Maximum (PLACEHOLDER), */ - 0x81, 0x02, /* Input (Variable), */ - 0xC0, /* End Collection, */ - 0xC0 /* End Collection */ -}; - -/* Parameter indices */ -enum huion_prm { - HUION_PRM_X_LM = 1, - HUION_PRM_Y_LM = 2, - HUION_PRM_PRESSURE_LM = 4, - HUION_PRM_RESOLUTION = 5, - HUION_PRM_NUM -}; - -/* Driver data */ -struct huion_drvdata { - __u8 *rdesc; - unsigned int rsize; -}; - -static __u8 *huion_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int *rsize) -{ - struct huion_drvdata *drvdata = hid_get_drvdata(hdev); - switch (hdev->product) { - case USB_DEVICE_ID_HUION_TABLET: - if (drvdata->rdesc != NULL) { - rdesc = drvdata->rdesc; - *rsize = drvdata->rsize; - } - break; - } - return rdesc; -} - -/** - * Enable fully-functional tablet mode and determine device parameters. - * - * @hdev: HID device - */ -static int huion_tablet_enable(struct hid_device *hdev) -{ - int rc; - struct usb_device *usb_dev = hid_to_usb_dev(hdev); - struct huion_drvdata *drvdata = hid_get_drvdata(hdev); - __le16 *buf = NULL; - size_t len; - s32 params[HUION_PH_ID_NUM]; - s32 resolution; - __u8 *p; - s32 v; - - /* - * Read string descriptor containing tablet parameters. The specific - * string descriptor and data were discovered by sniffing the Windows - * driver traffic. - * NOTE: This enables fully-functional tablet mode. - */ - len = HUION_PRM_NUM * sizeof(*buf); - buf = kmalloc(len, GFP_KERNEL); - if (buf == NULL) { - hid_err(hdev, "failed to allocate parameter buffer\n"); - rc = -ENOMEM; - goto cleanup; - } - rc = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), - USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, - (USB_DT_STRING << 8) + 0x64, - 0x0409, buf, len, - USB_CTRL_GET_TIMEOUT); - if (rc == -EPIPE) { - hid_err(hdev, "device parameters not found\n"); - rc = -ENODEV; - goto cleanup; - } else if (rc < 0) { - hid_err(hdev, "failed to get device parameters: %d\n", rc); - rc = -ENODEV; - goto cleanup; - } else if (rc != len) { - hid_err(hdev, "invalid device parameters\n"); - rc = -ENODEV; - goto cleanup; - } - - /* Extract device parameters */ - params[HUION_PH_ID_X_LM] = le16_to_cpu(buf[HUION_PRM_X_LM]); - params[HUION_PH_ID_Y_LM] = le16_to_cpu(buf[HUION_PRM_Y_LM]); - params[HUION_PH_ID_PRESSURE_LM] = - le16_to_cpu(buf[HUION_PRM_PRESSURE_LM]); - resolution = le16_to_cpu(buf[HUION_PRM_RESOLUTION]); - if (resolution == 0) { - params[HUION_PH_ID_X_PM] = 0; - params[HUION_PH_ID_Y_PM] = 0; - } else { - params[HUION_PH_ID_X_PM] = params[HUION_PH_ID_X_LM] * - 1000 / resolution; - params[HUION_PH_ID_Y_PM] = params[HUION_PH_ID_Y_LM] * - 1000 / resolution; - } - - /* Allocate fixed report descriptor */ - drvdata->rdesc = devm_kmalloc(&hdev->dev, - sizeof(huion_tablet_rdesc_template), - GFP_KERNEL); - if (drvdata->rdesc == NULL) { - hid_err(hdev, "failed to allocate fixed rdesc\n"); - rc = -ENOMEM; - goto cleanup; - } - drvdata->rsize = sizeof(huion_tablet_rdesc_template); - - /* Format fixed report descriptor */ - memcpy(drvdata->rdesc, huion_tablet_rdesc_template, - drvdata->rsize); - for (p = drvdata->rdesc; - p <= drvdata->rdesc + drvdata->rsize - 4;) { - if (p[0] == 0xFE && p[1] == 0xED && p[2] == 0x1D && - p[3] < sizeof(params)) { - v = params[p[3]]; - put_unaligned(cpu_to_le32(v), (s32 *)p); - p += 4; - } else { - p++; - } - } - - rc = 0; - -cleanup: - kfree(buf); - return rc; -} - -static int huion_probe(struct hid_device *hdev, const struct hid_device_id *id) -{ - int rc; - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - struct huion_drvdata *drvdata; - - /* Allocate and assign driver data */ - drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); - if (drvdata == NULL) { - hid_err(hdev, "failed to allocate driver data\n"); - return -ENOMEM; - } - hid_set_drvdata(hdev, drvdata); - - switch (id->product) { - case USB_DEVICE_ID_HUION_TABLET: - /* If this is the pen interface */ - if (intf->cur_altsetting->desc.bInterfaceNumber == 0) { - rc = huion_tablet_enable(hdev); - if (rc) { - hid_err(hdev, "tablet enabling failed\n"); - return rc; - } - } - break; - } - - rc = hid_parse(hdev); - if (rc) { - hid_err(hdev, "parse failed\n"); - return rc; - } - - rc = hid_hw_start(hdev, HID_CONNECT_DEFAULT); - if (rc) { - hid_err(hdev, "hw start failed\n"); - return rc; - } - - return 0; -} - -static int huion_raw_event(struct hid_device *hdev, struct hid_report *report, - u8 *data, int size) -{ - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - - /* If this is a pen input report */ - if (intf->cur_altsetting->desc.bInterfaceNumber == 0 && - report->type == HID_INPUT_REPORT && - report->id == 0x07 && size >= 2) - /* Invert the in-range bit */ - data[1] ^= 0x40; - - return 0; -} - -static const struct hid_device_id huion_devices[] = { - { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, - { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET) }, - { } -}; -MODULE_DEVICE_TABLE(hid, huion_devices); - -static struct hid_driver huion_driver = { - .name = "huion", - .id_table = huion_devices, - .probe = huion_probe, - .report_fixup = huion_report_fixup, - .raw_event = huion_raw_event, -}; -module_hid_driver(huion_driver); - -MODULE_AUTHOR("Martin Rusko"); -MODULE_DESCRIPTION("Huion HID driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 9c4786759f16..41f167e4d75f 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -459,6 +459,11 @@ #define USB_DEVICE_ID_UGCI_FLYING 0x0020 #define USB_DEVICE_ID_UGCI_FIGHTING 0x0030 +#define USB_VENDOR_ID_HP 0x03f0 +#define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A 0x0a4a +#define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A 0x0b4a +#define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE 0x134a + #define USB_VENDOR_ID_HUION 0x256c #define USB_DEVICE_ID_HUION_TABLET 0x006e @@ -533,6 +538,7 @@ #define USB_DEVICE_ID_KYE_MOUSEPEN_I608X 0x5011 #define USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2 0x501a #define USB_DEVICE_ID_KYE_EASYPEN_M610X 0x5013 +#define USB_DEVICE_ID_KYE_PENSKETCH_M912 0x5015 #define USB_VENDOR_ID_LABTEC 0x1020 #define USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD 0x0006 @@ -591,6 +597,9 @@ #define USB_DEVICE_ID_LOGITECH_HARMONY_FIRST 0xc110 #define USB_DEVICE_ID_LOGITECH_HARMONY_LAST 0xc14f #define USB_DEVICE_ID_LOGITECH_HARMONY_PS3 0x0306 +#define USB_DEVICE_ID_LOGITECH_MOUSE_C01A 0xc01a +#define USB_DEVICE_ID_LOGITECH_MOUSE_C05A 0xc05a +#define USB_DEVICE_ID_LOGITECH_MOUSE_C06A 0xc06a #define USB_DEVICE_ID_LOGITECH_RUMBLEPAD_CORD 0xc20a #define USB_DEVICE_ID_LOGITECH_RUMBLEPAD 0xc211 #define USB_DEVICE_ID_LOGITECH_EXTREME_3D 0xc215 @@ -1022,6 +1031,7 @@ #define USB_DEVICE_ID_ZYTRONIC_ZXY100 0x0005 #define USB_VENDOR_ID_PRIMAX 0x0461 +#define USB_DEVICE_ID_PRIMAX_MOUSE_4D22 0x4d22 #define USB_DEVICE_ID_PRIMAX_KEYBOARD 0x4e05 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 32c2da49bd5b..008e89bf6f3c 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -720,6 +720,29 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel } break; + case HID_UP_TELEPHONY: + switch (usage->hid & HID_USAGE) { + case 0x2f: map_key_clear(KEY_MICMUTE); break; + case 0xb0: map_key_clear(KEY_NUMERIC_0); break; + case 0xb1: map_key_clear(KEY_NUMERIC_1); break; + case 0xb2: map_key_clear(KEY_NUMERIC_2); break; + case 0xb3: map_key_clear(KEY_NUMERIC_3); break; + case 0xb4: map_key_clear(KEY_NUMERIC_4); break; + case 0xb5: map_key_clear(KEY_NUMERIC_5); break; + case 0xb6: map_key_clear(KEY_NUMERIC_6); break; + case 0xb7: map_key_clear(KEY_NUMERIC_7); break; + case 0xb8: map_key_clear(KEY_NUMERIC_8); break; + case 0xb9: map_key_clear(KEY_NUMERIC_9); break; + case 0xba: map_key_clear(KEY_NUMERIC_STAR); break; + case 0xbb: map_key_clear(KEY_NUMERIC_POUND); break; + case 0xbc: map_key_clear(KEY_NUMERIC_A); break; + case 0xbd: map_key_clear(KEY_NUMERIC_B); break; + case 0xbe: map_key_clear(KEY_NUMERIC_C); break; + case 0xbf: map_key_clear(KEY_NUMERIC_D); break; + default: goto ignore; + } + break; + case HID_UP_CONSUMER: /* USB HUT v1.12, pages 75-84 */ switch (usage->hid & HID_USAGE) { case 0x000: goto ignore; diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c index 158fcf577fae..32e6d8d9ded0 100644 --- a/drivers/hid/hid-kye.c +++ b/drivers/hid/hid-kye.c @@ -268,6 +268,137 @@ static __u8 easypen_m610x_rdesc_fixed[] = { 0xC0 /* End Collection */ }; + +/* Original PenSketch M912 report descriptor size */ +#define PENSKETCH_M912_RDESC_ORIG_SIZE 482 + +/* Fixed PenSketch M912 report descriptor */ +static __u8 pensketch_m912_rdesc_fixed[] = { + 0x05, 0x01, /* Usage Page (Desktop), */ + 0x08, /* Usage (00h), */ + 0xA1, 0x01, /* Collection (Application), */ + 0x85, 0x05, /* Report ID (5), */ + 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ + 0x09, 0x01, /* Usage (01h), */ + 0x15, 0x81, /* Logical Minimum (-127), */ + 0x25, 0x7F, /* Logical Maximum (127), */ + 0x75, 0x08, /* Report Size (8), */ + 0x95, 0x07, /* Report Count (7), */ + 0xB1, 0x02, /* Feature (Variable), */ + 0xC0, /* End Collection, */ + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x09, 0x02, /* Usage (Pen), */ + 0xA1, 0x01, /* Collection (Application), */ + 0x85, 0x10, /* Report ID (16), */ + 0x09, 0x20, /* Usage (Stylus), */ + 0xA0, /* Collection (Physical), */ + 0x09, 0x42, /* Usage (Tip Switch), */ + 0x09, 0x44, /* Usage (Barrel Switch), */ + 0x09, 0x46, /* Usage (Tablet Pick), */ + 0x14, /* Logical Minimum (0), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x75, 0x01, /* Report Size (1), */ + 0x95, 0x03, /* Report Count (3), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x04, /* Report Count (4), */ + 0x81, 0x03, /* Input (Constant, Variable), */ + 0x09, 0x32, /* Usage (In Range), */ + 0x95, 0x01, /* Report Count (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0x75, 0x10, /* Report Size (16), */ + 0x95, 0x01, /* Report Count (1), */ + 0xA4, /* Push, */ + 0x05, 0x01, /* Usage Page (Desktop), */ + 0x55, 0xFD, /* Unit Exponent (-3), */ + 0x65, 0x13, /* Unit (Inch), */ + 0x14, /* Logical Minimum (0), */ + 0x34, /* Physical Minimum (0), */ + 0x09, 0x30, /* Usage (X), */ + 0x27, 0x00, 0xF0, 0x00, 0x00, /* Logical Maximum (61440), */ + 0x46, 0xE0, 0x2E, /* Physical Maximum (12000), */ + 0x81, 0x02, /* Input (Variable), */ + 0x09, 0x31, /* Usage (Y), */ + 0x27, 0x00, 0xB4, 0x00, 0x00, /* Logical Maximum (46080), */ + 0x46, 0x28, 0x23, /* Physical Maximum (9000), */ + 0x81, 0x02, /* Input (Variable), */ + 0xB4, /* Pop, */ + 0x09, 0x30, /* Usage (Tip Pressure), */ + 0x14, /* Logical Minimum (0), */ + 0x26, 0xFF, 0x07, /* Logical Maximum (2047), */ + 0x81, 0x02, /* Input (Variable), */ + 0xC0, /* End Collection, */ + 0xC0, /* End Collection, */ + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x09, 0x21, /* Usage (Puck), */ + 0xA1, 0x01, /* Collection (Application), */ + 0x85, 0x11, /* Report ID (17), */ + 0x09, 0x21, /* Usage (Puck), */ + 0xA0, /* Collection (Physical), */ + 0x05, 0x09, /* Usage Page (Button), */ + 0x75, 0x01, /* Report Size (1), */ + 0x19, 0x01, /* Usage Minimum (01h), */ + 0x29, 0x03, /* Usage Maximum (03h), */ + 0x14, /* Logical Minimum (0), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x95, 0x03, /* Report Count (3), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x04, /* Report Count (4), */ + 0x81, 0x01, /* Input (Constant), */ + 0x95, 0x01, /* Report Count (1), */ + 0x0B, 0x32, 0x00, 0x0D, 0x00, /* Usage (Digitizer In Range), */ + 0x14, /* Logical Minimum (0), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0xA4, /* Push, */ + 0x05, 0x01, /* Usage Page (Desktop), */ + 0x75, 0x10, /* Report Size (16), */ + 0x95, 0x01, /* Report Count (1), */ + 0x55, 0xFD, /* Unit Exponent (-3), */ + 0x65, 0x13, /* Unit (Inch), */ + 0x14, /* Logical Minimum (0), */ + 0x34, /* Physical Minimum (0), */ + 0x09, 0x30, /* Usage (X), */ + 0x27, 0x00, 0xF0, 0x00, 0x00, /* Logical Maximum (61440), */ + 0x46, 0xE0, 0x2E, /* Physical Maximum (12000), */ + 0x81, 0x02, /* Input (Variable), */ + 0x09, 0x31, /* Usage (Y), */ + 0x27, 0x00, 0xB4, 0x00, 0x00, /* Logical Maximum (46080), */ + 0x46, 0x28, 0x23, /* Physical Maximum (9000), */ + 0x81, 0x02, /* Input (Variable), */ + 0x09, 0x38, /* Usage (Wheel), */ + 0x75, 0x08, /* Report Size (8), */ + 0x95, 0x01, /* Report Count (1), */ + 0x15, 0xFF, /* Logical Minimum (-1), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x34, /* Physical Minimum (0), */ + 0x44, /* Physical Maximum (0), */ + 0x81, 0x06, /* Input (Variable, Relative), */ + 0xB4, /* Pop, */ + 0xC0, /* End Collection, */ + 0xC0, /* End Collection, */ + 0x05, 0x0C, /* Usage Page (Consumer), */ + 0x09, 0x01, /* Usage (Consumer Control), */ + 0xA1, 0x01, /* Collection (Application), */ + 0x85, 0x12, /* Report ID (18), */ + 0x14, /* Logical Minimum (0), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x75, 0x01, /* Report Size (1), */ + 0x95, 0x08, /* Report Count (8), */ + 0x05, 0x0C, /* Usage Page (Consumer), */ + 0x0A, 0x6A, 0x02, /* Usage (AC Delete), */ + 0x0A, 0x1A, 0x02, /* Usage (AC Undo), */ + 0x0A, 0x01, 0x02, /* Usage (AC New), */ + 0x0A, 0x2F, 0x02, /* Usage (AC Zoom), */ + 0x0A, 0x25, 0x02, /* Usage (AC Forward), */ + 0x0A, 0x24, 0x02, /* Usage (AC Back), */ + 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */ + 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x30, /* Report Count (48), */ + 0x81, 0x03, /* Input (Constant, Variable), */ + 0xC0 /* End Collection */ +}; + static __u8 *kye_consumer_control_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize, int offset, const char *device_name) { /* @@ -335,6 +466,12 @@ static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, *rsize = sizeof(easypen_m610x_rdesc_fixed); } break; + case USB_DEVICE_ID_KYE_PENSKETCH_M912: + if (*rsize == PENSKETCH_M912_RDESC_ORIG_SIZE) { + rdesc = pensketch_m912_rdesc_fixed; + *rsize = sizeof(pensketch_m912_rdesc_fixed); + } + break; case USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE: rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 104, "Genius Gila Gaming Mouse"); @@ -418,6 +555,7 @@ static int kye_probe(struct hid_device *hdev, const struct hid_device_id *id) case USB_DEVICE_ID_KYE_MOUSEPEN_I608X: case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2: case USB_DEVICE_ID_KYE_EASYPEN_M610X: + case USB_DEVICE_ID_KYE_PENSKETCH_M912: ret = kye_tablet_enable(hdev); if (ret) { hid_err(hdev, "tablet enabling failed\n"); @@ -457,6 +595,8 @@ static const struct hid_device_id kye_devices[] = { USB_DEVICE_ID_GENIUS_GX_IMPERATOR) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_MANTICORE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, + USB_DEVICE_ID_KYE_PENSKETCH_M912) }, { } }; MODULE_DEVICE_TABLE(hid, kye_devices); diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index f91ff145db9a..b86c18e651ed 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -27,6 +27,7 @@ #include "usbhid/usbhid.h" #include "hid-ids.h" #include "hid-lg.h" +#include "hid-lg4ff.h" #define LG_RDESC 0x001 #define LG_BAD_RELATIVE_KEYS 0x002 @@ -818,4 +819,10 @@ static struct hid_driver lg_driver = { }; module_hid_driver(lg_driver); +#ifdef CONFIG_LOGIWHEELS_FF +int lg4ff_no_autoswitch = 0; +module_param_named(lg4ff_no_autoswitch, lg4ff_no_autoswitch, int, S_IRUGO); +MODULE_PARM_DESC(lg4ff_no_autoswitch, "Do not switch multimode wheels to their native mode automatically"); +#endif + MODULE_LICENSE("GPL"); diff --git a/drivers/hid/hid-lg.h b/drivers/hid/hid-lg.h index 142ce3f5f055..10dd8f024135 100644 --- a/drivers/hid/hid-lg.h +++ b/drivers/hid/hid-lg.h @@ -24,16 +24,4 @@ int lg3ff_init(struct hid_device *hdev); static inline int lg3ff_init(struct hid_device *hdev) { return -1; } #endif -#ifdef CONFIG_LOGIWHEELS_FF -int lg4ff_adjust_input_event(struct hid_device *hid, struct hid_field *field, - struct hid_usage *usage, __s32 value, struct lg_drv_data *drv_data); -int lg4ff_init(struct hid_device *hdev); -int lg4ff_deinit(struct hid_device *hdev); -#else -static inline int lg4ff_adjust_input_event(struct hid_device *hid, struct hid_field *field, - struct hid_usage *usage, __s32 value, struct lg_drv_data *drv_data) { return 0; } -static inline int lg4ff_init(struct hid_device *hdev) { return -1; } -static inline int lg4ff_deinit(struct hid_device *hdev) { return -1; } -#endif - #endif diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c index db0dd9b17e53..1232210b1cc5 100644 --- a/drivers/hid/hid-lg4ff.c +++ b/drivers/hid/hid-lg4ff.c @@ -30,23 +30,44 @@ #include "usbhid/usbhid.h" #include "hid-lg.h" +#include "hid-lg4ff.h" #include "hid-ids.h" -#define DFGT_REV_MAJ 0x13 -#define DFGT_REV_MIN 0x22 -#define DFGT2_REV_MIN 0x26 -#define DFP_REV_MAJ 0x11 -#define DFP_REV_MIN 0x06 -#define FFEX_REV_MAJ 0x21 -#define FFEX_REV_MIN 0x00 -#define G25_REV_MAJ 0x12 -#define G25_REV_MIN 0x22 -#define G27_REV_MAJ 0x12 -#define G27_REV_MIN 0x38 -#define G27_2_REV_MIN 0x39 - #define to_hid_device(pdev) container_of(pdev, struct hid_device, dev) +#define LG4FF_MMODE_IS_MULTIMODE 0 +#define LG4FF_MMODE_SWITCHED 1 +#define LG4FF_MMODE_NOT_MULTIMODE 2 + +#define LG4FF_MODE_NATIVE_IDX 0 +#define LG4FF_MODE_DFEX_IDX 1 +#define LG4FF_MODE_DFP_IDX 2 +#define LG4FF_MODE_G25_IDX 3 +#define LG4FF_MODE_DFGT_IDX 4 +#define LG4FF_MODE_G27_IDX 5 +#define LG4FF_MODE_MAX_IDX 6 + +#define LG4FF_MODE_NATIVE BIT(LG4FF_MODE_NATIVE_IDX) +#define LG4FF_MODE_DFEX BIT(LG4FF_MODE_DFEX_IDX) +#define LG4FF_MODE_DFP BIT(LG4FF_MODE_DFP_IDX) +#define LG4FF_MODE_G25 BIT(LG4FF_MODE_G25_IDX) +#define LG4FF_MODE_DFGT BIT(LG4FF_MODE_DFGT_IDX) +#define LG4FF_MODE_G27 BIT(LG4FF_MODE_G27_IDX) + +#define LG4FF_DFEX_TAG "DF-EX" +#define LG4FF_DFEX_NAME "Driving Force / Formula EX" +#define LG4FF_DFP_TAG "DFP" +#define LG4FF_DFP_NAME "Driving Force Pro" +#define LG4FF_G25_TAG "G25" +#define LG4FF_G25_NAME "G25 Racing Wheel" +#define LG4FF_G27_TAG "G27" +#define LG4FF_G27_NAME "G27 Racing Wheel" +#define LG4FF_DFGT_TAG "DFGT" +#define LG4FF_DFGT_NAME "Driving Force GT" + +#define LG4FF_FFEX_REV_MAJ 0x21 +#define LG4FF_FFEX_REV_MIN 0x00 + static void hid_lg4ff_set_range_dfp(struct hid_device *hid, u16 range); static void hid_lg4ff_set_range_g25(struct hid_device *hid, u16 range); @@ -59,6 +80,10 @@ struct lg4ff_device_entry { __u8 led_state; struct led_classdev *led[5]; #endif + u32 alternate_modes; + const char *real_tag; + const char *real_name; + u16 real_product_id; struct list_head list; void (*set_range)(struct hid_device *hid, u16 range); }; @@ -77,6 +102,35 @@ struct lg4ff_wheel { void (*set_range)(struct hid_device *hid, u16 range); }; +struct lg4ff_compat_mode_switch { + const __u8 cmd_count; /* Number of commands to send */ + const __u8 cmd[]; +}; + +struct lg4ff_wheel_ident_info { + const u16 mask; + const u16 result; + const u16 real_product_id; +}; + +struct lg4ff_wheel_ident_checklist { + const u32 count; + const struct lg4ff_wheel_ident_info *models[]; +}; + +struct lg4ff_multimode_wheel { + const u16 product_id; + const u32 alternate_modes; + const char *real_tag; + const char *real_name; +}; + +struct lg4ff_alternate_mode { + const u16 product_id; + const char *tag; + const char *name; +}; + static const struct lg4ff_wheel lg4ff_devices[] = { {USB_DEVICE_ID_LOGITECH_WHEEL, lg4ff_wheel_effects, 40, 270, NULL}, {USB_DEVICE_ID_LOGITECH_MOMO_WHEEL, lg4ff_wheel_effects, 40, 270, NULL}, @@ -88,46 +142,106 @@ static const struct lg4ff_wheel lg4ff_devices[] = { {USB_DEVICE_ID_LOGITECH_WII_WHEEL, lg4ff_wheel_effects, 40, 270, NULL} }; -struct lg4ff_native_cmd { - const __u8 cmd_num; /* Number of commands to send */ - const __u8 cmd[]; +static const struct lg4ff_multimode_wheel lg4ff_multimode_wheels[] = { + {USB_DEVICE_ID_LOGITECH_DFP_WHEEL, + LG4FF_MODE_NATIVE | LG4FF_MODE_DFP | LG4FF_MODE_DFEX, + LG4FF_DFP_TAG, LG4FF_DFP_NAME}, + {USB_DEVICE_ID_LOGITECH_G25_WHEEL, + LG4FF_MODE_NATIVE | LG4FF_MODE_G25 | LG4FF_MODE_DFP | LG4FF_MODE_DFEX, + LG4FF_G25_TAG, LG4FF_G25_NAME}, + {USB_DEVICE_ID_LOGITECH_DFGT_WHEEL, + LG4FF_MODE_NATIVE | LG4FF_MODE_DFGT | LG4FF_MODE_DFP | LG4FF_MODE_DFEX, + LG4FF_DFGT_TAG, LG4FF_DFGT_NAME}, + {USB_DEVICE_ID_LOGITECH_G27_WHEEL, + LG4FF_MODE_NATIVE | LG4FF_MODE_G27 | LG4FF_MODE_G25 | LG4FF_MODE_DFP | LG4FF_MODE_DFEX, + LG4FF_G27_TAG, LG4FF_G27_NAME}, }; -struct lg4ff_usb_revision { - const __u16 rev_maj; - const __u16 rev_min; - const struct lg4ff_native_cmd *command; +static const struct lg4ff_alternate_mode lg4ff_alternate_modes[] = { + [LG4FF_MODE_NATIVE_IDX] = {0, "native", ""}, + [LG4FF_MODE_DFEX_IDX] = {USB_DEVICE_ID_LOGITECH_WHEEL, LG4FF_DFEX_TAG, LG4FF_DFEX_NAME}, + [LG4FF_MODE_DFP_IDX] = {USB_DEVICE_ID_LOGITECH_DFP_WHEEL, LG4FF_DFP_TAG, LG4FF_DFP_NAME}, + [LG4FF_MODE_G25_IDX] = {USB_DEVICE_ID_LOGITECH_G25_WHEEL, LG4FF_G25_TAG, LG4FF_G25_NAME}, + [LG4FF_MODE_DFGT_IDX] = {USB_DEVICE_ID_LOGITECH_DFGT_WHEEL, LG4FF_DFGT_TAG, LG4FF_DFGT_NAME}, + [LG4FF_MODE_G27_IDX] = {USB_DEVICE_ID_LOGITECH_G27_WHEEL, LG4FF_G27_TAG, LG4FF_G27_NAME} }; -static const struct lg4ff_native_cmd native_dfp = { - 1, - {0xf8, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00} +/* Multimode wheel identificators */ +static const struct lg4ff_wheel_ident_info lg4ff_dfp_ident_info = { + 0xf000, + 0x1000, + USB_DEVICE_ID_LOGITECH_DFP_WHEEL +}; + +static const struct lg4ff_wheel_ident_info lg4ff_g25_ident_info = { + 0xff00, + 0x1200, + USB_DEVICE_ID_LOGITECH_G25_WHEEL +}; + +static const struct lg4ff_wheel_ident_info lg4ff_g27_ident_info = { + 0xfff0, + 0x1230, + USB_DEVICE_ID_LOGITECH_G27_WHEEL }; -static const struct lg4ff_native_cmd native_dfgt = { +static const struct lg4ff_wheel_ident_info lg4ff_dfgt_ident_info = { + 0xff00, + 0x1300, + USB_DEVICE_ID_LOGITECH_DFGT_WHEEL +}; + +/* Multimode wheel identification checklists */ +static const struct lg4ff_wheel_ident_checklist lg4ff_main_checklist = { + 4, + {&lg4ff_dfgt_ident_info, + &lg4ff_g27_ident_info, + &lg4ff_g25_ident_info, + &lg4ff_dfp_ident_info} +}; + +/* Compatibility mode switching commands */ +/* EXT_CMD9 - Understood by G27 and DFGT */ +static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext09_dfex = { 2, - {0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, /* 1st command */ - 0xf8, 0x09, 0x03, 0x01, 0x00, 0x00, 0x00} /* 2nd command */ + {0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, /* Revert mode upon USB reset */ + 0xf8, 0x09, 0x00, 0x01, 0x00, 0x00, 0x00} /* Switch mode to DF-EX with detach */ }; -static const struct lg4ff_native_cmd native_g25 = { - 1, - {0xf8, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00} +static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext09_dfp = { + 2, + {0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, /* Revert mode upon USB reset */ + 0xf8, 0x09, 0x01, 0x01, 0x00, 0x00, 0x00} /* Switch mode to DFP with detach */ +}; + +static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext09_g25 = { + 2, + {0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, /* Revert mode upon USB reset */ + 0xf8, 0x09, 0x02, 0x01, 0x00, 0x00, 0x00} /* Switch mode to G25 with detach */ +}; + +static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext09_dfgt = { + 2, + {0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, /* Revert mode upon USB reset */ + 0xf8, 0x09, 0x03, 0x01, 0x00, 0x00, 0x00} /* Switch mode to DFGT with detach */ }; -static const struct lg4ff_native_cmd native_g27 = { +static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext09_g27 = { 2, - {0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, /* 1st command */ - 0xf8, 0x09, 0x04, 0x01, 0x00, 0x00, 0x00} /* 2nd command */ + {0xf8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, /* Revert mode upon USB reset */ + 0xf8, 0x09, 0x04, 0x01, 0x00, 0x00, 0x00} /* Switch mode to G27 with detach */ }; -static const struct lg4ff_usb_revision lg4ff_revs[] = { - {DFGT_REV_MAJ, DFGT_REV_MIN, &native_dfgt}, /* Driving Force GT */ - {DFGT_REV_MAJ, DFGT2_REV_MIN, &native_dfgt}, /* Driving Force GT v2 */ - {DFP_REV_MAJ, DFP_REV_MIN, &native_dfp}, /* Driving Force Pro */ - {G25_REV_MAJ, G25_REV_MIN, &native_g25}, /* G25 */ - {G27_REV_MAJ, G27_REV_MIN, &native_g27}, /* G27 */ - {G27_REV_MAJ, G27_2_REV_MIN, &native_g27}, /* G27 v2 */ +/* EXT_CMD1 - Understood by DFP, G25, G27 and DFGT */ +static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext01_dfp = { + 1, + {0xf8, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00} +}; + +/* EXT_CMD16 - Understood by G25 and G27 */ +static const struct lg4ff_compat_mode_switch lg4ff_mode_switch_ext16_g25 = { + 1, + {0xf8, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00} }; /* Recalculates X axis value accordingly to currently selected range */ @@ -396,20 +510,216 @@ static void hid_lg4ff_set_range_dfp(struct hid_device *hid, __u16 range) hid_hw_request(hid, report, HID_REQ_SET_REPORT); } -static void hid_lg4ff_switch_native(struct hid_device *hid, const struct lg4ff_native_cmd *cmd) +static const struct lg4ff_compat_mode_switch *lg4ff_get_mode_switch_command(const u16 real_product_id, const u16 target_product_id) +{ + switch (real_product_id) { + case USB_DEVICE_ID_LOGITECH_DFP_WHEEL: + switch (target_product_id) { + case USB_DEVICE_ID_LOGITECH_DFP_WHEEL: + return &lg4ff_mode_switch_ext01_dfp; + /* DFP can only be switched to its native mode */ + default: + return NULL; + } + break; + case USB_DEVICE_ID_LOGITECH_G25_WHEEL: + switch (target_product_id) { + case USB_DEVICE_ID_LOGITECH_DFP_WHEEL: + return &lg4ff_mode_switch_ext01_dfp; + case USB_DEVICE_ID_LOGITECH_G25_WHEEL: + return &lg4ff_mode_switch_ext16_g25; + /* G25 can only be switched to DFP mode or its native mode */ + default: + return NULL; + } + break; + case USB_DEVICE_ID_LOGITECH_G27_WHEEL: + switch (target_product_id) { + case USB_DEVICE_ID_LOGITECH_WHEEL: + return &lg4ff_mode_switch_ext09_dfex; + case USB_DEVICE_ID_LOGITECH_DFP_WHEEL: + return &lg4ff_mode_switch_ext09_dfp; + case USB_DEVICE_ID_LOGITECH_G25_WHEEL: + return &lg4ff_mode_switch_ext09_g25; + case USB_DEVICE_ID_LOGITECH_G27_WHEEL: + return &lg4ff_mode_switch_ext09_g27; + /* G27 can only be switched to DF-EX, DFP, G25 or its native mode */ + default: + return NULL; + } + break; + case USB_DEVICE_ID_LOGITECH_DFGT_WHEEL: + switch (target_product_id) { + case USB_DEVICE_ID_LOGITECH_WHEEL: + return &lg4ff_mode_switch_ext09_dfex; + case USB_DEVICE_ID_LOGITECH_DFP_WHEEL: + return &lg4ff_mode_switch_ext09_dfp; + case USB_DEVICE_ID_LOGITECH_DFGT_WHEEL: + return &lg4ff_mode_switch_ext09_dfgt; + /* DFGT can only be switched to DF-EX, DFP or its native mode */ + default: + return NULL; + } + break; + /* No other wheels have multiple modes */ + default: + return NULL; + } +} + +static int lg4ff_switch_compatibility_mode(struct hid_device *hid, const struct lg4ff_compat_mode_switch *s) { struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct hid_report *report = list_entry(report_list->next, struct hid_report, list); - __u8 i, j; + __s32 *value = report->field[0]->value; + u8 i; - j = 0; - while (j < 7*cmd->cmd_num) { - for (i = 0; i < 7; i++) - report->field[0]->value[i] = cmd->cmd[j++]; + for (i = 0; i < s->cmd_count; i++) { + u8 j; + + for (j = 0; j < 7; j++) + value[j] = s->cmd[j + (7*i)]; hid_hw_request(hid, report, HID_REQ_SET_REPORT); } + hid_hw_wait(hid); + return 0; +} + +static ssize_t lg4ff_alternate_modes_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hid_device *hid = to_hid_device(dev); + struct lg4ff_device_entry *entry; + struct lg_drv_data *drv_data; + ssize_t count = 0; + int i; + + drv_data = hid_get_drvdata(hid); + if (!drv_data) { + hid_err(hid, "Private driver data not found!\n"); + return 0; + } + + entry = drv_data->device_props; + if (!entry) { + hid_err(hid, "Device properties not found!\n"); + return 0; + } + + if (!entry->real_name) { + hid_err(hid, "NULL pointer to string\n"); + return 0; + } + + for (i = 0; i < LG4FF_MODE_MAX_IDX; i++) { + if (entry->alternate_modes & BIT(i)) { + /* Print tag and full name */ + count += scnprintf(buf + count, PAGE_SIZE - count, "%s: %s", + lg4ff_alternate_modes[i].tag, + !lg4ff_alternate_modes[i].product_id ? entry->real_name : lg4ff_alternate_modes[i].name); + if (count >= PAGE_SIZE - 1) + return count; + + /* Mark the currently active mode with an asterisk */ + if (lg4ff_alternate_modes[i].product_id == entry->product_id || + (lg4ff_alternate_modes[i].product_id == 0 && entry->product_id == entry->real_product_id)) + count += scnprintf(buf + count, PAGE_SIZE - count, " *\n"); + else + count += scnprintf(buf + count, PAGE_SIZE - count, "\n"); + + if (count >= PAGE_SIZE - 1) + return count; + } + } + + return count; +} + +static ssize_t lg4ff_alternate_modes_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct hid_device *hid = to_hid_device(dev); + struct lg4ff_device_entry *entry; + struct lg_drv_data *drv_data; + const struct lg4ff_compat_mode_switch *s; + u16 target_product_id = 0; + int i, ret; + char *lbuf; + + drv_data = hid_get_drvdata(hid); + if (!drv_data) { + hid_err(hid, "Private driver data not found!\n"); + return -EINVAL; + } + + entry = drv_data->device_props; + if (!entry) { + hid_err(hid, "Device properties not found!\n"); + return -EINVAL; + } + + /* Allow \n at the end of the input parameter */ + lbuf = kasprintf(GFP_KERNEL, "%s", buf); + if (!lbuf) + return -ENOMEM; + + i = strlen(lbuf); + if (lbuf[i-1] == '\n') { + if (i == 1) { + kfree(lbuf); + return -EINVAL; + } + lbuf[i-1] = '\0'; + } + + for (i = 0; i < LG4FF_MODE_MAX_IDX; i++) { + const u16 mode_product_id = lg4ff_alternate_modes[i].product_id; + const char *tag = lg4ff_alternate_modes[i].tag; + + if (entry->alternate_modes & BIT(i)) { + if (!strcmp(tag, lbuf)) { + if (!mode_product_id) + target_product_id = entry->real_product_id; + else + target_product_id = mode_product_id; + break; + } + } + } + + if (i == LG4FF_MODE_MAX_IDX) { + hid_info(hid, "Requested mode \"%s\" is not supported by the device\n", lbuf); + kfree(lbuf); + return -EINVAL; + } + kfree(lbuf); /* Not needed anymore */ + + if (target_product_id == entry->product_id) /* Nothing to do */ + return count; + + /* Automatic switching has to be disabled for the switch to DF-EX mode to work correctly */ + if (target_product_id == USB_DEVICE_ID_LOGITECH_WHEEL && !lg4ff_no_autoswitch) { + hid_info(hid, "\"%s\" cannot be switched to \"DF-EX\" mode. Load the \"hid_logitech\" module with \"lg4ff_no_autoswitch=1\" parameter set and try again\n", + entry->real_name); + return -EINVAL; + } + + /* Take care of hardware limitations */ + if ((entry->real_product_id == USB_DEVICE_ID_LOGITECH_DFP_WHEEL || entry->real_product_id == USB_DEVICE_ID_LOGITECH_G25_WHEEL) && + entry->product_id > target_product_id) { + hid_info(hid, "\"%s\" cannot be switched back into \"%s\" mode\n", entry->real_name, lg4ff_alternate_modes[i].name); + return -EINVAL; + } + + s = lg4ff_get_mode_switch_command(entry->real_product_id, target_product_id); + if (!s) { + hid_err(hid, "Invalid target product ID %X\n", target_product_id); + return -EINVAL; + } + + ret = lg4ff_switch_compatibility_mode(hid, s); + return (ret == 0 ? count : ret); } +static DEVICE_ATTR(alternate_modes, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH, lg4ff_alternate_modes_show, lg4ff_alternate_modes_store); /* Read current range and display it in terminal */ static ssize_t range_show(struct device *dev, struct device_attribute *attr, @@ -472,6 +782,41 @@ static ssize_t range_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(range); +static ssize_t lg4ff_real_id_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hid_device *hid = to_hid_device(dev); + struct lg4ff_device_entry *entry; + struct lg_drv_data *drv_data; + size_t count; + + drv_data = hid_get_drvdata(hid); + if (!drv_data) { + hid_err(hid, "Private driver data not found!\n"); + return 0; + } + + entry = drv_data->device_props; + if (!entry) { + hid_err(hid, "Device properties not found!\n"); + return 0; + } + + if (!entry->real_tag || !entry->real_name) { + hid_err(hid, "NULL pointer to string\n"); + return 0; + } + + count = scnprintf(buf, PAGE_SIZE, "%s: %s\n", entry->real_tag, entry->real_name); + return count; +} + +static ssize_t lg4ff_real_id_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + /* Real ID is a read-only value */ + return -EPERM; +} +static DEVICE_ATTR(real_id, S_IRUGO, lg4ff_real_id_show, lg4ff_real_id_store); + #ifdef CONFIG_LEDS_CLASS static void lg4ff_set_leds(struct hid_device *hid, __u8 leds) { @@ -555,20 +900,119 @@ static enum led_brightness lg4ff_led_get_brightness(struct led_classdev *led_cde } #endif +static u16 lg4ff_identify_multimode_wheel(struct hid_device *hid, const u16 reported_product_id, const u16 bcdDevice) +{ + const struct lg4ff_wheel_ident_checklist *checklist; + int i, from_idx, to_idx; + + switch (reported_product_id) { + case USB_DEVICE_ID_LOGITECH_WHEEL: + case USB_DEVICE_ID_LOGITECH_DFP_WHEEL: + checklist = &lg4ff_main_checklist; + from_idx = 0; + to_idx = checklist->count - 1; + break; + case USB_DEVICE_ID_LOGITECH_G25_WHEEL: + checklist = &lg4ff_main_checklist; + from_idx = 0; + to_idx = checklist->count - 2; /* End identity check at G25 */ + break; + case USB_DEVICE_ID_LOGITECH_G27_WHEEL: + checklist = &lg4ff_main_checklist; + from_idx = 1; /* Start identity check at G27 */ + to_idx = checklist->count - 3; /* End identity check at G27 */ + break; + case USB_DEVICE_ID_LOGITECH_DFGT_WHEEL: + checklist = &lg4ff_main_checklist; + from_idx = 0; + to_idx = checklist->count - 4; /* End identity check at DFGT */ + break; + default: + return 0; + } + + for (i = from_idx; i <= to_idx; i++) { + const u16 mask = checklist->models[i]->mask; + const u16 result = checklist->models[i]->result; + const u16 real_product_id = checklist->models[i]->real_product_id; + + if ((bcdDevice & mask) == result) { + dbg_hid("Found wheel with real PID %X whose reported PID is %X\n", real_product_id, reported_product_id); + return real_product_id; + } + } + + /* No match found. This is either Driving Force or an unknown + * wheel model, do not touch it */ + dbg_hid("Wheel with bcdDevice %X was not recognized as multimode wheel, leaving in its current mode\n", bcdDevice); + return 0; +} + +static int lg4ff_handle_multimode_wheel(struct hid_device *hid, u16 *real_product_id, const u16 bcdDevice) +{ + const u16 reported_product_id = hid->product; + int ret; + + *real_product_id = lg4ff_identify_multimode_wheel(hid, reported_product_id, bcdDevice); + /* Probed wheel is not a multimode wheel */ + if (!*real_product_id) { + *real_product_id = reported_product_id; + dbg_hid("Wheel is not a multimode wheel\n"); + return LG4FF_MMODE_NOT_MULTIMODE; + } + + /* Switch from "Driving Force" mode to native mode automatically. + * Otherwise keep the wheel in its current mode */ + if (reported_product_id == USB_DEVICE_ID_LOGITECH_WHEEL && + reported_product_id != *real_product_id && + !lg4ff_no_autoswitch) { + const struct lg4ff_compat_mode_switch *s = lg4ff_get_mode_switch_command(*real_product_id, *real_product_id); + + if (!s) { + hid_err(hid, "Invalid product id %X\n", *real_product_id); + return LG4FF_MMODE_NOT_MULTIMODE; + } + + ret = lg4ff_switch_compatibility_mode(hid, s); + if (ret) { + /* Wheel could not have been switched to native mode, + * leave it in "Driving Force" mode and continue */ + hid_err(hid, "Unable to switch wheel mode, errno %d\n", ret); + return LG4FF_MMODE_IS_MULTIMODE; + } + return LG4FF_MMODE_SWITCHED; + } + + return LG4FF_MMODE_IS_MULTIMODE; +} + + int lg4ff_init(struct hid_device *hid) { struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); struct input_dev *dev = hidinput->input; + const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor); + const u16 bcdDevice = le16_to_cpu(udesc->bcdDevice); struct lg4ff_device_entry *entry; struct lg_drv_data *drv_data; - struct usb_device_descriptor *udesc; int error, i, j; - __u16 bcdDevice, rev_maj, rev_min; + int mmode_ret, mmode_idx = -1; + u16 real_product_id; /* Check that the report looks ok */ if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7)) return -1; + /* Check if a multimode wheel has been connected and + * handle it appropriately */ + mmode_ret = lg4ff_handle_multimode_wheel(hid, &real_product_id, bcdDevice); + + /* Wheel has been told to switch to native mode. There is no point in going on + * with the initialization as the wheel will do a USB reset when it switches mode + */ + if (mmode_ret == LG4FF_MMODE_SWITCHED) + return 0; + /* Check what wheel has been connected */ for (i = 0; i < ARRAY_SIZE(lg4ff_devices); i++) { if (hid->product == lg4ff_devices[i].product_id) { @@ -583,25 +1027,15 @@ int lg4ff_init(struct hid_device *hid) return -1; } - /* Attempt to switch wheel to native mode when applicable */ - udesc = &(hid_to_usb_dev(hid)->descriptor); - if (!udesc) { - hid_err(hid, "NULL USB device descriptor\n"); - return -1; - } - bcdDevice = le16_to_cpu(udesc->bcdDevice); - rev_maj = bcdDevice >> 8; - rev_min = bcdDevice & 0xff; - - if (lg4ff_devices[i].product_id == USB_DEVICE_ID_LOGITECH_WHEEL) { - dbg_hid("Generic wheel detected, can it do native?\n"); - dbg_hid("USB revision: %2x.%02x\n", rev_maj, rev_min); + if (mmode_ret == LG4FF_MMODE_IS_MULTIMODE) { + for (mmode_idx = 0; mmode_idx < ARRAY_SIZE(lg4ff_multimode_wheels); mmode_idx++) { + if (real_product_id == lg4ff_multimode_wheels[mmode_idx].product_id) + break; + } - for (j = 0; j < ARRAY_SIZE(lg4ff_revs); j++) { - if (lg4ff_revs[j].rev_maj == rev_maj && lg4ff_revs[j].rev_min == rev_min) { - hid_lg4ff_switch_native(hid, lg4ff_revs[j].command); - hid_info(hid, "Switched to native mode\n"); - } + if (mmode_idx == ARRAY_SIZE(lg4ff_multimode_wheels)) { + hid_err(hid, "Device product ID %X is not listed as a multimode wheel", real_product_id); + return -1; } } @@ -630,14 +1064,23 @@ int lg4ff_init(struct hid_device *hid) drv_data->device_props = entry; entry->product_id = lg4ff_devices[i].product_id; + entry->real_product_id = real_product_id; entry->min_range = lg4ff_devices[i].min_range; entry->max_range = lg4ff_devices[i].max_range; entry->set_range = lg4ff_devices[i].set_range; + if (mmode_ret == LG4FF_MMODE_IS_MULTIMODE) { + BUG_ON(mmode_idx == -1); + entry->alternate_modes = lg4ff_multimode_wheels[mmode_idx].alternate_modes; + entry->real_tag = lg4ff_multimode_wheels[mmode_idx].real_tag; + entry->real_name = lg4ff_multimode_wheels[mmode_idx].real_name; + } /* Check if autocentering is available and * set the centering force to zero by default */ if (test_bit(FF_AUTOCENTER, dev->ffbit)) { - if (rev_maj == FFEX_REV_MAJ && rev_min == FFEX_REV_MIN) /* Formula Force EX expects different autocentering command */ + /* Formula Force EX expects different autocentering command */ + if ((bcdDevice >> 8) == LG4FF_FFEX_REV_MAJ && + (bcdDevice & 0xff) == LG4FF_FFEX_REV_MIN) dev->ff->set_autocenter = hid_lg4ff_set_autocenter_ffex; else dev->ff->set_autocenter = hid_lg4ff_set_autocenter_default; @@ -649,6 +1092,14 @@ int lg4ff_init(struct hid_device *hid) error = device_create_file(&hid->dev, &dev_attr_range); if (error) return error; + if (mmode_ret == LG4FF_MMODE_IS_MULTIMODE) { + error = device_create_file(&hid->dev, &dev_attr_real_id); + if (error) + return error; + error = device_create_file(&hid->dev, &dev_attr_alternate_modes); + if (error) + return error; + } dbg_hid("sysfs interface created\n"); /* Set the maximum range to start with */ @@ -711,24 +1162,26 @@ out: return 0; } - - int lg4ff_deinit(struct hid_device *hid) { struct lg4ff_device_entry *entry; struct lg_drv_data *drv_data; - device_remove_file(&hid->dev, &dev_attr_range); - drv_data = hid_get_drvdata(hid); if (!drv_data) { hid_err(hid, "Error while deinitializing device, no private driver data.\n"); return -1; } entry = drv_data->device_props; - if (!entry) { - hid_err(hid, "Error while deinitializing device, no device properties data.\n"); - return -1; + if (!entry) + goto out; /* Nothing more to do */ + + device_remove_file(&hid->dev, &dev_attr_range); + + /* Multimode devices will have at least the "MODE_NATIVE" bit set */ + if (entry->alternate_modes) { + device_remove_file(&hid->dev, &dev_attr_real_id); + device_remove_file(&hid->dev, &dev_attr_alternate_modes); } #ifdef CONFIG_LEDS_CLASS @@ -752,6 +1205,7 @@ int lg4ff_deinit(struct hid_device *hid) /* Deallocate memory */ kfree(entry); +out: dbg_hid("Device successfully unregistered\n"); return 0; } diff --git a/drivers/hid/hid-lg4ff.h b/drivers/hid/hid-lg4ff.h new file mode 100644 index 000000000000..5b6a5086c47f --- /dev/null +++ b/drivers/hid/hid-lg4ff.h @@ -0,0 +1,18 @@ +#ifndef __HID_LG4FF_H +#define __HID_LG4FF_H + +#ifdef CONFIG_LOGIWHEELS_FF +extern int lg4ff_no_autoswitch; /* From hid-lg.c */ + +int lg4ff_adjust_input_event(struct hid_device *hid, struct hid_field *field, + struct hid_usage *usage, __s32 value, struct lg_drv_data *drv_data); +int lg4ff_init(struct hid_device *hdev); +int lg4ff_deinit(struct hid_device *hdev); +#else +static inline int lg4ff_adjust_input_event(struct hid_device *hid, struct hid_field *field, + struct hid_usage *usage, __s32 value, struct lg_drv_data *drv_data) { return 0; } +static inline int lg4ff_init(struct hid_device *hdev) { return -1; } +static inline int lg4ff_deinit(struct hid_device *hdev) { return -1; } +#endif + +#endif diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index e77658cd037c..b3cf6fd4be96 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -28,6 +28,11 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>"); MODULE_AUTHOR("Nestor Lopez Casado <nlopezcasad@logitech.com>"); +static bool disable_raw_mode; +module_param(disable_raw_mode, bool, 0644); +MODULE_PARM_DESC(disable_raw_mode, + "Disable Raw mode reporting for touchpads and keep firmware gestures."); + #define REPORT_ID_HIDPP_SHORT 0x10 #define REPORT_ID_HIDPP_LONG 0x11 @@ -1188,6 +1193,11 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) hidpp->quirks = id->driver_data; + if (disable_raw_mode) { + hidpp->quirks &= ~HIDPP_QUIRK_CLASS_WTP; + hidpp->quirks &= ~HIDPP_QUIRK_DELAYED_INIT; + } + if (hidpp->quirks & HIDPP_QUIRK_CLASS_WTP) { ret = wtp_allocate(hdev, id); if (ret) @@ -1210,6 +1220,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) connected = hidpp_is_connected(hidpp); if (id->group != HID_GROUP_LOGITECH_DJ_DEVICE) { if (!connected) { + ret = -ENODEV; hid_err(hdev, "Device not connected"); hid_device_io_stop(hdev); goto hid_parse_fail; diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index f65e78b46999..6a9b05b328a9 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -42,7 +42,6 @@ #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> -#include <linux/usb.h> #include <linux/input/mt.h> #include <linux/string.h> @@ -72,6 +71,8 @@ MODULE_LICENSE("GPL"); #define MT_INPUTMODE_TOUCHSCREEN 0x02 #define MT_INPUTMODE_TOUCHPAD 0x03 +#define MT_BUTTONTYPE_CLICKPAD 0 + struct mt_slot { __s32 x, y, cx, cy, p, w, h; __s32 contactid; /* the device ContactID assigned to this slot */ @@ -116,6 +117,8 @@ struct mt_device { __u8 touches_by_report; /* how many touches are present in one report: * 1 means we should use a serial protocol * > 1 means hybrid (multitouch) protocol */ + __u8 buttons_count; /* number of physical buttons per touchpad */ + bool is_buttonpad; /* is this device a button pad? */ bool serial_maybe; /* need to check for serial protocol */ bool curvalid; /* is the current contact valid? */ unsigned mt_flags; /* flags to pass to input-mt */ @@ -334,6 +337,16 @@ static void mt_feature_mapping(struct hid_device *hdev, td->maxcontacts = td->mtclass.maxcontacts; break; + case HID_DG_BUTTONTYPE: + if (usage->usage_index >= field->report_count) { + dev_err(&hdev->dev, "HID_DG_BUTTONTYPE out of range\n"); + break; + } + + if (field->value[usage->usage_index] == MT_BUTTONTYPE_CLICKPAD) + td->is_buttonpad = true; + + break; } } @@ -379,6 +392,10 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, td->inputmode_value = MT_INPUTMODE_TOUCHPAD; } + /* count the buttons on touchpads */ + if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) + td->buttons_count++; + if (usage->usage_index) prev_usage = &field->usage[usage->usage_index - 1]; @@ -728,6 +745,13 @@ static void mt_touch_input_configured(struct hid_device *hdev, if (cls->quirks & MT_QUIRK_NOT_SEEN_MEANS_UP) td->mt_flags |= INPUT_MT_DROP_UNUSED; + /* check for clickpads */ + if ((td->mt_flags & INPUT_MT_POINTER) && (td->buttons_count == 1)) + td->is_buttonpad = true; + + if (td->is_buttonpad) + __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); + input_mt_init_slots(input, td->maxcontacts, td->mt_flags); td->mt_flags = 0; diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c index 49d4fe4f5987..368ffdf2c0a3 100644 --- a/drivers/hid/hid-rmi.c +++ b/drivers/hid/hid-rmi.c @@ -104,6 +104,7 @@ struct rmi_data { unsigned long flags; + struct rmi_function f01; struct rmi_function f11; struct rmi_function f30; @@ -124,6 +125,7 @@ struct rmi_data { struct hid_device *hdev; unsigned long device_flags; + unsigned long firmware_id; }; #define RMI_PAGE(addr) (((addr) >> 8) & 0xff) @@ -272,6 +274,46 @@ static inline int rmi_read(struct hid_device *hdev, u16 addr, void *buf) return rmi_read_block(hdev, addr, buf, 1); } +static int rmi_write_block(struct hid_device *hdev, u16 addr, void *buf, + const int len) +{ + struct rmi_data *data = hid_get_drvdata(hdev); + int ret; + + mutex_lock(&data->page_mutex); + + if (RMI_PAGE(addr) != data->page) { + ret = rmi_set_page(hdev, RMI_PAGE(addr)); + if (ret < 0) + goto exit; + } + + data->writeReport[0] = RMI_WRITE_REPORT_ID; + data->writeReport[1] = len; + data->writeReport[2] = addr & 0xFF; + data->writeReport[3] = (addr >> 8) & 0xFF; + memcpy(&data->writeReport[4], buf, len); + + ret = rmi_write_report(hdev, data->writeReport, + data->output_report_size); + if (ret < 0) { + dev_err(&hdev->dev, + "failed to write request output report (%d)\n", + ret); + goto exit; + } + ret = 0; + +exit: + mutex_unlock(&data->page_mutex); + return ret; +} + +static inline int rmi_write(struct hid_device *hdev, u16 addr, void *buf) +{ + return rmi_write_block(hdev, addr, buf, 1); +} + static void rmi_f11_process_touch(struct rmi_data *hdata, int slot, u8 finger_state, u8 *touch_data) { @@ -532,6 +574,9 @@ static void rmi_register_function(struct rmi_data *data, u16 page_base = page << 8; switch (pdt_entry->function_number) { + case 0x01: + f = &data->f01; + break; case 0x11: f = &data->f11; break; @@ -604,6 +649,92 @@ error_exit: return retval; } +#define RMI_DEVICE_F01_BASIC_QUERY_LEN 11 + +static int rmi_populate_f01(struct hid_device *hdev) +{ + struct rmi_data *data = hid_get_drvdata(hdev); + u8 basic_queries[RMI_DEVICE_F01_BASIC_QUERY_LEN]; + u8 info[3]; + int ret; + bool has_query42; + bool has_lts; + bool has_sensor_id; + bool has_ds4_queries = false; + bool has_build_id_query = false; + bool has_package_id_query = false; + u16 query_offset = data->f01.query_base_addr; + u16 prod_info_addr; + u8 ds4_query_len; + + ret = rmi_read_block(hdev, query_offset, basic_queries, + RMI_DEVICE_F01_BASIC_QUERY_LEN); + if (ret) { + hid_err(hdev, "Can not read basic queries from Function 0x1.\n"); + return ret; + } + + has_lts = !!(basic_queries[0] & BIT(2)); + has_sensor_id = !!(basic_queries[1] & BIT(3)); + has_query42 = !!(basic_queries[1] & BIT(7)); + + query_offset += 11; + prod_info_addr = query_offset + 6; + query_offset += 10; + + if (has_lts) + query_offset += 20; + + if (has_sensor_id) + query_offset++; + + if (has_query42) { + ret = rmi_read(hdev, query_offset, info); + if (ret) { + hid_err(hdev, "Can not read query42.\n"); + return ret; + } + has_ds4_queries = !!(info[0] & BIT(0)); + query_offset++; + } + + if (has_ds4_queries) { + ret = rmi_read(hdev, query_offset, &ds4_query_len); + if (ret) { + hid_err(hdev, "Can not read DS4 Query length.\n"); + return ret; + } + query_offset++; + + if (ds4_query_len > 0) { + ret = rmi_read(hdev, query_offset, info); + if (ret) { + hid_err(hdev, "Can not read DS4 query.\n"); + return ret; + } + + has_package_id_query = !!(info[0] & BIT(0)); + has_build_id_query = !!(info[0] & BIT(1)); + } + } + + if (has_package_id_query) + prod_info_addr++; + + if (has_build_id_query) { + ret = rmi_read_block(hdev, prod_info_addr, info, 3); + if (ret) { + hid_err(hdev, "Can not read product info.\n"); + return ret; + } + + data->firmware_id = info[1] << 8 | info[0]; + data->firmware_id += info[2] * 65536; + } + + return 0; +} + static int rmi_populate_f11(struct hid_device *hdev) { struct rmi_data *data = hid_get_drvdata(hdev); @@ -620,6 +751,8 @@ static int rmi_populate_f11(struct hid_device *hdev) bool has_gestures; bool has_rel; bool has_data40 = false; + bool has_dribble = false; + bool has_palm_detect = false; unsigned x_size, y_size; u16 query_offset; @@ -661,6 +794,14 @@ static int rmi_populate_f11(struct hid_device *hdev) has_rel = !!(buf[0] & BIT(3)); has_gestures = !!(buf[0] & BIT(5)); + ret = rmi_read(hdev, data->f11.query_base_addr + 5, buf); + if (ret) { + hid_err(hdev, "can not get absolute data sources: %d.\n", ret); + return ret; + } + + has_dribble = !!(buf[0] & BIT(4)); + /* * At least 4 queries are guaranteed to be present in F11 * +1 for query 5 which is present since absolute events are @@ -680,6 +821,7 @@ static int rmi_populate_f11(struct hid_device *hdev) ret); return ret; } + has_palm_detect = !!(buf[0] & BIT(0)); has_query10 = !!(buf[0] & BIT(2)); query_offset += 2; /* query 7 and 8 are present */ @@ -766,17 +908,38 @@ static int rmi_populate_f11(struct hid_device *hdev) * retrieve the ctrl registers * the ctrl register has a size of 20 but a fw bug split it into 16 + 4, * and there is no way to know if the first 20 bytes are here or not. - * We use only the first 10 bytes, so get only them. + * We use only the first 12 bytes, so get only them. */ - ret = rmi_read_block(hdev, data->f11.control_base_addr, buf, 10); + ret = rmi_read_block(hdev, data->f11.control_base_addr, buf, 12); if (ret) { - hid_err(hdev, "can not read ctrl block of size 10: %d.\n", ret); + hid_err(hdev, "can not read ctrl block of size 11: %d.\n", ret); return ret; } data->max_x = buf[6] | (buf[7] << 8); data->max_y = buf[8] | (buf[9] << 8); + if (has_dribble) { + buf[0] = buf[0] & ~BIT(6); + ret = rmi_write(hdev, data->f11.control_base_addr, buf); + if (ret) { + hid_err(hdev, "can not write to control reg 0: %d.\n", + ret); + return ret; + } + } + + if (has_palm_detect) { + buf[11] = buf[11] & ~BIT(0); + ret = rmi_write(hdev, data->f11.control_base_addr + 11, + &buf[11]); + if (ret) { + hid_err(hdev, "can not write to control reg 11: %d.\n", + ret); + return ret; + } + } + return 0; } @@ -858,6 +1021,12 @@ static int rmi_populate(struct hid_device *hdev) return ret; } + ret = rmi_populate_f01(hdev); + if (ret) { + hid_err(hdev, "Error while initializing F01 (%d).\n", ret); + return ret; + } + ret = rmi_populate_f11(hdev); if (ret) { hid_err(hdev, "Error while initializing F11 (%d).\n", ret); @@ -907,6 +1076,8 @@ static void rmi_input_configured(struct hid_device *hdev, struct hid_input *hi) if (ret) goto exit; + hid_info(hdev, "firmware id: %ld\n", data->firmware_id); + __set_bit(EV_ABS, input->evbit); input_set_abs_params(input, ABS_MT_POSITION_X, 1, data->max_x, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, 1, data->max_y, 0, 0); diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c new file mode 100644 index 000000000000..5614fee82347 --- /dev/null +++ b/drivers/hid/hid-sensor-custom.c @@ -0,0 +1,849 @@ +/* + * hid-sensor-custom.c + * Copyright (c) 2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/miscdevice.h> +#include <linux/kfifo.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/poll.h> +#include <linux/bsearch.h> +#include <linux/platform_device.h> +#include <linux/hid-sensor-hub.h> + +#define HID_CUSTOM_NAME_LENGTH 64 +#define HID_CUSTOM_MAX_CORE_ATTRS 10 +#define HID_CUSTOM_TOTAL_ATTRS (HID_CUSTOM_MAX_CORE_ATTRS + 1) +#define HID_CUSTOM_FIFO_SIZE 4096 +#define HID_CUSTOM_MAX_FEATURE_BYTES 64 + +struct hid_sensor_custom_field { + int report_id; + char group_name[HID_CUSTOM_NAME_LENGTH]; + struct hid_sensor_hub_attribute_info attribute; + struct device_attribute sd_attrs[HID_CUSTOM_MAX_CORE_ATTRS]; + char attr_name[HID_CUSTOM_TOTAL_ATTRS][HID_CUSTOM_NAME_LENGTH]; + struct attribute *attrs[HID_CUSTOM_TOTAL_ATTRS]; + struct attribute_group hid_custom_attribute_group; +}; + +struct hid_sensor_custom { + struct mutex mutex; + struct platform_device *pdev; + struct hid_sensor_hub_device *hsdev; + struct hid_sensor_hub_callbacks callbacks; + int sensor_field_count; + struct hid_sensor_custom_field *fields; + int input_field_count; + int input_report_size; + int input_report_recd_size; + bool input_skip_sample; + bool enable; + struct hid_sensor_custom_field *power_state; + struct hid_sensor_custom_field *report_state; + struct miscdevice custom_dev; + struct kfifo data_fifo; + unsigned long misc_opened; + wait_queue_head_t wait; +}; + +/* Header for each sample to user space via dev interface */ +struct hid_sensor_sample { + u32 usage_id; + u64 timestamp; + u32 raw_len; +} __packed; + +static struct attribute hid_custom_attrs[] = { + {.name = "name", .mode = S_IRUGO}, + {.name = "units", .mode = S_IRUGO}, + {.name = "unit-expo", .mode = S_IRUGO}, + {.name = "minimum", .mode = S_IRUGO}, + {.name = "maximum", .mode = S_IRUGO}, + {.name = "size", .mode = S_IRUGO}, + {.name = "value", .mode = S_IWUSR | S_IRUGO}, + {.name = NULL} +}; + +static const struct hid_custom_usage_desc { + int usage_id; + char *desc; +} hid_custom_usage_desc_table[] = { + {0x200201, "event-sensor-state"}, + {0x200202, "event-sensor-event"}, + {0x200301, "property-friendly-name"}, + {0x200302, "property-persistent-unique-id"}, + {0x200303, "property-sensor-status"}, + {0x200304, "property-min-report-interval"}, + {0x200305, "property-sensor-manufacturer"}, + {0x200306, "property-sensor-model"}, + {0x200307, "property-sensor-serial-number"}, + {0x200308, "property-sensor-description"}, + {0x200309, "property-sensor-connection-type"}, + {0x20030A, "property-sensor-device-path"}, + {0x20030B, "property-hardware-revision"}, + {0x20030C, "property-firmware-version"}, + {0x20030D, "property-release-date"}, + {0x20030E, "property-report-interval"}, + {0x20030F, "property-change-sensitivity-absolute"}, + {0x200310, "property-change-sensitivity-percent-range"}, + {0x200311, "property-change-sensitivity-percent-relative"}, + {0x200312, "property-accuracy"}, + {0x200313, "property-resolution"}, + {0x200314, "property-maximum"}, + {0x200315, "property-minimum"}, + {0x200316, "property-reporting-state"}, + {0x200317, "property-sampling-rate"}, + {0x200318, "property-response-curve"}, + {0x200319, "property-power-state"}, + {0x200540, "data-field-custom"}, + {0x200541, "data-field-custom-usage"}, + {0x200542, "data-field-custom-boolean-array"}, + {0x200543, "data-field-custom-value"}, + {0x200544, "data-field-custom-value_1"}, + {0x200545, "data-field-custom-value_2"}, + {0x200546, "data-field-custom-value_3"}, + {0x200547, "data-field-custom-value_4"}, + {0x200548, "data-field-custom-value_5"}, + {0x200549, "data-field-custom-value_6"}, + {0x20054A, "data-field-custom-value_7"}, + {0x20054B, "data-field-custom-value_8"}, + {0x20054C, "data-field-custom-value_9"}, + {0x20054D, "data-field-custom-value_10"}, + {0x20054E, "data-field-custom-value_11"}, + {0x20054F, "data-field-custom-value_12"}, + {0x200550, "data-field-custom-value_13"}, + {0x200551, "data-field-custom-value_14"}, + {0x200552, "data-field-custom-value_15"}, + {0x200553, "data-field-custom-value_16"}, + {0x200554, "data-field-custom-value_17"}, + {0x200555, "data-field-custom-value_18"}, + {0x200556, "data-field-custom-value_19"}, + {0x200557, "data-field-custom-value_20"}, + {0x200558, "data-field-custom-value_21"}, + {0x200559, "data-field-custom-value_22"}, + {0x20055A, "data-field-custom-value_23"}, + {0x20055B, "data-field-custom-value_24"}, + {0x20055C, "data-field-custom-value_25"}, + {0x20055D, "data-field-custom-value_26"}, + {0x20055E, "data-field-custom-value_27"}, + {0x20055F, "data-field-custom-value_28"}, +}; + +static int usage_id_cmp(const void *p1, const void *p2) +{ + if (*(int *)p1 < *(int *)p2) + return -1; + + if (*(int *)p1 > *(int *)p2) + return 1; + + return 0; +} + +static ssize_t enable_sensor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct hid_sensor_custom *sensor_inst = platform_get_drvdata(pdev); + + return sprintf(buf, "%d\n", sensor_inst->enable); +} + +static int set_power_report_state(struct hid_sensor_custom *sensor_inst, + bool state) +{ + int power_val = -1; + int report_val = -1; + u32 power_state_usage_id; + u32 report_state_usage_id; + int ret; + + /* + * It is possible that the power/report state ids are not present. + * In this case this function will return success. But if the + * ids are present, then it will return error if set fails. + */ + if (state) { + power_state_usage_id = + HID_USAGE_SENSOR_PROP_POWER_STATE_D0_FULL_POWER_ENUM; + report_state_usage_id = + HID_USAGE_SENSOR_PROP_REPORTING_STATE_ALL_EVENTS_ENUM; + } else { + power_state_usage_id = + HID_USAGE_SENSOR_PROP_POWER_STATE_D4_POWER_OFF_ENUM; + report_state_usage_id = + HID_USAGE_SENSOR_PROP_REPORTING_STATE_NO_EVENTS_ENUM; + } + + if (sensor_inst->power_state) + power_val = hid_sensor_get_usage_index(sensor_inst->hsdev, + sensor_inst->power_state->attribute.report_id, + sensor_inst->power_state->attribute.index, + power_state_usage_id); + if (sensor_inst->report_state) + report_val = hid_sensor_get_usage_index(sensor_inst->hsdev, + sensor_inst->report_state->attribute.report_id, + sensor_inst->report_state->attribute.index, + report_state_usage_id); + + if (power_val >= 0) { + power_val += + sensor_inst->power_state->attribute.logical_minimum; + ret = sensor_hub_set_feature(sensor_inst->hsdev, + sensor_inst->power_state->attribute.report_id, + sensor_inst->power_state->attribute.index, + sizeof(power_val), + &power_val); + if (ret) { + hid_err(sensor_inst->hsdev->hdev, + "Set power state failed\n"); + return ret; + } + } + + if (report_val >= 0) { + report_val += + sensor_inst->report_state->attribute.logical_minimum; + ret = sensor_hub_set_feature(sensor_inst->hsdev, + sensor_inst->report_state->attribute.report_id, + sensor_inst->report_state->attribute.index, + sizeof(report_val), + &report_val); + if (ret) { + hid_err(sensor_inst->hsdev->hdev, + "Set report state failed\n"); + return ret; + } + } + + return 0; +} + +static ssize_t enable_sensor_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct platform_device *pdev = to_platform_device(dev); + struct hid_sensor_custom *sensor_inst = platform_get_drvdata(pdev); + int value; + int ret = -EINVAL; + + if (kstrtoint(buf, 0, &value) != 0) + return -EINVAL; + + mutex_lock(&sensor_inst->mutex); + if (value && !sensor_inst->enable) { + ret = sensor_hub_device_open(sensor_inst->hsdev); + if (ret) + goto unlock_state; + + ret = set_power_report_state(sensor_inst, true); + if (ret) { + sensor_hub_device_close(sensor_inst->hsdev); + goto unlock_state; + } + sensor_inst->enable = true; + } else if (!value && sensor_inst->enable) { + ret = set_power_report_state(sensor_inst, false); + sensor_hub_device_close(sensor_inst->hsdev); + sensor_inst->enable = false; + } +unlock_state: + mutex_unlock(&sensor_inst->mutex); + if (ret < 0) + return ret; + + return count; +} +static DEVICE_ATTR_RW(enable_sensor); + +static struct attribute *enable_sensor_attrs[] = { + &dev_attr_enable_sensor.attr, + NULL, +}; + +static struct attribute_group enable_sensor_attr_group = { + .attrs = enable_sensor_attrs, +}; + +static ssize_t show_value(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct hid_sensor_custom *sensor_inst = platform_get_drvdata(pdev); + struct hid_sensor_hub_attribute_info *attribute; + int index, usage, field_index; + char name[HID_CUSTOM_NAME_LENGTH]; + bool feature = false; + bool input = false; + int value = 0; + + if (sscanf(attr->attr.name, "feature-%d-%x-%s", &index, &usage, + name) == 3) { + feature = true; + field_index = index + sensor_inst->input_field_count; + } else if (sscanf(attr->attr.name, "input-%d-%x-%s", &index, &usage, + name) == 3) { + input = true; + field_index = index; + } else + return -EINVAL; + + if (!strncmp(name, "value", strlen("value"))) { + u32 report_id; + int ret; + + attribute = &sensor_inst->fields[field_index].attribute; + report_id = attribute->report_id; + if (feature) { + u8 values[HID_CUSTOM_MAX_FEATURE_BYTES]; + int len = 0; + u64 value = 0; + int i = 0; + + ret = sensor_hub_get_feature(sensor_inst->hsdev, + report_id, + index, + sizeof(values), values); + if (ret < 0) + return ret; + + while (i < ret) { + if (i + attribute->size > ret) { + len += snprintf(&buf[len], + PAGE_SIZE - len, + "%d ", values[i]); + break; + } + switch (attribute->size) { + case 2: + value = (u64) *(u16 *)&values[i]; + i += attribute->size; + break; + case 4: + value = (u64) *(u32 *)&values[i]; + i += attribute->size; + break; + case 8: + value = *(u64 *)&values[i]; + i += attribute->size; + break; + default: + value = (u64) values[i]; + ++i; + break; + } + len += snprintf(&buf[len], PAGE_SIZE - len, + "%lld ", value); + } + len += snprintf(&buf[len], PAGE_SIZE - len, "\n"); + + return len; + } else if (input) + value = sensor_hub_input_attr_get_raw_value( + sensor_inst->hsdev, + sensor_inst->hsdev->usage, + usage, report_id, + SENSOR_HUB_SYNC); + } else if (!strncmp(name, "units", strlen("units"))) + value = sensor_inst->fields[field_index].attribute.units; + else if (!strncmp(name, "unit-expo", strlen("unit-expo"))) + value = sensor_inst->fields[field_index].attribute.unit_expo; + else if (!strncmp(name, "size", strlen("size"))) + value = sensor_inst->fields[field_index].attribute.size; + else if (!strncmp(name, "minimum", strlen("minimum"))) + value = sensor_inst->fields[field_index].attribute. + logical_minimum; + else if (!strncmp(name, "maximum", strlen("maximum"))) + value = sensor_inst->fields[field_index].attribute. + logical_maximum; + else if (!strncmp(name, "name", strlen("name"))) { + struct hid_custom_usage_desc *usage_desc; + + usage_desc = bsearch(&usage, hid_custom_usage_desc_table, + ARRAY_SIZE(hid_custom_usage_desc_table), + sizeof(struct hid_custom_usage_desc), + usage_id_cmp); + if (usage_desc) + return snprintf(buf, PAGE_SIZE, "%s\n", + usage_desc->desc); + else + return sprintf(buf, "not-specified\n"); + } else + return -EINVAL; + + return sprintf(buf, "%d\n", value); +} + +static ssize_t store_value(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct platform_device *pdev = to_platform_device(dev); + struct hid_sensor_custom *sensor_inst = platform_get_drvdata(pdev); + int index, field_index, usage; + char name[HID_CUSTOM_NAME_LENGTH]; + int value; + + if (sscanf(attr->attr.name, "feature-%d-%x-%s", &index, &usage, + name) == 3) { + field_index = index + sensor_inst->input_field_count; + } else + return -EINVAL; + + if (!strncmp(name, "value", strlen("value"))) { + u32 report_id; + int ret; + + if (kstrtoint(buf, 0, &value) != 0) + return -EINVAL; + + report_id = sensor_inst->fields[field_index].attribute. + report_id; + ret = sensor_hub_set_feature(sensor_inst->hsdev, report_id, + index, sizeof(value), &value); + } else + return -EINVAL; + + return count; +} + +static int hid_sensor_capture_sample(struct hid_sensor_hub_device *hsdev, + unsigned usage_id, size_t raw_len, + char *raw_data, void *priv) +{ + struct hid_sensor_custom *sensor_inst = platform_get_drvdata(priv); + struct hid_sensor_sample header; + + /* If any error occurs in a sample, rest of the fields are ignored */ + if (sensor_inst->input_skip_sample) { + hid_err(sensor_inst->hsdev->hdev, "Skipped remaining data\n"); + return 0; + } + + hid_dbg(sensor_inst->hsdev->hdev, "%s received %d of %d\n", __func__, + (int) (sensor_inst->input_report_recd_size + raw_len), + sensor_inst->input_report_size); + + if (!test_bit(0, &sensor_inst->misc_opened)) + return 0; + + if (!sensor_inst->input_report_recd_size) { + int required_size = sizeof(struct hid_sensor_sample) + + sensor_inst->input_report_size; + header.usage_id = hsdev->usage; + header.raw_len = sensor_inst->input_report_size; + header.timestamp = ktime_get_real_ns(); + if (kfifo_avail(&sensor_inst->data_fifo) >= required_size) { + kfifo_in(&sensor_inst->data_fifo, + (unsigned char *)&header, + sizeof(header)); + } else + sensor_inst->input_skip_sample = true; + } + if (kfifo_avail(&sensor_inst->data_fifo) >= raw_len) + kfifo_in(&sensor_inst->data_fifo, (unsigned char *)raw_data, + raw_len); + + sensor_inst->input_report_recd_size += raw_len; + + return 0; +} + +static int hid_sensor_send_event(struct hid_sensor_hub_device *hsdev, + unsigned usage_id, void *priv) +{ + struct hid_sensor_custom *sensor_inst = platform_get_drvdata(priv); + + if (!test_bit(0, &sensor_inst->misc_opened)) + return 0; + + sensor_inst->input_report_recd_size = 0; + sensor_inst->input_skip_sample = false; + + wake_up(&sensor_inst->wait); + + return 0; +} + +static int hid_sensor_custom_add_field(struct hid_sensor_custom *sensor_inst, + int index, int report_type, + struct hid_report *report, + struct hid_field *field) +{ + struct hid_sensor_custom_field *sensor_field; + void *fields; + + fields = krealloc(sensor_inst->fields, + (sensor_inst->sensor_field_count + 1) * + sizeof(struct hid_sensor_custom_field), GFP_KERNEL); + if (!fields) { + kfree(sensor_inst->fields); + return -ENOMEM; + } + sensor_inst->fields = fields; + sensor_field = &sensor_inst->fields[sensor_inst->sensor_field_count]; + sensor_field->attribute.usage_id = sensor_inst->hsdev->usage; + if (field->logical) + sensor_field->attribute.attrib_id = field->logical; + else + sensor_field->attribute.attrib_id = field->usage[0].hid; + + sensor_field->attribute.index = index; + sensor_field->attribute.report_id = report->id; + sensor_field->attribute.units = field->unit; + sensor_field->attribute.unit_expo = field->unit_exponent; + sensor_field->attribute.size = (field->report_size / 8); + sensor_field->attribute.logical_minimum = field->logical_minimum; + sensor_field->attribute.logical_maximum = field->logical_maximum; + + if (report_type == HID_FEATURE_REPORT) + snprintf(sensor_field->group_name, + sizeof(sensor_field->group_name), "feature-%x-%x", + sensor_field->attribute.index, + sensor_field->attribute.attrib_id); + else if (report_type == HID_INPUT_REPORT) { + snprintf(sensor_field->group_name, + sizeof(sensor_field->group_name), + "input-%x-%x", sensor_field->attribute.index, + sensor_field->attribute.attrib_id); + sensor_inst->input_field_count++; + sensor_inst->input_report_size += (field->report_size * + field->report_count) / 8; + } + + memset(&sensor_field->hid_custom_attribute_group, 0, + sizeof(struct attribute_group)); + sensor_inst->sensor_field_count++; + + return 0; +} + +static int hid_sensor_custom_add_fields(struct hid_sensor_custom *sensor_inst, + struct hid_report_enum *report_enum, + int report_type) +{ + int i; + int ret; + struct hid_report *report; + struct hid_field *field; + struct hid_sensor_hub_device *hsdev = sensor_inst->hsdev; + + list_for_each_entry(report, &report_enum->report_list, list) { + for (i = 0; i < report->maxfield; ++i) { + field = report->field[i]; + if (field->maxusage && + ((field->usage[0].collection_index >= + hsdev->start_collection_index) && + (field->usage[0].collection_index < + hsdev->end_collection_index))) { + + ret = hid_sensor_custom_add_field(sensor_inst, + i, + report_type, + report, + field); + if (ret) + return ret; + + } + } + } + + return 0; +} + +static int hid_sensor_custom_add_attributes(struct hid_sensor_custom + *sensor_inst) +{ + struct hid_sensor_hub_device *hsdev = sensor_inst->hsdev; + struct hid_device *hdev = hsdev->hdev; + int ret = -1; + int i, j; + + for (j = 0; j < HID_REPORT_TYPES; ++j) { + if (j == HID_OUTPUT_REPORT) + continue; + + ret = hid_sensor_custom_add_fields(sensor_inst, + &hdev->report_enum[j], j); + if (ret) + return ret; + + } + + /* Create sysfs attributes */ + for (i = 0; i < sensor_inst->sensor_field_count; ++i) { + j = 0; + while (j < HID_CUSTOM_TOTAL_ATTRS && + hid_custom_attrs[j].name) { + struct device_attribute *device_attr; + + device_attr = &sensor_inst->fields[i].sd_attrs[j]; + + snprintf((char *)&sensor_inst->fields[i].attr_name[j], + HID_CUSTOM_NAME_LENGTH, "%s-%s", + sensor_inst->fields[i].group_name, + hid_custom_attrs[j].name); + sysfs_attr_init(&device_attr->attr); + device_attr->attr.name = + (char *)&sensor_inst->fields[i].attr_name[j]; + device_attr->attr.mode = hid_custom_attrs[j].mode; + device_attr->show = show_value; + if (hid_custom_attrs[j].mode & S_IWUSR) + device_attr->store = store_value; + sensor_inst->fields[i].attrs[j] = &device_attr->attr; + ++j; + } + sensor_inst->fields[i].attrs[j] = NULL; + sensor_inst->fields[i].hid_custom_attribute_group.attrs = + sensor_inst->fields[i].attrs; + sensor_inst->fields[i].hid_custom_attribute_group.name = + sensor_inst->fields[i].group_name; + ret = sysfs_create_group(&sensor_inst->pdev->dev.kobj, + &sensor_inst->fields[i]. + hid_custom_attribute_group); + if (ret) + break; + + /* For power or report field store indexes */ + if (sensor_inst->fields[i].attribute.attrib_id == + HID_USAGE_SENSOR_PROY_POWER_STATE) + sensor_inst->power_state = &sensor_inst->fields[i]; + else if (sensor_inst->fields[i].attribute.attrib_id == + HID_USAGE_SENSOR_PROP_REPORT_STATE) + sensor_inst->report_state = &sensor_inst->fields[i]; + } + + return ret; +} + +static void hid_sensor_custom_remove_attributes(struct hid_sensor_custom * + sensor_inst) +{ + int i; + + for (i = 0; i < sensor_inst->sensor_field_count; ++i) + sysfs_remove_group(&sensor_inst->pdev->dev.kobj, + &sensor_inst->fields[i]. + hid_custom_attribute_group); + + kfree(sensor_inst->fields); +} + +static ssize_t hid_sensor_custom_read(struct file *file, char __user *buf, + size_t count, loff_t *f_ps) +{ + struct hid_sensor_custom *sensor_inst; + unsigned int copied; + int ret; + + sensor_inst = container_of(file->private_data, + struct hid_sensor_custom, custom_dev); + + if (count < sizeof(struct hid_sensor_sample)) + return -EINVAL; + + do { + if (kfifo_is_empty(&sensor_inst->data_fifo)) { + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + ret = wait_event_interruptible(sensor_inst->wait, + !kfifo_is_empty(&sensor_inst->data_fifo)); + if (ret) + return ret; + } + ret = kfifo_to_user(&sensor_inst->data_fifo, buf, count, + &copied); + if (ret) + return ret; + + } while (copied == 0); + + return copied; +} + +static int hid_sensor_custom_release(struct inode *inode, struct file *file) +{ + struct hid_sensor_custom *sensor_inst; + + sensor_inst = container_of(file->private_data, + struct hid_sensor_custom, custom_dev); + + clear_bit(0, &sensor_inst->misc_opened); + + return 0; +} + +static int hid_sensor_custom_open(struct inode *inode, struct file *file) +{ + struct hid_sensor_custom *sensor_inst; + + sensor_inst = container_of(file->private_data, + struct hid_sensor_custom, custom_dev); + /* We essentially have single reader and writer */ + if (test_and_set_bit(0, &sensor_inst->misc_opened)) + return -EBUSY; + + return nonseekable_open(inode, file); +} + +static unsigned int hid_sensor_custom_poll(struct file *file, + struct poll_table_struct *wait) +{ + struct hid_sensor_custom *sensor_inst; + unsigned int mask = 0; + + sensor_inst = container_of(file->private_data, + struct hid_sensor_custom, custom_dev); + + poll_wait(file, &sensor_inst->wait, wait); + + if (!kfifo_is_empty(&sensor_inst->data_fifo)) + mask = POLLIN | POLLRDNORM; + + return mask; +} + +static const struct file_operations hid_sensor_custom_fops = { + .open = hid_sensor_custom_open, + .read = hid_sensor_custom_read, + .release = hid_sensor_custom_release, + .poll = hid_sensor_custom_poll, + .llseek = noop_llseek, +}; + +static int hid_sensor_custom_dev_if_add(struct hid_sensor_custom *sensor_inst) +{ + int ret; + + ret = kfifo_alloc(&sensor_inst->data_fifo, HID_CUSTOM_FIFO_SIZE, + GFP_KERNEL); + if (ret) + return ret; + + init_waitqueue_head(&sensor_inst->wait); + + sensor_inst->custom_dev.minor = MISC_DYNAMIC_MINOR; + sensor_inst->custom_dev.name = dev_name(&sensor_inst->pdev->dev); + sensor_inst->custom_dev.fops = &hid_sensor_custom_fops, + ret = misc_register(&sensor_inst->custom_dev); + if (ret) { + kfifo_free(&sensor_inst->data_fifo); + return ret; + } + return 0; +} + +static void hid_sensor_custom_dev_if_remove(struct hid_sensor_custom + *sensor_inst) +{ + wake_up(&sensor_inst->wait); + misc_deregister(&sensor_inst->custom_dev); + kfifo_free(&sensor_inst->data_fifo); + +} + +static int hid_sensor_custom_probe(struct platform_device *pdev) +{ + struct hid_sensor_custom *sensor_inst; + struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data; + int ret; + + sensor_inst = devm_kzalloc(&pdev->dev, sizeof(*sensor_inst), + GFP_KERNEL); + if (!sensor_inst) + return -ENOMEM; + + sensor_inst->callbacks.capture_sample = hid_sensor_capture_sample; + sensor_inst->callbacks.send_event = hid_sensor_send_event; + sensor_inst->callbacks.pdev = pdev; + sensor_inst->hsdev = hsdev; + sensor_inst->pdev = pdev; + mutex_init(&sensor_inst->mutex); + platform_set_drvdata(pdev, sensor_inst); + ret = sensor_hub_register_callback(hsdev, hsdev->usage, + &sensor_inst->callbacks); + if (ret < 0) { + dev_err(&pdev->dev, "callback reg failed\n"); + return ret; + } + + ret = sysfs_create_group(&sensor_inst->pdev->dev.kobj, + &enable_sensor_attr_group); + if (ret) + goto err_remove_callback; + + ret = hid_sensor_custom_add_attributes(sensor_inst); + if (ret) + goto err_remove_group; + + ret = hid_sensor_custom_dev_if_add(sensor_inst); + if (ret) + goto err_remove_attributes; + + return 0; + +err_remove_attributes: + hid_sensor_custom_remove_attributes(sensor_inst); +err_remove_group: + sysfs_remove_group(&sensor_inst->pdev->dev.kobj, + &enable_sensor_attr_group); +err_remove_callback: + sensor_hub_remove_callback(hsdev, hsdev->usage); + + return ret; +} + +static int hid_sensor_custom_remove(struct platform_device *pdev) +{ + struct hid_sensor_custom *sensor_inst = platform_get_drvdata(pdev); + struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data; + + hid_sensor_custom_dev_if_remove(sensor_inst); + hid_sensor_custom_remove_attributes(sensor_inst); + sysfs_remove_group(&sensor_inst->pdev->dev.kobj, + &enable_sensor_attr_group); + sensor_hub_remove_callback(hsdev, hsdev->usage); + + return 0; +} + +static struct platform_device_id hid_sensor_custom_ids[] = { + { + .name = "HID-SENSOR-2000e1", + }, + { + .name = "HID-SENSOR-2000e2", + }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(platform, hid_sensor_custom_ids); + +static struct platform_driver hid_sensor_custom_platform_driver = { + .id_table = hid_sensor_custom_ids, + .driver = { + .name = KBUILD_MODNAME, + }, + .probe = hid_sensor_custom_probe, + .remove = hid_sensor_custom_remove, +}; +module_platform_driver(hid_sensor_custom_platform_driver); + +MODULE_DESCRIPTION("HID Sensor Custom and Generic sensor Driver"); +MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index e54ce1097e2c..c3f6f1e311ea 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -29,29 +29,10 @@ #define HID_SENSOR_HUB_ENUM_QUIRK 0x01 /** - * struct sensor_hub_pending - Synchronous read pending information - * @status: Pending status true/false. - * @ready: Completion synchronization data. - * @usage_id: Usage id for physical device, E.g. Gyro usage id. - * @attr_usage_id: Usage Id of a field, E.g. X-AXIS for a gyro. - * @raw_size: Response size for a read request. - * @raw_data: Place holder for received response. - */ -struct sensor_hub_pending { - bool status; - struct completion ready; - u32 usage_id; - u32 attr_usage_id; - int raw_size; - u8 *raw_data; -}; - -/** * struct sensor_hub_data - Hold a instance data for a HID hub device * @hsdev: Stored hid instance for current hub device. * @mutex: Mutex to serialize synchronous request. * @lock: Spin lock to protect pending request structure. - * @pending: Holds information of pending sync read request. * @dyn_callback_list: Holds callback function * @dyn_callback_lock: spin lock to protect callback list * @hid_sensor_hub_client_devs: Stores all MFD cells for a hub instance. @@ -61,7 +42,6 @@ struct sensor_hub_pending { struct sensor_hub_data { struct mutex mutex; spinlock_t lock; - struct sensor_hub_pending pending; struct list_head dyn_callback_list; spinlock_t dyn_callback_lock; struct mfd_cell *hid_sensor_hub_client_devs; @@ -106,7 +86,8 @@ static int sensor_hub_get_physical_device_count(struct hid_device *hdev) for (i = 0; i < hdev->maxcollection; ++i) { struct hid_collection *collection = &hdev->collection[i]; - if (collection->type == HID_COLLECTION_PHYSICAL) + if (collection->type == HID_COLLECTION_PHYSICAL || + collection->type == HID_COLLECTION_APPLICATION) ++count; } @@ -139,7 +120,8 @@ static struct hid_sensor_hub_callbacks *sensor_hub_get_callback( spin_lock_irqsave(&pdata->dyn_callback_lock, flags); list_for_each_entry(callback, &pdata->dyn_callback_list, list) - if (callback->usage_id == usage_id && + if ((callback->usage_id == usage_id || + callback->usage_id == HID_USAGE_SENSOR_COLLECTION) && (collection_index >= callback->hsdev->start_collection_index) && (collection_index < @@ -179,7 +161,18 @@ int sensor_hub_register_callback(struct hid_sensor_hub_device *hsdev, callback->usage_callback = usage_callback; callback->usage_id = usage_id; callback->priv = NULL; - list_add_tail(&callback->list, &pdata->dyn_callback_list); + /* + * If there is a handler registered for the collection type, then + * it will handle all reports for sensors in this collection. If + * there is also an individual sensor handler registration, then + * we want to make sure that the reports are directed to collection + * handler, as this may be a fusion sensor. So add collection handlers + * to the beginning of the list, so that they are matched first. + */ + if (usage_id == HID_USAGE_SENSOR_COLLECTION) + list_add(&callback->list, &pdata->dyn_callback_list); + else + list_add_tail(&callback->list, &pdata->dyn_callback_list); spin_unlock_irqrestore(&pdata->dyn_callback_lock, flags); return 0; @@ -208,10 +201,14 @@ int sensor_hub_remove_callback(struct hid_sensor_hub_device *hsdev, EXPORT_SYMBOL_GPL(sensor_hub_remove_callback); int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, - u32 field_index, s32 value) + u32 field_index, int buffer_size, void *buffer) { struct hid_report *report; struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev); + __s32 *buf32 = buffer; + int i = 0; + int remaining_bytes; + __s32 value; int ret = 0; mutex_lock(&data->mutex); @@ -220,7 +217,21 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, ret = -EINVAL; goto done_proc; } - hid_set_field(report->field[field_index], 0, value); + + remaining_bytes = do_div(buffer_size, sizeof(__s32)); + if (buffer_size) { + for (i = 0; i < buffer_size; ++i) { + hid_set_field(report->field[field_index], i, + (__force __s32)cpu_to_le32(*buf32)); + ++buf32; + } + } + if (remaining_bytes) { + value = 0; + memcpy(&value, (u8 *)buf32, remaining_bytes); + hid_set_field(report->field[field_index], i, + (__force __s32)cpu_to_le32(value)); + } hid_hw_request(hsdev->hdev, report, HID_REQ_SET_REPORT); hid_hw_wait(hsdev->hdev); @@ -232,10 +243,11 @@ done_proc: EXPORT_SYMBOL_GPL(sensor_hub_set_feature); int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, - u32 field_index, s32 *value) + u32 field_index, int buffer_size, void *buffer) { struct hid_report *report; struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev); + int report_size; int ret = 0; mutex_lock(&data->mutex); @@ -247,7 +259,17 @@ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, } hid_hw_request(hsdev->hdev, report, HID_REQ_GET_REPORT); hid_hw_wait(hsdev->hdev); - *value = report->field[field_index]->value[0]; + + /* calculate number of bytes required to read this field */ + report_size = DIV_ROUND_UP(report->field[field_index]->report_size, + 8) * + report->field[field_index]->report_count; + if (!report_size) { + ret = -EINVAL; + goto done_proc; + } + ret = min(report_size, buffer_size); + memcpy(buffer, report->field[field_index]->value, ret); done_proc: mutex_unlock(&data->mutex); @@ -259,47 +281,54 @@ EXPORT_SYMBOL_GPL(sensor_hub_get_feature); int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, u32 usage_id, - u32 attr_usage_id, u32 report_id) + u32 attr_usage_id, u32 report_id, + enum sensor_hub_read_flags flag) { struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev); unsigned long flags; struct hid_report *report; int ret_val = 0; - mutex_lock(&data->mutex); - memset(&data->pending, 0, sizeof(data->pending)); - init_completion(&data->pending.ready); - data->pending.usage_id = usage_id; - data->pending.attr_usage_id = attr_usage_id; - data->pending.raw_size = 0; - - spin_lock_irqsave(&data->lock, flags); - data->pending.status = true; - spin_unlock_irqrestore(&data->lock, flags); - report = sensor_hub_report(report_id, hsdev->hdev, HID_INPUT_REPORT); + report = sensor_hub_report(report_id, hsdev->hdev, + HID_INPUT_REPORT); if (!report) - goto err_free; - - hid_hw_request(hsdev->hdev, report, HID_REQ_GET_REPORT); - wait_for_completion_interruptible_timeout(&data->pending.ready, HZ*5); - switch (data->pending.raw_size) { - case 1: - ret_val = *(u8 *)data->pending.raw_data; - break; - case 2: - ret_val = *(u16 *)data->pending.raw_data; - break; - case 4: - ret_val = *(u32 *)data->pending.raw_data; - break; - default: - ret_val = 0; + return -EINVAL; + + mutex_lock(&hsdev->mutex); + if (flag == SENSOR_HUB_SYNC) { + memset(&hsdev->pending, 0, sizeof(hsdev->pending)); + init_completion(&hsdev->pending.ready); + hsdev->pending.usage_id = usage_id; + hsdev->pending.attr_usage_id = attr_usage_id; + hsdev->pending.raw_size = 0; + + spin_lock_irqsave(&data->lock, flags); + hsdev->pending.status = true; + spin_unlock_irqrestore(&data->lock, flags); } - kfree(data->pending.raw_data); - -err_free: - data->pending.status = false; + mutex_lock(&data->mutex); + hid_hw_request(hsdev->hdev, report, HID_REQ_GET_REPORT); mutex_unlock(&data->mutex); + if (flag == SENSOR_HUB_SYNC) { + wait_for_completion_interruptible_timeout( + &hsdev->pending.ready, HZ*5); + switch (hsdev->pending.raw_size) { + case 1: + ret_val = *(u8 *)hsdev->pending.raw_data; + break; + case 2: + ret_val = *(u16 *)hsdev->pending.raw_data; + break; + case 4: + ret_val = *(u32 *)hsdev->pending.raw_data; + break; + default: + ret_val = 0; + } + kfree(hsdev->pending.raw_data); + hsdev->pending.status = false; + } + mutex_unlock(&hsdev->mutex); return ret_val; } @@ -455,16 +484,6 @@ static int sensor_hub_raw_event(struct hid_device *hdev, report->field[i]->report_count)/8); sz = (report->field[i]->report_size * report->field[i]->report_count)/8; - if (pdata->pending.status && pdata->pending.attr_usage_id == - report->field[i]->usage->hid) { - hid_dbg(hdev, "data was pending ...\n"); - pdata->pending.raw_data = kmemdup(ptr, sz, GFP_ATOMIC); - if (pdata->pending.raw_data) - pdata->pending.raw_size = sz; - else - pdata->pending.raw_size = 0; - complete(&pdata->pending.ready); - } collection = &hdev->collection[ report->field[i]->usage->collection_index]; hid_dbg(hdev, "collection->usage %x\n", @@ -474,8 +493,23 @@ static int sensor_hub_raw_event(struct hid_device *hdev, report->field[i]->physical, report->field[i]->usage[0].collection_index, &hsdev, &priv); - - if (callback && callback->capture_sample) { + if (!callback) { + ptr += sz; + continue; + } + if (hsdev->pending.status && (hsdev->pending.attr_usage_id == + report->field[i]->usage->hid || + hsdev->pending.attr_usage_id == + report->field[i]->logical)) { + hid_dbg(hdev, "data was pending ...\n"); + hsdev->pending.raw_data = kmemdup(ptr, sz, GFP_ATOMIC); + if (hsdev->pending.raw_data) + hsdev->pending.raw_size = sz; + else + hsdev->pending.raw_size = 0; + complete(&hsdev->pending.ready); + } + if (callback->capture_sample) { if (report->field[i]->logical) callback->capture_sample(hsdev, report->field[i]->logical, sz, ptr, @@ -572,6 +606,7 @@ static int sensor_hub_probe(struct hid_device *hdev, int dev_cnt; struct hid_sensor_hub_device *hsdev; struct hid_sensor_hub_device *last_hsdev = NULL; + struct hid_sensor_hub_device *collection_hsdev = NULL; sd = devm_kzalloc(&hdev->dev, sizeof(*sd), GFP_KERNEL); if (!sd) { @@ -618,7 +653,8 @@ static int sensor_hub_probe(struct hid_device *hdev, for (i = 0; i < hdev->maxcollection; ++i) { struct hid_collection *collection = &hdev->collection[i]; - if (collection->type == HID_COLLECTION_PHYSICAL) { + if (collection->type == HID_COLLECTION_PHYSICAL || + collection->type == HID_COLLECTION_APPLICATION) { hsdev = devm_kzalloc(&hdev->dev, sizeof(*hsdev), GFP_KERNEL); @@ -630,6 +666,8 @@ static int sensor_hub_probe(struct hid_device *hdev, hsdev->hdev = hdev; hsdev->vendor_id = hdev->vendor; hsdev->product_id = hdev->product; + hsdev->usage = collection->usage; + mutex_init(&hsdev->mutex); hsdev->start_collection_index = i; if (last_hsdev) last_hsdev->end_collection_index = i; @@ -653,10 +691,17 @@ static int sensor_hub_probe(struct hid_device *hdev, hid_dbg(hdev, "Adding %s:%d\n", name, hsdev->start_collection_index); sd->hid_sensor_client_cnt++; + if (collection_hsdev) + collection_hsdev->end_collection_index = i; + if (collection->type == HID_COLLECTION_APPLICATION && + collection->usage == HID_USAGE_SENSOR_COLLECTION) + collection_hsdev = hsdev; } } if (last_hsdev) last_hsdev->end_collection_index = i; + if (collection_hsdev) + collection_hsdev->end_collection_index = i; ret = mfd_add_hotplug_devices(&hdev->dev, sd->hid_sensor_hub_client_devs, @@ -676,13 +721,18 @@ static void sensor_hub_remove(struct hid_device *hdev) { struct sensor_hub_data *data = hid_get_drvdata(hdev); unsigned long flags; + int i; hid_dbg(hdev, " hardware removed\n"); hid_hw_close(hdev); hid_hw_stop(hdev); spin_lock_irqsave(&data->lock, flags); - if (data->pending.status) - complete(&data->pending.ready); + for (i = 0; i < data->hid_sensor_client_cnt; ++i) { + struct hid_sensor_hub_device *hsdev = + data->hid_sensor_hub_client_devs[i].platform_data; + if (hsdev->pending.status) + complete(&hsdev->pending.ready); + } spin_unlock_irqrestore(&data->lock, flags); mfd_remove_devices(&hdev->dev); hid_set_drvdata(hdev, NULL); diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index c906300cf667..6ca96cebb44c 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -802,7 +802,8 @@ union sixaxis_output_report_01 { #define DS4_REPORT_0x05_SIZE 32 #define DS4_REPORT_0x11_SIZE 78 #define DS4_REPORT_0x81_SIZE 7 -#define SIXAXIS_REPORT_0xF2_SIZE 18 +#define SIXAXIS_REPORT_0xF2_SIZE 17 +#define SIXAXIS_REPORT_0xF5_SIZE 8 static DEFINE_SPINLOCK(sony_dev_list_lock); static LIST_HEAD(sony_device_list); @@ -1131,18 +1132,38 @@ static void sony_input_configured(struct hid_device *hdev, */ static int sixaxis_set_operational_usb(struct hid_device *hdev) { + const int buf_size = + max(SIXAXIS_REPORT_0xF2_SIZE, SIXAXIS_REPORT_0xF5_SIZE); + __u8 *buf; int ret; - char *buf = kmalloc(18, GFP_KERNEL); + buf = kmalloc(buf_size, GFP_KERNEL); if (!buf) return -ENOMEM; - ret = hid_hw_raw_request(hdev, 0xf2, buf, 17, HID_FEATURE_REPORT, - HID_REQ_GET_REPORT); + ret = hid_hw_raw_request(hdev, 0xf2, buf, SIXAXIS_REPORT_0xF2_SIZE, + HID_FEATURE_REPORT, HID_REQ_GET_REPORT); + if (ret < 0) { + hid_err(hdev, "can't set operational mode: step 1\n"); + goto out; + } + + /* + * Some compatible controllers like the Speedlink Strike FX and + * Gasia need another query plus an USB interrupt to get operational. + */ + ret = hid_hw_raw_request(hdev, 0xf5, buf, SIXAXIS_REPORT_0xF5_SIZE, + HID_FEATURE_REPORT, HID_REQ_GET_REPORT); + if (ret < 0) { + hid_err(hdev, "can't set operational mode: step 2\n"); + goto out; + } + ret = hid_hw_output_report(hdev, buf, 1); if (ret < 0) - hid_err(hdev, "can't set operational mode\n"); + hid_err(hdev, "can't set operational mode: step 3\n"); +out: kfree(buf); return ret; diff --git a/drivers/hid/hid-steelseries.c b/drivers/hid/hid-steelseries.c index 29f328f411fb..3edd4ac36494 100644 --- a/drivers/hid/hid-steelseries.c +++ b/drivers/hid/hid-steelseries.c @@ -12,7 +12,6 @@ */ #include <linux/device.h> -#include <linux/usb.h> #include <linux/hid.h> #include <linux/module.h> diff --git a/drivers/hid/hid-uclogic.c b/drivers/hid/hid-uclogic.c index fb8b516ff0ed..94167310e15a 100644 --- a/drivers/hid/hid-uclogic.c +++ b/drivers/hid/hid-uclogic.c @@ -1,7 +1,8 @@ /* * HID driver for UC-Logic devices not fully compliant with HID standard * - * Copyright (c) 2010 Nikolai Kondrashov + * Copyright (c) 2010-2014 Nikolai Kondrashov + * Copyright (c) 2013 Martin Rusko */ /* @@ -15,6 +16,8 @@ #include <linux/hid.h> #include <linux/module.h> #include <linux/usb.h> +#include <asm/unaligned.h> +#include "usbhid/usbhid.h" #include "hid-ids.h" @@ -546,11 +549,93 @@ static __u8 twha60_rdesc_fixed1[] = { 0xC0 /* End Collection */ }; +/* Report descriptor template placeholder head */ +#define UCLOGIC_PH_HEAD 0xFE, 0xED, 0x1D + +/* Report descriptor template placeholder IDs */ +enum uclogic_ph_id { + UCLOGIC_PH_ID_X_LM, + UCLOGIC_PH_ID_X_PM, + UCLOGIC_PH_ID_Y_LM, + UCLOGIC_PH_ID_Y_PM, + UCLOGIC_PH_ID_PRESSURE_LM, + UCLOGIC_PH_ID_NUM +}; + +/* Report descriptor template placeholder */ +#define UCLOGIC_PH(_ID) UCLOGIC_PH_HEAD, UCLOGIC_PH_ID_##_ID +#define UCLOGIC_PEN_REPORT_ID 0x07 + +/* Fixed report descriptor template */ +static const __u8 uclogic_tablet_rdesc_template[] = { + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x09, 0x02, /* Usage (Pen), */ + 0xA1, 0x01, /* Collection (Application), */ + 0x85, 0x07, /* Report ID (7), */ + 0x09, 0x20, /* Usage (Stylus), */ + 0xA0, /* Collection (Physical), */ + 0x14, /* Logical Minimum (0), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x75, 0x01, /* Report Size (1), */ + 0x09, 0x42, /* Usage (Tip Switch), */ + 0x09, 0x44, /* Usage (Barrel Switch), */ + 0x09, 0x46, /* Usage (Tablet Pick), */ + 0x95, 0x03, /* Report Count (3), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x03, /* Report Count (3), */ + 0x81, 0x03, /* Input (Constant, Variable), */ + 0x09, 0x32, /* Usage (In Range), */ + 0x95, 0x01, /* Report Count (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x01, /* Report Count (1), */ + 0x81, 0x03, /* Input (Constant, Variable), */ + 0x75, 0x10, /* Report Size (16), */ + 0x95, 0x01, /* Report Count (1), */ + 0xA4, /* Push, */ + 0x05, 0x01, /* Usage Page (Desktop), */ + 0x65, 0x13, /* Unit (Inch), */ + 0x55, 0xFD, /* Unit Exponent (-3), */ + 0x34, /* Physical Minimum (0), */ + 0x09, 0x30, /* Usage (X), */ + 0x27, UCLOGIC_PH(X_LM), /* Logical Maximum (PLACEHOLDER), */ + 0x47, UCLOGIC_PH(X_PM), /* Physical Maximum (PLACEHOLDER), */ + 0x81, 0x02, /* Input (Variable), */ + 0x09, 0x31, /* Usage (Y), */ + 0x27, UCLOGIC_PH(Y_LM), /* Logical Maximum (PLACEHOLDER), */ + 0x47, UCLOGIC_PH(Y_PM), /* Physical Maximum (PLACEHOLDER), */ + 0x81, 0x02, /* Input (Variable), */ + 0xB4, /* Pop, */ + 0x09, 0x30, /* Usage (Tip Pressure), */ + 0x27, + UCLOGIC_PH(PRESSURE_LM),/* Logical Maximum (PLACEHOLDER), */ + 0x81, 0x02, /* Input (Variable), */ + 0xC0, /* End Collection, */ + 0xC0 /* End Collection */ +}; + +/* Parameter indices */ +enum uclogic_prm { + UCLOGIC_PRM_X_LM = 1, + UCLOGIC_PRM_Y_LM = 2, + UCLOGIC_PRM_PRESSURE_LM = 4, + UCLOGIC_PRM_RESOLUTION = 5, + UCLOGIC_PRM_NUM +}; + +/* Driver data */ +struct uclogic_drvdata { + __u8 *rdesc; + unsigned int rsize; + bool invert_pen_inrange; + bool ignore_pen_usage; +}; + static __u8 *uclogic_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { struct usb_interface *iface = to_usb_interface(hdev->dev.parent); __u8 iface_num = iface->cur_altsetting->desc.bInterfaceNumber; + struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); switch (hdev->product) { case USB_DEVICE_ID_UCLOGIC_TABLET_PF1209: @@ -621,11 +706,241 @@ static __u8 *uclogic_report_fixup(struct hid_device *hdev, __u8 *rdesc, break; } break; + default: + if (drvdata->rdesc != NULL) { + rdesc = drvdata->rdesc; + *rsize = drvdata->rsize; + } } return rdesc; } +static int uclogic_input_mapping(struct hid_device *hdev, struct hid_input *hi, + struct hid_field *field, struct hid_usage *usage, + unsigned long **bit, int *max) +{ + struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); + + /* discard the unused pen interface */ + if ((drvdata->ignore_pen_usage) && + (field->application == HID_DG_PEN)) + return -1; + + /* let hid-core decide what to do */ + return 0; +} + +static void uclogic_input_configured(struct hid_device *hdev, + struct hid_input *hi) +{ + char *name; + const char *suffix = NULL; + struct hid_field *field; + size_t len; + + /* no report associated (HID_QUIRK_MULTI_INPUT not set) */ + if (!hi->report) + return; + + field = hi->report->field[0]; + + switch (field->application) { + case HID_GD_KEYBOARD: + suffix = "Keyboard"; + break; + case HID_GD_MOUSE: + suffix = "Mouse"; + break; + case HID_GD_KEYPAD: + suffix = "Pad"; + break; + case HID_DG_PEN: + suffix = "Pen"; + break; + case HID_CP_CONSUMER_CONTROL: + suffix = "Consumer Control"; + break; + case HID_GD_SYSTEM_CONTROL: + suffix = "System Control"; + break; + } + + if (suffix) { + len = strlen(hdev->name) + 2 + strlen(suffix); + name = devm_kzalloc(&hi->input->dev, len, GFP_KERNEL); + if (name) { + snprintf(name, len, "%s %s", hdev->name, suffix); + hi->input->name = name; + } + } +} + +/** + * Enable fully-functional tablet mode and determine device parameters. + * + * @hdev: HID device + */ +static int uclogic_tablet_enable(struct hid_device *hdev) +{ + int rc; + struct usb_device *usb_dev = hid_to_usb_dev(hdev); + struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); + __le16 *buf = NULL; + size_t len; + s32 params[UCLOGIC_PH_ID_NUM]; + s32 resolution; + __u8 *p; + s32 v; + + /* + * Read string descriptor containing tablet parameters. The specific + * string descriptor and data were discovered by sniffing the Windows + * driver traffic. + * NOTE: This enables fully-functional tablet mode. + */ + len = UCLOGIC_PRM_NUM * sizeof(*buf); + buf = kmalloc(len, GFP_KERNEL); + if (buf == NULL) { + hid_err(hdev, "failed to allocate parameter buffer\n"); + rc = -ENOMEM; + goto cleanup; + } + rc = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), + USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, + (USB_DT_STRING << 8) + 0x64, + 0x0409, buf, len, + USB_CTRL_GET_TIMEOUT); + if (rc == -EPIPE) { + hid_err(hdev, "device parameters not found\n"); + rc = -ENODEV; + goto cleanup; + } else if (rc < 0) { + hid_err(hdev, "failed to get device parameters: %d\n", rc); + rc = -ENODEV; + goto cleanup; + } else if (rc != len) { + hid_err(hdev, "invalid device parameters\n"); + rc = -ENODEV; + goto cleanup; + } + + /* Extract device parameters */ + params[UCLOGIC_PH_ID_X_LM] = le16_to_cpu(buf[UCLOGIC_PRM_X_LM]); + params[UCLOGIC_PH_ID_Y_LM] = le16_to_cpu(buf[UCLOGIC_PRM_Y_LM]); + params[UCLOGIC_PH_ID_PRESSURE_LM] = + le16_to_cpu(buf[UCLOGIC_PRM_PRESSURE_LM]); + resolution = le16_to_cpu(buf[UCLOGIC_PRM_RESOLUTION]); + if (resolution == 0) { + params[UCLOGIC_PH_ID_X_PM] = 0; + params[UCLOGIC_PH_ID_Y_PM] = 0; + } else { + params[UCLOGIC_PH_ID_X_PM] = params[UCLOGIC_PH_ID_X_LM] * + 1000 / resolution; + params[UCLOGIC_PH_ID_Y_PM] = params[UCLOGIC_PH_ID_Y_LM] * + 1000 / resolution; + } + + /* Allocate fixed report descriptor */ + drvdata->rdesc = devm_kzalloc(&hdev->dev, + sizeof(uclogic_tablet_rdesc_template), + GFP_KERNEL); + if (drvdata->rdesc == NULL) { + hid_err(hdev, "failed to allocate fixed rdesc\n"); + rc = -ENOMEM; + goto cleanup; + } + drvdata->rsize = sizeof(uclogic_tablet_rdesc_template); + + /* Format fixed report descriptor */ + memcpy(drvdata->rdesc, uclogic_tablet_rdesc_template, + drvdata->rsize); + for (p = drvdata->rdesc; + p <= drvdata->rdesc + drvdata->rsize - 4;) { + if (p[0] == 0xFE && p[1] == 0xED && p[2] == 0x1D && + p[3] < sizeof(params)) { + v = params[p[3]]; + put_unaligned(cpu_to_le32(v), (s32 *)p); + p += 4; + } else { + p++; + } + } + + rc = 0; + +cleanup: + kfree(buf); + return rc; +} + +static int uclogic_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + int rc; + struct usb_interface *intf = to_usb_interface(hdev->dev.parent); + struct uclogic_drvdata *drvdata; + + /* + * libinput requires the pad interface to be on a different node + * than the pen, so use QUIRK_MULTI_INPUT for all tablets. + */ + hdev->quirks |= HID_QUIRK_MULTI_INPUT; + hdev->quirks |= HID_QUIRK_NO_EMPTY_INPUT; + + /* Allocate and assign driver data */ + drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); + if (drvdata == NULL) + return -ENOMEM; + + hid_set_drvdata(hdev, drvdata); + + switch (id->product) { + case USB_DEVICE_ID_HUION_TABLET: + /* If this is the pen interface */ + if (intf->cur_altsetting->desc.bInterfaceNumber == 0) { + rc = uclogic_tablet_enable(hdev); + if (rc) { + hid_err(hdev, "tablet enabling failed\n"); + return rc; + } + drvdata->invert_pen_inrange = true; + } else { + drvdata->ignore_pen_usage = true; + } + break; + } + + rc = hid_parse(hdev); + if (rc) { + hid_err(hdev, "parse failed\n"); + return rc; + } + + rc = hid_hw_start(hdev, HID_CONNECT_DEFAULT); + if (rc) { + hid_err(hdev, "hw start failed\n"); + return rc; + } + + return 0; +} + +static int uclogic_raw_event(struct hid_device *hdev, struct hid_report *report, + u8 *data, int size) +{ + struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); + + if ((drvdata->invert_pen_inrange) && + (report->type == HID_INPUT_REPORT) && + (report->id == UCLOGIC_PEN_REPORT_ID) && + (size >= 2)) + /* Invert the in-range bit */ + data[1] ^= 0x40; + + return 0; +} + static const struct hid_device_id uclogic_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209) }, @@ -641,6 +956,8 @@ static const struct hid_device_id uclogic_devices[] = { USB_DEVICE_ID_UCLOGIC_WIRELESS_TABLET_TWHL850) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) }, + { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, + { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET) }, { } }; MODULE_DEVICE_TABLE(hid, uclogic_devices); @@ -648,8 +965,14 @@ MODULE_DEVICE_TABLE(hid, uclogic_devices); static struct hid_driver uclogic_driver = { .name = "uclogic", .id_table = uclogic_devices, + .probe = uclogic_probe, .report_fixup = uclogic_report_fixup, + .raw_event = uclogic_raw_event, + .input_mapping = uclogic_input_mapping, + .input_configured = uclogic_input_configured, }; module_hid_driver(uclogic_driver); +MODULE_AUTHOR("Martin Rusko"); +MODULE_AUTHOR("Nikolai Kondrashov"); MODULE_LICENSE("GPL"); diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 36053f33d6d9..ab4dd952b6ba 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -37,6 +37,7 @@ #include <linux/mutex.h> #include <linux/acpi.h> #include <linux/of.h> +#include <linux/gpio/consumer.h> #include <linux/i2c/i2c-hid.h> @@ -144,6 +145,8 @@ struct i2c_hid { unsigned long flags; /* device flags */ wait_queue_head_t wait; /* For waiting the interrupt */ + struct gpio_desc *desc; + int irq; struct i2c_hid_platform_data pdata; }; @@ -785,16 +788,16 @@ static int i2c_hid_init_irq(struct i2c_client *client) struct i2c_hid *ihid = i2c_get_clientdata(client); int ret; - dev_dbg(&client->dev, "Requesting IRQ: %d\n", client->irq); + dev_dbg(&client->dev, "Requesting IRQ: %d\n", ihid->irq); - ret = request_threaded_irq(client->irq, NULL, i2c_hid_irq, + ret = request_threaded_irq(ihid->irq, NULL, i2c_hid_irq, IRQF_TRIGGER_LOW | IRQF_ONESHOT, client->name, ihid); if (ret < 0) { dev_warn(&client->dev, "Could not register for %s interrupt, irq = %d," " ret = %d\n", - client->name, client->irq, ret); + client->name, ihid->irq, ret); return ret; } @@ -841,6 +844,14 @@ static int i2c_hid_fetch_hid_descriptor(struct i2c_hid *ihid) } #ifdef CONFIG_ACPI + +/* Default GPIO mapping */ +static const struct acpi_gpio_params i2c_hid_irq_gpio = { 0, 0, true }; +static const struct acpi_gpio_mapping i2c_hid_acpi_gpios[] = { + { "gpios", &i2c_hid_irq_gpio, 1 }, + { }, +}; + static int i2c_hid_acpi_pdata(struct i2c_client *client, struct i2c_hid_platform_data *pdata) { @@ -866,7 +877,7 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client, pdata->hid_descriptor_address = obj->integer.value; ACPI_FREE(obj); - return 0; + return acpi_dev_add_driver_gpios(adev, i2c_hid_acpi_gpios); } static const struct acpi_device_id i2c_hid_acpi_match[] = { @@ -930,12 +941,6 @@ static int i2c_hid_probe(struct i2c_client *client, dbg_hid("HID probe called for i2c 0x%02x\n", client->addr); - if (!client->irq) { - dev_err(&client->dev, - "HID over i2c has not been provided an Int IRQ\n"); - return -EINVAL; - } - ihid = kzalloc(sizeof(struct i2c_hid), GFP_KERNEL); if (!ihid) return -ENOMEM; @@ -955,6 +960,23 @@ static int i2c_hid_probe(struct i2c_client *client, ihid->pdata = *platform_data; } + if (client->irq > 0) { + ihid->irq = client->irq; + } else if (ACPI_COMPANION(&client->dev)) { + ihid->desc = gpiod_get(&client->dev, NULL, GPIOD_IN); + if (IS_ERR(ihid->desc)) { + dev_err(&client->dev, "Failed to get GPIO interrupt\n"); + return PTR_ERR(ihid->desc); + } + + ihid->irq = gpiod_to_irq(ihid->desc); + if (ihid->irq < 0) { + gpiod_put(ihid->desc); + dev_err(&client->dev, "Failed to convert GPIO to IRQ\n"); + return ihid->irq; + } + } + i2c_set_clientdata(client, ihid); ihid->client = client; @@ -1017,13 +1039,16 @@ err_mem_free: hid_destroy_device(hid); err_irq: - free_irq(client->irq, ihid); + free_irq(ihid->irq, ihid); err_pm: pm_runtime_put_noidle(&client->dev); pm_runtime_disable(&client->dev); err: + if (ihid->desc) + gpiod_put(ihid->desc); + i2c_hid_free_buffers(ihid); kfree(ihid); return ret; @@ -1042,13 +1067,18 @@ static int i2c_hid_remove(struct i2c_client *client) hid = ihid->hid; hid_destroy_device(hid); - free_irq(client->irq, ihid); + free_irq(ihid->irq, ihid); if (ihid->bufsize) i2c_hid_free_buffers(ihid); + if (ihid->desc) + gpiod_put(ihid->desc); + kfree(ihid); + acpi_dev_remove_driver_gpios(ACPI_COMPANION(&client->dev)); + return 0; } @@ -1060,9 +1090,9 @@ static int i2c_hid_suspend(struct device *dev) struct hid_device *hid = ihid->hid; int ret = 0; - disable_irq(client->irq); + disable_irq(ihid->irq); if (device_may_wakeup(&client->dev)) - enable_irq_wake(client->irq); + enable_irq_wake(ihid->irq); if (hid->driver && hid->driver->suspend) ret = hid->driver->suspend(hid, PMSG_SUSPEND); @@ -1080,13 +1110,13 @@ static int i2c_hid_resume(struct device *dev) struct i2c_hid *ihid = i2c_get_clientdata(client); struct hid_device *hid = ihid->hid; - enable_irq(client->irq); + enable_irq(ihid->irq); ret = i2c_hid_hwreset(client); if (ret) return ret; if (device_may_wakeup(&client->dev)) - disable_irq_wake(client->irq); + disable_irq_wake(ihid->irq); if (hid->driver && hid->driver->reset_resume) { ret = hid->driver->reset_resume(hid); @@ -1101,17 +1131,19 @@ static int i2c_hid_resume(struct device *dev) static int i2c_hid_runtime_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); + struct i2c_hid *ihid = i2c_get_clientdata(client); i2c_hid_set_power(client, I2C_HID_PWR_SLEEP); - disable_irq(client->irq); + disable_irq(ihid->irq); return 0; } static int i2c_hid_runtime_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); + struct i2c_hid *ihid = i2c_get_clientdata(client); - enable_irq(client->irq); + enable_irq(ihid->irq); i2c_hid_set_power(client, I2C_HID_PWR_ON); return 0; } diff --git a/drivers/hid/usbhid/hid-pidff.c b/drivers/hid/usbhid/hid-pidff.c index 0b531c6a76a5..08174d341f4a 100644 --- a/drivers/hid/usbhid/hid-pidff.c +++ b/drivers/hid/usbhid/hid-pidff.c @@ -568,6 +568,12 @@ static int pidff_upload_effect(struct input_dev *dev, struct ff_effect *effect, int type_id; int error; + pidff->block_load[PID_EFFECT_BLOCK_INDEX].value[0] = 0; + if (old) { + pidff->block_load[PID_EFFECT_BLOCK_INDEX].value[0] = + pidff->pid_id[effect->id]; + } + switch (effect->type) { case FF_CONSTANT: if (!old) { diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index a82127753461..a775143e6265 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -78,7 +78,13 @@ static const struct hid_blacklist { { USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET }, { USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C01A, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_3, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_3_JP, HID_QUIRK_NO_INIT_REPORTS }, @@ -92,6 +98,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN2, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4D22, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_PRODIGE, USB_DEVICE_ID_PRODIGE_CORDLESS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001, HID_QUIRK_NOGET }, { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008, HID_QUIRK_NOGET }, @@ -107,12 +114,8 @@ static const struct hid_blacklist { { USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_2, HID_QUIRK_NOGET }, { USB_VENDOR_ID_TPV, USB_DEVICE_ID_TPV_OPTICAL_TOUCHSCREEN, HID_QUIRK_NOGET }, { USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_KEYBOARD, HID_QUIRK_NOGET }, - { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209, HID_QUIRK_MULTI_INPUT }, - { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60, HID_QUIRK_MULTI_INPUT }, - { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U, HID_QUIRK_MULTI_INPUT }, - { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET, HID_QUIRK_MULTI_INPUT }, @@ -128,6 +131,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_DUOSENSE, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_LTS1, HID_QUIRK_NO_INIT_REPORTS }, diff --git a/drivers/hid/wacom.h b/drivers/hid/wacom.h index 0d0d0dd89d17..024f4d89d579 100644 --- a/drivers/hid/wacom.h +++ b/drivers/hid/wacom.h @@ -131,13 +131,6 @@ static inline void wacom_schedule_work(struct wacom_wac *wacom_wac) schedule_work(&wacom->work); } -static inline void wacom_notify_battery(struct wacom_wac *wacom_wac) -{ - struct wacom *wacom = container_of(wacom_wac, struct wacom, wacom_wac); - - power_supply_changed(wacom->battery); -} - extern const struct hid_device_id wacom_ids[]; void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len); @@ -151,4 +144,5 @@ void wacom_wac_usage_mapping(struct hid_device *hdev, int wacom_wac_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value); void wacom_wac_report(struct hid_device *hdev, struct hid_report *report); +void wacom_battery_work(struct work_struct *work); #endif diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index ba9af470bea0..e8607d096138 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -406,6 +406,9 @@ static int wacom_query_tablet_data(struct hid_device *hdev, else if (features->type == WACOM_27QHDT) { return wacom_set_device_mode(hdev, 131, 3, 2); } + else if (features->type == BAMBOO_PAD) { + return wacom_set_device_mode(hdev, 2, 2, 2); + } } else if (features->device_type == BTN_TOOL_PEN) { if (features->type <= BAMBOO_PT && features->type != WIRELESS) { return wacom_set_device_mode(hdev, 2, 2, 2); @@ -524,6 +527,11 @@ static int wacom_add_shared_data(struct hid_device *hdev) wacom_wac->shared = &data->shared; + if (wacom_wac->features.device_type == BTN_TOOL_FINGER) + wacom_wac->shared->touch = hdev; + else if (wacom_wac->features.device_type == BTN_TOOL_PEN) + wacom_wac->shared->pen = hdev; + out: mutex_unlock(&wacom_udev_list_lock); return retval; @@ -541,14 +549,22 @@ static void wacom_release_shared_data(struct kref *kref) kfree(data); } -static void wacom_remove_shared_data(struct wacom_wac *wacom) +static void wacom_remove_shared_data(struct wacom *wacom) { struct wacom_hdev_data *data; + struct wacom_wac *wacom_wac = &wacom->wacom_wac; + + if (wacom_wac->shared) { + data = container_of(wacom_wac->shared, struct wacom_hdev_data, + shared); + + if (wacom_wac->shared->touch == wacom->hdev) + wacom_wac->shared->touch = NULL; + else if (wacom_wac->shared->pen == wacom->hdev) + wacom_wac->shared->pen = NULL; - if (wacom->shared) { - data = container_of(wacom->shared, struct wacom_hdev_data, shared); kref_put(&data->kref, wacom_release_shared_data); - wacom->shared = NULL; + wacom_wac->shared = NULL; } } @@ -929,6 +945,7 @@ static void wacom_destroy_leds(struct wacom *wacom) } static enum power_supply_property wacom_battery_props[] = { + POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_CAPACITY @@ -948,6 +965,9 @@ static int wacom_battery_get_property(struct power_supply *psy, int ret = 0; switch (psp) { + case POWER_SUPPLY_PROP_PRESENT: + val->intval = wacom->wacom_wac.bat_connected; + break; case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; @@ -961,6 +981,8 @@ static int wacom_battery_get_property(struct power_supply *psy, else if (wacom->wacom_wac.battery_capacity == 100 && wacom->wacom_wac.ps_connected) val->intval = POWER_SUPPLY_STATUS_FULL; + else if (wacom->wacom_wac.ps_connected) + val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING; else val->intval = POWER_SUPPLY_STATUS_DISCHARGING; break; @@ -1045,8 +1067,7 @@ static int wacom_initialize_battery(struct wacom *wacom) static void wacom_destroy_battery(struct wacom *wacom) { - if ((wacom->wacom_wac.features.quirks & WACOM_QUIRK_BATTERY) && - wacom->battery) { + if (wacom->battery) { power_supply_unregister(wacom->battery); wacom->battery = NULL; power_supply_unregister(wacom->ac); @@ -1317,6 +1338,20 @@ fail: return; } +void wacom_battery_work(struct work_struct *work) +{ + struct wacom *wacom = container_of(work, struct wacom, work); + + if ((wacom->wacom_wac.features.quirks & WACOM_QUIRK_BATTERY) && + !wacom->battery) { + wacom_initialize_battery(wacom); + } + else if (!(wacom->wacom_wac.features.quirks & WACOM_QUIRK_BATTERY) && + wacom->battery) { + wacom_destroy_battery(wacom); + } +} + /* * Not all devices report physical dimensions from HID. * Compute the default from hardcoded logical dimension @@ -1377,6 +1412,9 @@ static int wacom_probe(struct hid_device *hdev, hdev->quirks |= HID_QUIRK_NO_INIT_REPORTS; + /* hid-core sets this quirk for the boot interface */ + hdev->quirks &= ~HID_QUIRK_NOGET; + wacom = kzalloc(sizeof(struct wacom), GFP_KERNEL); if (!wacom) return -ENOMEM; @@ -1416,6 +1454,21 @@ static int wacom_probe(struct hid_device *hdev, goto fail_allocate_inputs; } + /* + * Bamboo Pad has a generic hid handling for the Pen, and we switch it + * into debug mode for the touch part. + * We ignore the other interfaces. + */ + if (features->type == BAMBOO_PAD) { + if (features->pktlen == WACOM_PKGLEN_PENABLED) { + features->type = HID_GENERIC; + } else if ((features->pktlen != WACOM_PKGLEN_BPAD_TOUCH) && + (features->pktlen != WACOM_PKGLEN_BPAD_TOUCH_USB)) { + error = -ENODEV; + goto fail_shared_data; + } + } + /* set the default size in case we do not get them from hid */ wacom_set_default_phy(features); @@ -1450,6 +1503,12 @@ static int wacom_probe(struct hid_device *hdev, features->y_max = 4096; } + /* + * Same thing for Bamboo PAD + */ + if (features->type == BAMBOO_PAD) + features->device_type = BTN_TOOL_FINGER; + if (hdev->bus == BUS_BLUETOOTH) features->quirks |= WACOM_QUIRK_BATTERY; @@ -1466,19 +1525,17 @@ static int wacom_probe(struct hid_device *hdev, snprintf(wacom_wac->pad_name, sizeof(wacom_wac->pad_name), "%s Pad", features->name); - if (features->quirks & WACOM_QUIRK_MULTI_INPUT) { - /* Append the device type to the name */ - if (features->device_type != BTN_TOOL_FINGER) - strlcat(wacom_wac->name, " Pen", WACOM_NAME_MAX); - else if (features->touch_max) - strlcat(wacom_wac->name, " Finger", WACOM_NAME_MAX); - else - strlcat(wacom_wac->name, " Pad", WACOM_NAME_MAX); + /* Append the device type to the name */ + if (features->device_type != BTN_TOOL_FINGER) + strlcat(wacom_wac->name, " Pen", WACOM_NAME_MAX); + else if (features->touch_max) + strlcat(wacom_wac->name, " Finger", WACOM_NAME_MAX); + else + strlcat(wacom_wac->name, " Pad", WACOM_NAME_MAX); - error = wacom_add_shared_data(hdev); - if (error) - goto fail_shared_data; - } + error = wacom_add_shared_data(hdev); + if (error) + goto fail_shared_data; if (!(features->quirks & WACOM_QUIRK_MONITOR) && (features->quirks & WACOM_QUIRK_BATTERY)) { @@ -1531,7 +1588,7 @@ fail_register_inputs: wacom_clean_inputs(wacom); wacom_destroy_battery(wacom); fail_battery: - wacom_remove_shared_data(wacom_wac); + wacom_remove_shared_data(wacom); fail_shared_data: wacom_clean_inputs(wacom); fail_allocate_inputs: @@ -1554,7 +1611,7 @@ static void wacom_remove(struct hid_device *hdev) if (hdev->bus == BUS_BLUETOOTH) device_remove_file(&hdev->dev, &dev_attr_speed); wacom_destroy_battery(wacom); - wacom_remove_shared_data(&wacom->wacom_wac); + wacom_remove_shared_data(wacom); hid_set_drvdata(hdev, NULL); kfree(wacom); diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index bbe32d66e500..fa54d3290659 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -45,6 +45,27 @@ static unsigned short batcap_gr[8] = { 1, 15, 25, 35, 50, 70, 100, 100 }; */ static unsigned short batcap_i4[8] = { 1, 15, 30, 45, 60, 70, 85, 100 }; +static void wacom_notify_battery(struct wacom_wac *wacom_wac, + int bat_capacity, bool bat_charging, bool bat_connected, + bool ps_connected) +{ + struct wacom *wacom = container_of(wacom_wac, struct wacom, wacom_wac); + bool changed = wacom_wac->battery_capacity != bat_capacity || + wacom_wac->bat_charging != bat_charging || + wacom_wac->bat_connected != bat_connected || + wacom_wac->ps_connected != ps_connected; + + if (changed) { + wacom_wac->battery_capacity = bat_capacity; + wacom_wac->bat_charging = bat_charging; + wacom_wac->bat_connected = bat_connected; + wacom_wac->ps_connected = ps_connected; + + if (wacom->battery) + power_supply_changed(wacom->battery); + } +} + static int wacom_penpartner_irq(struct wacom_wac *wacom) { unsigned char *data = wacom->data; @@ -419,17 +440,26 @@ static int wacom_graphire_irq(struct wacom_wac *wacom) rw = (data[7] >> 2 & 0x07); battery_capacity = batcap_gr[rw]; ps_connected = rw == 7; - if ((wacom->battery_capacity != battery_capacity) || - (wacom->ps_connected != ps_connected)) { - wacom->battery_capacity = battery_capacity; - wacom->ps_connected = ps_connected; - wacom_notify_battery(wacom); - } + wacom_notify_battery(wacom, battery_capacity, ps_connected, + 1, ps_connected); } exit: return retval; } +static void wacom_intuos_schedule_prox_event(struct wacom_wac *wacom_wac) +{ + struct wacom *wacom = container_of(wacom_wac, struct wacom, wacom_wac); + struct hid_report *r; + struct hid_report_enum *re; + + re = &(wacom->hdev->report_enum[HID_FEATURE_REPORT]); + r = re->report_id_hash[WACOM_REPORT_INTUOSREAD]; + if (r) { + hid_hw_request(wacom->hdev, r, HID_REQ_GET_REPORT); + } +} + static int wacom_intuos_inout(struct wacom_wac *wacom) { struct wacom_features *features = &wacom->features; @@ -551,12 +581,9 @@ static int wacom_intuos_inout(struct wacom_wac *wacom) (features->type == CINTIQ && !(data[1] & 0x40))) return 1; - if (wacom->shared) { - wacom->shared->stylus_in_proximity = true; - - if (wacom->shared->touch_down) - return 1; - } + wacom->shared->stylus_in_proximity = true; + if (wacom->shared->touch_down) + return 1; /* in Range while exiting */ if (((data[1] & 0xfe) == 0x20) && wacom->reporting_data) { @@ -568,8 +595,7 @@ static int wacom_intuos_inout(struct wacom_wac *wacom) /* Exit report */ if ((data[1] & 0xfe) == 0x80) { - if (features->quirks & WACOM_QUIRK_MULTI_INPUT) - wacom->shared->stylus_in_proximity = false; + wacom->shared->stylus_in_proximity = false; wacom->reporting_data = false; /* don't report exit if we don't know the ID */ @@ -610,8 +636,11 @@ static int wacom_intuos_inout(struct wacom_wac *wacom) } /* don't report other events if we don't know the ID */ - if (!wacom->id[idx]) + if (!wacom->id[idx]) { + /* but reschedule a read of the current tool */ + wacom_intuos_schedule_prox_event(wacom); return 1; + } return 0; } @@ -1023,15 +1052,9 @@ static int wacom_intuos_bt_irq(struct wacom_wac *wacom, size_t len) bat_charging = (power_raw & 0x08) ? 1 : 0; ps_connected = (power_raw & 0x10) ? 1 : 0; battery_capacity = batcap_i4[power_raw & 0x07]; - if ((wacom->battery_capacity != battery_capacity) || - (wacom->bat_charging != bat_charging) || - (wacom->ps_connected != ps_connected)) { - wacom->battery_capacity = battery_capacity; - wacom->bat_charging = bat_charging; - wacom->ps_connected = ps_connected; - wacom_notify_battery(wacom); - } - + wacom_notify_battery(wacom, battery_capacity, bat_charging, + battery_capacity || bat_charging, + ps_connected); break; default: dev_dbg(wacom->input->dev.parent, @@ -1042,6 +1065,28 @@ static int wacom_intuos_bt_irq(struct wacom_wac *wacom, size_t len) return 0; } +static int wacom_wac_finger_count_touches(struct wacom_wac *wacom) +{ + struct input_dev *input = wacom->input; + unsigned touch_max = wacom->features.touch_max; + int count = 0; + int i; + + /* non-HID_GENERIC single touch input doesn't call this routine */ + if ((touch_max == 1) && (wacom->features.type == HID_GENERIC)) + return wacom->hid_data.tipswitch && + !wacom->shared->stylus_in_proximity; + + for (i = 0; i < input->mt->num_slots; i++) { + struct input_mt_slot *ps = &input->mt->slots[i]; + int id = input_mt_get_value(ps, ABS_MT_TRACKING_ID); + if (id >= 0) + count++; + } + + return count; +} + static int wacom_24hdt_irq(struct wacom_wac *wacom) { struct input_dev *input = wacom->input; @@ -1052,7 +1097,6 @@ static int wacom_24hdt_irq(struct wacom_wac *wacom) int num_contacts_left = 4; /* maximum contacts per packet */ int byte_per_packet = WACOM_BYTES_PER_24HDT_PACKET; int y_offset = 2; - static int contact_with_no_pen_down_count = 0; if (wacom->features.type == WACOM_27QHDT) { current_num_contacts = data[63]; @@ -1065,10 +1109,8 @@ static int wacom_24hdt_irq(struct wacom_wac *wacom) * First packet resets the counter since only the first * packet in series will have non-zero current_num_contacts. */ - if (current_num_contacts) { + if (current_num_contacts) wacom->num_contacts_left = current_num_contacts; - contact_with_no_pen_down_count = 0; - } contacts_to_send = min(num_contacts_left, wacom->num_contacts_left); @@ -1101,15 +1143,14 @@ static int wacom_24hdt_irq(struct wacom_wac *wacom) input_report_abs(input, ABS_MT_WIDTH_MINOR, min(w, h)); input_report_abs(input, ABS_MT_ORIENTATION, w > h); } - contact_with_no_pen_down_count++; } } - input_mt_report_pointer_emulation(input, true); + input_mt_sync_frame(input); wacom->num_contacts_left -= contacts_to_send; if (wacom->num_contacts_left <= 0) { wacom->num_contacts_left = 0; - wacom->shared->touch_down = (contact_with_no_pen_down_count > 0); + wacom->shared->touch_down = wacom_wac_finger_count_touches(wacom); } return 1; } @@ -1122,7 +1163,6 @@ static int wacom_mt_touch(struct wacom_wac *wacom) int current_num_contacts = data[2]; int contacts_to_send = 0; int x_offset = 0; - static int contact_with_no_pen_down_count = 0; /* MTTPC does not support Height and Width */ if (wacom->features.type == MTTPC || wacom->features.type == MTTPC_B) @@ -1132,10 +1172,8 @@ static int wacom_mt_touch(struct wacom_wac *wacom) * First packet resets the counter since only the first * packet in series will have non-zero current_num_contacts. */ - if (current_num_contacts) { + if (current_num_contacts) wacom->num_contacts_left = current_num_contacts; - contact_with_no_pen_down_count = 0; - } /* There are at most 5 contacts per packet */ contacts_to_send = min(5, wacom->num_contacts_left); @@ -1156,15 +1194,14 @@ static int wacom_mt_touch(struct wacom_wac *wacom) int y = get_unaligned_le16(&data[offset + x_offset + 9]); input_report_abs(input, ABS_MT_POSITION_X, x); input_report_abs(input, ABS_MT_POSITION_Y, y); - contact_with_no_pen_down_count++; } } - input_mt_report_pointer_emulation(input, true); + input_mt_sync_frame(input); wacom->num_contacts_left -= contacts_to_send; if (wacom->num_contacts_left <= 0) { wacom->num_contacts_left = 0; - wacom->shared->touch_down = (contact_with_no_pen_down_count > 0); + wacom->shared->touch_down = wacom_wac_finger_count_touches(wacom); } return 1; } @@ -1173,7 +1210,6 @@ static int wacom_tpc_mt_touch(struct wacom_wac *wacom) { struct input_dev *input = wacom->input; unsigned char *data = wacom->data; - int contact_with_no_pen_down_count = 0; int i; for (i = 0; i < 2; i++) { @@ -1188,13 +1224,12 @@ static int wacom_tpc_mt_touch(struct wacom_wac *wacom) input_report_abs(input, ABS_MT_POSITION_X, x); input_report_abs(input, ABS_MT_POSITION_Y, y); - contact_with_no_pen_down_count++; } } - input_mt_report_pointer_emulation(input, true); + input_mt_sync_frame(input); /* keep touch state for pen event */ - wacom->shared->touch_down = (contact_with_no_pen_down_count > 0); + wacom->shared->touch_down = wacom_wac_finger_count_touches(wacom); return 1; } @@ -1522,29 +1557,6 @@ static int wacom_wac_finger_event(struct hid_device *hdev, return 0; } -static int wacom_wac_finger_count_touches(struct hid_device *hdev) -{ - struct wacom *wacom = hid_get_drvdata(hdev); - struct wacom_wac *wacom_wac = &wacom->wacom_wac; - struct input_dev *input = wacom_wac->input; - unsigned touch_max = wacom_wac->features.touch_max; - int count = 0; - int i; - - if (touch_max == 1) - return wacom_wac->hid_data.tipswitch && - !wacom_wac->shared->stylus_in_proximity; - - for (i = 0; i < input->mt->num_slots; i++) { - struct input_mt_slot *ps = &input->mt->slots[i]; - int id = input_mt_get_value(ps, ABS_MT_TRACKING_ID); - if (id >= 0) - count++; - } - - return count; -} - static void wacom_wac_finger_report(struct hid_device *hdev, struct hid_report *report) { @@ -1559,7 +1571,7 @@ static void wacom_wac_finger_report(struct hid_device *hdev, input_sync(input); /* keep touch state for pen event */ - wacom_wac->shared->touch_down = wacom_wac_finger_count_touches(hdev); + wacom_wac->shared->touch_down = wacom_wac_finger_count_touches(wacom_wac); } void wacom_wac_usage_mapping(struct hid_device *hdev, @@ -1619,7 +1631,6 @@ static int wacom_bpt_touch(struct wacom_wac *wacom) struct input_dev *pad_input = wacom->pad_input; unsigned char *data = wacom->data; int i; - int contact_with_no_pen_down_count = 0; if (data[0] != 0x02) return 0; @@ -1647,22 +1658,21 @@ static int wacom_bpt_touch(struct wacom_wac *wacom) } input_report_abs(input, ABS_MT_POSITION_X, x); input_report_abs(input, ABS_MT_POSITION_Y, y); - contact_with_no_pen_down_count++; } } - input_mt_report_pointer_emulation(input, true); + input_mt_sync_frame(input); input_report_key(pad_input, BTN_LEFT, (data[1] & 0x08) != 0); input_report_key(pad_input, BTN_FORWARD, (data[1] & 0x04) != 0); input_report_key(pad_input, BTN_BACK, (data[1] & 0x02) != 0); input_report_key(pad_input, BTN_RIGHT, (data[1] & 0x01) != 0); - wacom->shared->touch_down = (contact_with_no_pen_down_count > 0); + wacom->shared->touch_down = wacom_wac_finger_count_touches(wacom); return 1; } -static int wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data, int last_touch_count) +static void wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data) { struct wacom_features *features = &wacom->features; struct input_dev *input = wacom->input; @@ -1670,7 +1680,7 @@ static int wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data, in int slot = input_mt_get_slot_by_key(input, data[0]); if (slot < 0) - return 0; + return; touch = touch && !wacom->shared->stylus_in_proximity; @@ -1702,9 +1712,7 @@ static int wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data, in input_report_abs(input, ABS_MT_POSITION_Y, y); input_report_abs(input, ABS_MT_TOUCH_MAJOR, width); input_report_abs(input, ABS_MT_TOUCH_MINOR, height); - last_touch_count++; } - return last_touch_count; } static void wacom_bpt3_button_msg(struct wacom_wac *wacom, unsigned char *data) @@ -1729,7 +1737,6 @@ static int wacom_bpt3_touch(struct wacom_wac *wacom) unsigned char *data = wacom->data; int count = data[1] & 0x07; int i; - int contact_with_no_pen_down_count = 0; if (data[0] != 0x02) return 0; @@ -1740,15 +1747,13 @@ static int wacom_bpt3_touch(struct wacom_wac *wacom) int msg_id = data[offset]; if (msg_id >= 2 && msg_id <= 17) - contact_with_no_pen_down_count = - wacom_bpt3_touch_msg(wacom, data + offset, - contact_with_no_pen_down_count); + wacom_bpt3_touch_msg(wacom, data + offset); else if (msg_id == 128) wacom_bpt3_button_msg(wacom, data + offset); } - input_mt_report_pointer_emulation(input, true); - wacom->shared->touch_down = (contact_with_no_pen_down_count > 0); + input_mt_sync_frame(input); + wacom->shared->touch_down = wacom_wac_finger_count_touches(wacom); return 1; } @@ -1760,23 +1765,9 @@ static int wacom_bpt_pen(struct wacom_wac *wacom) unsigned char *data = wacom->data; int prox = 0, x = 0, y = 0, p = 0, d = 0, pen = 0, btn1 = 0, btn2 = 0; - if (data[0] != WACOM_REPORT_PENABLED && data[0] != WACOM_REPORT_USB) + if (data[0] != WACOM_REPORT_PENABLED) return 0; - if (data[0] == WACOM_REPORT_USB) { - if (features->type == INTUOSHT && - wacom->shared->touch_input && - features->touch_max) { - input_report_switch(wacom->shared->touch_input, - SW_MUTE_DEVICE, data[8] & 0x40); - input_sync(wacom->shared->touch_input); - } - return 0; - } - - if (wacom->shared->touch_down) - return 0; - prox = (data[1] & 0x20) == 0x20; /* @@ -1789,17 +1780,21 @@ static int wacom_bpt_pen(struct wacom_wac *wacom) * * Hardware does report zero in most out-of-prox cases but not all. */ - if (prox) { - if (!wacom->shared->stylus_in_proximity) { - if (data[1] & 0x08) { - wacom->tool[0] = BTN_TOOL_RUBBER; - wacom->id[0] = ERASER_DEVICE_ID; - } else { - wacom->tool[0] = BTN_TOOL_PEN; - wacom->id[0] = STYLUS_DEVICE_ID; - } - wacom->shared->stylus_in_proximity = true; + if (!wacom->shared->stylus_in_proximity) { + if (data[1] & 0x08) { + wacom->tool[0] = BTN_TOOL_RUBBER; + wacom->id[0] = ERASER_DEVICE_ID; + } else { + wacom->tool[0] = BTN_TOOL_PEN; + wacom->id[0] = STYLUS_DEVICE_ID; } + } + + wacom->shared->stylus_in_proximity = prox; + if (wacom->shared->touch_down) + return 0; + + if (prox) { x = le16_to_cpup((__le16 *)&data[2]); y = le16_to_cpup((__le16 *)&data[4]); p = le16_to_cpup((__le16 *)&data[6]); @@ -1815,6 +1810,8 @@ static int wacom_bpt_pen(struct wacom_wac *wacom) pen = data[1] & 0x01; btn1 = data[1] & 0x02; btn2 = data[1] & 0x04; + } else { + wacom->id[0] = 0; } input_report_key(input, BTN_TOUCH, pen); @@ -1826,11 +1823,6 @@ static int wacom_bpt_pen(struct wacom_wac *wacom) input_report_abs(input, ABS_PRESSURE, p); input_report_abs(input, ABS_DISTANCE, d); - if (!prox) { - wacom->id[0] = 0; - wacom->shared->stylus_in_proximity = false; - } - input_report_key(input, wacom->tool[0], prox); /* PEN or RUBBER */ input_report_abs(input, ABS_MISC, wacom->id[0]); /* TOOL ID */ @@ -1849,6 +1841,91 @@ static int wacom_bpt_irq(struct wacom_wac *wacom, size_t len) return 0; } +static void wacom_bamboo_pad_pen_event(struct wacom_wac *wacom, + unsigned char *data) +{ + unsigned char prefix; + + /* + * We need to reroute the event from the debug interface to the + * pen interface. + * We need to add the report ID to the actual pen report, so we + * temporary overwrite the first byte to prevent having to kzalloc/kfree + * and memcpy the report. + */ + prefix = data[0]; + data[0] = WACOM_REPORT_BPAD_PEN; + + /* + * actually reroute the event. + * No need to check if wacom->shared->pen is valid, hid_input_report() + * will check for us. + */ + hid_input_report(wacom->shared->pen, HID_INPUT_REPORT, data, + WACOM_PKGLEN_PENABLED, 1); + + data[0] = prefix; +} + +static int wacom_bamboo_pad_touch_event(struct wacom_wac *wacom, + unsigned char *data) +{ + struct input_dev *input = wacom->input; + unsigned char *finger_data, prefix; + unsigned id; + int x, y; + bool valid; + + prefix = data[0]; + + for (id = 0; id < wacom->features.touch_max; id++) { + valid = !!(prefix & BIT(id)) && + !wacom->shared->stylus_in_proximity; + + input_mt_slot(input, id); + input_mt_report_slot_state(input, MT_TOOL_FINGER, valid); + + if (!valid) + continue; + + finger_data = data + 1 + id * 3; + x = finger_data[0] | ((finger_data[1] & 0x0f) << 8); + y = (finger_data[2] << 4) | (finger_data[1] >> 4); + + input_report_abs(input, ABS_MT_POSITION_X, x); + input_report_abs(input, ABS_MT_POSITION_Y, y); + } + + input_mt_sync_frame(input); + + input_report_key(input, BTN_LEFT, prefix & 0x40); + input_report_key(input, BTN_RIGHT, prefix & 0x80); + + /* keep touch state for pen event */ + wacom->shared->touch_down = !!prefix && + !wacom->shared->stylus_in_proximity; + + return 1; +} + +static int wacom_bamboo_pad_irq(struct wacom_wac *wacom, size_t len) +{ + unsigned char *data = wacom->data; + + if (!((len == WACOM_PKGLEN_BPAD_TOUCH) || + (len == WACOM_PKGLEN_BPAD_TOUCH_USB)) || + (data[0] != WACOM_REPORT_BPAD_TOUCH)) + return 0; + + if (data[1] & 0x01) + wacom_bamboo_pad_pen_event(wacom, &data[1]); + + if (data[1] & 0x02) + return wacom_bamboo_pad_touch_event(wacom, &data[9]); + + return 0; +} + static int wacom_wireless_irq(struct wacom_wac *wacom, size_t len) { unsigned char *data = wacom->data; @@ -1859,7 +1936,7 @@ static int wacom_wireless_irq(struct wacom_wac *wacom, size_t len) connected = data[1] & 0x01; if (connected) { - int pid, battery, ps_connected; + int pid, battery, charging; if ((wacom->shared->type == INTUOSHT) && wacom->shared->touch_input && @@ -1871,30 +1948,63 @@ static int wacom_wireless_irq(struct wacom_wac *wacom, size_t len) pid = get_unaligned_be16(&data[6]); battery = (data[5] & 0x3f) * 100 / 31; - ps_connected = !!(data[5] & 0x80); + charging = !!(data[5] & 0x80); if (wacom->pid != pid) { wacom->pid = pid; wacom_schedule_work(wacom); } - if (wacom->shared->type && - (battery != wacom->battery_capacity || - ps_connected != wacom->ps_connected)) { - wacom->battery_capacity = battery; - wacom->ps_connected = ps_connected; - wacom->bat_charging = ps_connected && - wacom->battery_capacity < 100; - wacom_notify_battery(wacom); - } + if (wacom->shared->type) + wacom_notify_battery(wacom, battery, charging, 1, 0); + } else if (wacom->pid != 0) { /* disconnected while previously connected */ wacom->pid = 0; wacom_schedule_work(wacom); - wacom->battery_capacity = 0; - wacom->bat_charging = 0; - wacom->ps_connected = 0; + wacom_notify_battery(wacom, 0, 0, 0, 0); + } + + return 0; +} + +static int wacom_status_irq(struct wacom_wac *wacom_wac, size_t len) +{ + struct wacom *wacom = container_of(wacom_wac, struct wacom, wacom_wac); + struct wacom_features *features = &wacom_wac->features; + unsigned char *data = wacom_wac->data; + + if (data[0] != WACOM_REPORT_USB) + return 0; + + if (features->type == INTUOSHT && + wacom_wac->shared->touch_input && + features->touch_max) { + input_report_switch(wacom_wac->shared->touch_input, + SW_MUTE_DEVICE, data[8] & 0x40); + input_sync(wacom_wac->shared->touch_input); } + if (data[9] & 0x02) { /* wireless module is attached */ + int battery = (data[8] & 0x3f) * 100 / 31; + bool charging = !!(data[8] & 0x80); + + wacom_notify_battery(wacom_wac, battery, charging, + battery || charging, 1); + + if (!wacom->battery && + !(features->quirks & WACOM_QUIRK_BATTERY)) { + features->quirks |= WACOM_QUIRK_BATTERY; + INIT_WORK(&wacom->work, wacom_battery_work); + wacom_schedule_work(wacom_wac); + } + } + else if ((features->quirks & WACOM_QUIRK_BATTERY) && + wacom->battery) { + features->quirks &= ~WACOM_QUIRK_BATTERY; + INIT_WORK(&wacom->work, wacom_battery_work); + wacom_schedule_work(wacom_wac); + wacom_notify_battery(wacom_wac, 0, 0, 0, 0); + } return 0; } @@ -1967,6 +2077,8 @@ void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len) case INTUOSPL: if (len == WACOM_PKGLEN_BBTOUCH3) sync = wacom_bpt3_touch(wacom_wac); + else if (wacom_wac->data[0] == WACOM_REPORT_USB) + sync = wacom_status_irq(wacom_wac, len); else sync = wacom_intuos_irq(wacom_wac); break; @@ -1982,7 +2094,14 @@ void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len) case BAMBOO_PT: case INTUOSHT: - sync = wacom_bpt_irq(wacom_wac, len); + if (wacom_wac->data[0] == WACOM_REPORT_USB) + sync = wacom_status_irq(wacom_wac, len); + else + sync = wacom_bpt_irq(wacom_wac, len); + break; + + case BAMBOO_PAD: + sync = wacom_bamboo_pad_irq(wacom_wac, len); break; case WIRELESS: @@ -2054,12 +2173,6 @@ void wacom_setup_device_quirks(struct wacom_features *features) features->y_max = 1023; } - /* these device have multiple inputs */ - if (features->type >= WIRELESS || - (features->type >= INTUOS5S && features->type <= INTUOSHT) || - (features->oVid && features->oPid)) - features->quirks |= WACOM_QUIRK_MULTI_INPUT; - /* quirk for bamboo touch with 2 low res touches */ if (features->type == BAMBOO_PT && features->pktlen == WACOM_PKGLEN_BBTOUCH) { @@ -2323,6 +2436,13 @@ int wacom_setup_pentouch_input_capabilities(struct input_dev *input_dev, 0, 0); } break; + case BAMBOO_PAD: + __clear_bit(ABS_MISC, input_dev->absbit); + input_mt_init_slots(input_dev, features->touch_max, + INPUT_MT_POINTER); + __set_bit(BTN_LEFT, input_dev->keybit); + __set_bit(BTN_RIGHT, input_dev->keybit); + break; } return 0; } @@ -2772,6 +2892,15 @@ static const struct wacom_features wacom_features_0x304 = { "Wacom Cintiq 13HD", 59152, 33448, 1023, 63, WACOM_13HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, WACOM_CINTIQ_OFFSET, WACOM_CINTIQ_OFFSET }; +static const struct wacom_features wacom_features_0x333 = + { "Wacom Cintiq 13HD touch", 59152, 33448, 2047, 63, + WACOM_13HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, + WACOM_CINTIQ_OFFSET, WACOM_CINTIQ_OFFSET, + .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x335 }; +static const struct wacom_features wacom_features_0x335 = + { "Wacom Cintiq 13HD touch", .type = WACOM_24HDT, /* Touch */ + .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x333, .touch_max = 10, + .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; static const struct wacom_features wacom_features_0xC7 = { "Wacom DTU1931", 37832, 30305, 511, 0, PL, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -2976,6 +3105,12 @@ static const struct wacom_features wacom_features_0x30C = { "Wacom ISDv5 30C", .type = WACOM_24HDT, /* Touch */ .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x30A, .touch_max = 10, .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE }; +static const struct wacom_features wacom_features_0x318 = + { "Wacom USB Bamboo PAD", 4095, 4095, /* Touch */ + .type = BAMBOO_PAD, 35, 48, .touch_max = 4 }; +static const struct wacom_features wacom_features_0x319 = + { "Wacom Wireless Bamboo PAD", 4095, 4095, /* Touch */ + .type = BAMBOO_PAD, 35, 48, .touch_max = 4 }; static const struct wacom_features wacom_features_0x323 = { "Wacom Intuos P M", 21600, 13500, 1023, 31, INTUOSHT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, @@ -2992,6 +3127,10 @@ static const struct wacom_features wacom_features_HID_ANY_ID = HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_WACOM, USB_VENDOR_ID_WACOM, prod),\ .driver_data = (kernel_ulong_t)&wacom_features_##prod +#define I2C_DEVICE_WACOM(prod) \ + HID_DEVICE(BUS_I2C, HID_GROUP_WACOM, USB_VENDOR_ID_WACOM, prod),\ + .driver_data = (kernel_ulong_t)&wacom_features_##prod + #define USB_DEVICE_LENOVO(prod) \ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, prod), \ .driver_data = (kernel_ulong_t)&wacom_features_##prod @@ -3124,11 +3263,15 @@ const struct hid_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x314) }, { USB_DEVICE_WACOM(0x315) }, { USB_DEVICE_WACOM(0x317) }, + { USB_DEVICE_WACOM(0x318) }, + { USB_DEVICE_WACOM(0x319) }, { USB_DEVICE_WACOM(0x323) }, { USB_DEVICE_WACOM(0x32A) }, { USB_DEVICE_WACOM(0x32B) }, { USB_DEVICE_WACOM(0x32C) }, { USB_DEVICE_WACOM(0x32F) }, + { USB_DEVICE_WACOM(0x333) }, + { USB_DEVICE_WACOM(0x335) }, { USB_DEVICE_WACOM(0x4001) }, { USB_DEVICE_WACOM(0x4004) }, { USB_DEVICE_WACOM(0x5000) }, @@ -3136,6 +3279,7 @@ const struct hid_device_id wacom_ids[] = { { USB_DEVICE_LENOVO(0x6004) }, { USB_DEVICE_WACOM(HID_ANY_ID) }, + { I2C_DEVICE_WACOM(HID_ANY_ID) }, { } }; MODULE_DEVICE_TABLE(hid, wacom_ids); diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h index 021ee1c1980a..4700ac994a3b 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h @@ -33,6 +33,8 @@ #define WACOM_PKGLEN_MTTPC 40 #define WACOM_PKGLEN_DTUS 68 #define WACOM_PKGLEN_PENABLED 8 +#define WACOM_PKGLEN_BPAD_TOUCH 32 +#define WACOM_PKGLEN_BPAD_TOUCH_USB 64 /* wacom data size per MT contact */ #define WACOM_BYTES_PER_MT_PACKET 11 @@ -67,13 +69,14 @@ #define WACOM_REPORT_24HDT 1 #define WACOM_REPORT_WL 128 #define WACOM_REPORT_USB 192 +#define WACOM_REPORT_BPAD_PEN 3 +#define WACOM_REPORT_BPAD_TOUCH 16 /* device quirks */ -#define WACOM_QUIRK_MULTI_INPUT 0x0001 -#define WACOM_QUIRK_BBTOUCH_LOWRES 0x0002 -#define WACOM_QUIRK_NO_INPUT 0x0004 -#define WACOM_QUIRK_MONITOR 0x0008 -#define WACOM_QUIRK_BATTERY 0x0010 +#define WACOM_QUIRK_BBTOUCH_LOWRES 0x0001 +#define WACOM_QUIRK_NO_INPUT 0x0002 +#define WACOM_QUIRK_MONITOR 0x0004 +#define WACOM_QUIRK_BATTERY 0x0008 #define WACOM_PEN_FIELD(f) (((f)->logical == HID_DG_STYLUS) || \ ((f)->physical == HID_DG_STYLUS) || \ @@ -122,6 +125,7 @@ enum { BAMBOO_PT, WACOM_24HDT, WACOM_27QHDT, + BAMBOO_PAD, TABLETPC, /* add new TPC below */ TABLETPCE, TABLETPC2FG, @@ -169,6 +173,8 @@ struct wacom_shared { unsigned touch_max; int type; struct input_dev *touch_input; + struct hid_device *pen; + struct hid_device *touch; }; struct hid_data { @@ -205,6 +211,7 @@ struct wacom_wac { int battery_capacity; int num_contacts_left; int bat_charging; + int bat_connected; int ps_connected; u8 bt_features; u8 bt_high_speed; diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c index df6a593bd4bd..2b4fad6998c1 100644 --- a/drivers/iio/accel/hid-sensor-accel-3d.c +++ b/drivers/iio/accel/hid-sensor-accel-3d.c @@ -123,7 +123,8 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev, *val = sensor_hub_input_attr_get_raw_value( accel_state->common_attributes.hsdev, HID_USAGE_SENSOR_ACCEL_3D, address, - report_id); + report_id, + SENSOR_HUB_SYNC); else { *val = 0; hid_sensor_power_state(&accel_state->common_attributes, diff --git a/drivers/iio/adc/max1027.c b/drivers/iio/adc/max1027.c index 87ee1c7d0b54..44bf815adb6c 100644 --- a/drivers/iio/adc/max1027.c +++ b/drivers/iio/adc/max1027.c @@ -436,7 +436,7 @@ static int max1027_probe(struct spi_device *spi) indio_dev->num_channels * 2, GFP_KERNEL); if (st->buffer == NULL) { - dev_err(&indio_dev->dev, "Can't allocate bufffer\n"); + dev_err(&indio_dev->dev, "Can't allocate buffer\n"); return -ENOMEM; } diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c index 25b01e156d82..e81f434760f4 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c @@ -153,8 +153,8 @@ s32 hid_sensor_read_poll_value(struct hid_sensor_common *st) int ret; ret = sensor_hub_get_feature(st->hsdev, - st->poll.report_id, - st->poll.index, &value); + st->poll.report_id, + st->poll.index, sizeof(value), &value); if (ret < 0 || value < 0) { return -EINVAL; @@ -174,8 +174,8 @@ int hid_sensor_read_samp_freq_value(struct hid_sensor_common *st, int ret; ret = sensor_hub_get_feature(st->hsdev, - st->poll.report_id, - st->poll.index, &value); + st->poll.report_id, + st->poll.index, sizeof(value), &value); if (ret < 0 || value < 0) { *val1 = *val2 = 0; return -EINVAL; @@ -212,9 +212,8 @@ int hid_sensor_write_samp_freq_value(struct hid_sensor_common *st, else value = 0; } - ret = sensor_hub_set_feature(st->hsdev, - st->poll.report_id, - st->poll.index, value); + ret = sensor_hub_set_feature(st->hsdev, st->poll.report_id, + st->poll.index, sizeof(value), &value); if (ret < 0 || value < 0) ret = -EINVAL; @@ -229,8 +228,9 @@ int hid_sensor_read_raw_hyst_value(struct hid_sensor_common *st, int ret; ret = sensor_hub_get_feature(st->hsdev, - st->sensitivity.report_id, - st->sensitivity.index, &value); + st->sensitivity.report_id, + st->sensitivity.index, sizeof(value), + &value); if (ret < 0 || value < 0) { *val1 = *val2 = 0; return -EINVAL; @@ -253,9 +253,9 @@ int hid_sensor_write_raw_hyst_value(struct hid_sensor_common *st, value = convert_to_vtf_format(st->sensitivity.size, st->sensitivity.unit_expo, val1, val2); - ret = sensor_hub_set_feature(st->hsdev, - st->sensitivity.report_id, - st->sensitivity.index, value); + ret = sensor_hub_set_feature(st->hsdev, st->sensitivity.report_id, + st->sensitivity.index, sizeof(value), + &value); if (ret < 0 || value < 0) ret = -EINVAL; diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index 2f1d535b94c4..610fc98f88ef 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -68,20 +68,21 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) if (state_val >= 0) { state_val += st->power_state.logical_minimum; sensor_hub_set_feature(st->hsdev, st->power_state.report_id, - st->power_state.index, - (s32)state_val); + st->power_state.index, sizeof(state_val), + &state_val); } if (report_val >= 0) { report_val += st->report_state.logical_minimum; sensor_hub_set_feature(st->hsdev, st->report_state.report_id, - st->report_state.index, - (s32)report_val); + st->report_state.index, + sizeof(report_val), + &report_val); } sensor_hub_get_feature(st->hsdev, st->power_state.report_id, - st->power_state.index, - &state_val); + st->power_state.index, + sizeof(state_val), &state_val); if (state && poll_value) msleep_interruptible(poll_value * 2); diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c index a3c3e19de527..b5883b6f4e50 100644 --- a/drivers/iio/gyro/hid-sensor-gyro-3d.c +++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c @@ -123,7 +123,8 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev, *val = sensor_hub_input_attr_get_raw_value( gyro_state->common_attributes.hsdev, HID_USAGE_SENSOR_GYRO_3D, address, - report_id); + report_id, + SENSOR_HUB_SYNC); else { *val = 0; hid_sensor_power_state(&gyro_state->common_attributes, diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c index 948acfc38b8c..1609ecdd01b0 100644 --- a/drivers/iio/light/hid-sensor-als.c +++ b/drivers/iio/light/hid-sensor-als.c @@ -101,7 +101,8 @@ static int als_read_raw(struct iio_dev *indio_dev, *val = sensor_hub_input_attr_get_raw_value( als_state->common_attributes.hsdev, HID_USAGE_SENSOR_ALS, address, - report_id); + report_id, + SENSOR_HUB_SYNC); hid_sensor_power_state(&als_state->common_attributes, false); } else { diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 3ecf79ed08ac..91ecc46ffeaa 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -96,7 +96,8 @@ static int prox_read_raw(struct iio_dev *indio_dev, *val = sensor_hub_input_attr_get_raw_value( prox_state->common_attributes.hsdev, HID_USAGE_SENSOR_PROX, address, - report_id); + report_id, + SENSOR_HUB_SYNC); hid_sensor_power_state(&prox_state->common_attributes, false); } else { diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c index d22993b4066a..4f9c0be24451 100644 --- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c +++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c @@ -170,7 +170,8 @@ static int magn_3d_read_raw(struct iio_dev *indio_dev, *val = sensor_hub_input_attr_get_raw_value( magn_state->common_attributes.hsdev, HID_USAGE_SENSOR_COMPASS_3D, address, - report_id); + report_id, + SENSOR_HUB_SYNC); else { *val = 0; hid_sensor_power_state(&magn_state->common_attributes, diff --git a/drivers/iio/orientation/hid-sensor-incl-3d.c b/drivers/iio/orientation/hid-sensor-incl-3d.c index 73854460bb2c..5930fa32a2ab 100644 --- a/drivers/iio/orientation/hid-sensor-incl-3d.c +++ b/drivers/iio/orientation/hid-sensor-incl-3d.c @@ -124,7 +124,8 @@ static int incl_3d_read_raw(struct iio_dev *indio_dev, *val = sensor_hub_input_attr_get_raw_value( incl_state->common_attributes.hsdev, HID_USAGE_SENSOR_INCLINOMETER_3D, address, - report_id); + report_id, + SENSOR_HUB_SYNC); else { hid_sensor_power_state(&incl_state->common_attributes, false); diff --git a/drivers/iio/pressure/hid-sensor-press.c b/drivers/iio/pressure/hid-sensor-press.c index 1af314926ebd..7bb8d4c1f7df 100644 --- a/drivers/iio/pressure/hid-sensor-press.c +++ b/drivers/iio/pressure/hid-sensor-press.c @@ -100,7 +100,8 @@ static int press_read_raw(struct iio_dev *indio_dev, *val = sensor_hub_input_attr_get_raw_value( press_state->common_attributes.hsdev, HID_USAGE_SENSOR_PRESSURE, address, - report_id); + report_id, + SENSOR_HUB_SYNC); hid_sensor_power_state(&press_state->common_attributes, false); } else { diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 6d7f453b4d05..aed8afee56da 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -40,7 +40,6 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/io.h> -#include <linux/aio.h> #include <linux/jiffies.h> #include <linux/cpu.h> #include <asm/pgtable.h> diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 41937c6f888a..14046f5a37fa 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -39,7 +39,6 @@ #include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/io.h> -#include <linux/aio.h> #include <linux/jiffies.h> #include <asm/pgtable.h> #include <linux/delay.h> diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index cb150a1dbaff..34feb3e1127b 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -88,10 +88,13 @@ int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots, goto err_mem; } - /* Mark slots as 'unused' */ + /* Mark slots as 'inactive' */ for (i = 0; i < num_slots; i++) input_mt_set_value(&mt->slots[i], ABS_MT_TRACKING_ID, -1); + /* Mark slots as 'unused' */ + mt->frame = 1; + dev->mt = mt; return 0; err_mem: @@ -439,6 +442,8 @@ EXPORT_SYMBOL(input_mt_assign_slots); * set the key on the first unused slot and return. * * If no available slot can be found, -1 is returned. + * Note that for this function to work properly, input_mt_sync_frame() has + * to be called at each frame. */ int input_mt_get_slot_by_key(struct input_dev *dev, int key) { @@ -453,7 +458,7 @@ int input_mt_get_slot_by_key(struct input_dev *dev, int key) return s - mt->slots; for (s = mt->slots; s != mt->slots + mt->num_slots; s++) - if (!input_mt_is_active(s)) { + if (!input_mt_is_active(s) && !input_mt_is_used(mt, s)) { s->key = key; return s - mt->slots; } diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c index c1493f4162fb..d5bdbaf93a1a 100644 --- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c +++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c @@ -1092,7 +1092,7 @@ inf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } card->ci = get_card_info(ent->driver_data); if (!card->ci) { - pr_info("mISDN: do not have informations about adapter at %s\n", + pr_info("mISDN: do not have information about adapter at %s\n", pci_name(pdev)); kfree(card); pci_disable_device(pdev); diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c index 87f7dff20ff6..52c43821f746 100644 --- a/drivers/isdn/mISDN/dsp_cmx.c +++ b/drivers/isdn/mISDN/dsp_cmx.c @@ -295,7 +295,7 @@ dsp_cmx_del_conf_member(struct dsp *dsp) } } printk(KERN_WARNING - "%s: dsp is not present in its own conf_meber list.\n", + "%s: dsp is not present in its own conf_member list.\n", __func__); return -EINVAL; diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c index 77025f5cb57d..0222b1a35a2d 100644 --- a/drivers/isdn/mISDN/dsp_core.c +++ b/drivers/isdn/mISDN/dsp_core.c @@ -460,7 +460,7 @@ dsp_control_req(struct dsp *dsp, struct mISDNhead *hh, struct sk_buff *skb) } if (dsp_debug & DEBUG_DSP_CORE) printk(KERN_DEBUG "%s: enable mixing of " - "tx-data with conf mebers\n", __func__); + "tx-data with conf members\n", __func__); dsp->tx_mix = 1; dsp_cmx_hardware(dsp->conf, dsp); dsp_rx_off(dsp); @@ -474,7 +474,7 @@ dsp_control_req(struct dsp *dsp, struct mISDNhead *hh, struct sk_buff *skb) } if (dsp_debug & DEBUG_DSP_CORE) printk(KERN_DEBUG "%s: disable mixing of " - "tx-data with conf mebers\n", __func__); + "tx-data with conf members\n", __func__); dsp->tx_mix = 0; dsp_cmx_hardware(dsp->conf, dsp); dsp_rx_off(dsp); diff --git a/drivers/media/dvb-frontends/m88ds3103.c b/drivers/media/dvb-frontends/m88ds3103.c index ba4ee0b48834..d3d928e1c0ce 100644 --- a/drivers/media/dvb-frontends/m88ds3103.c +++ b/drivers/media/dvb-frontends/m88ds3103.c @@ -630,7 +630,7 @@ static int m88ds3103_init(struct dvb_frontend *fe) /* request the firmware, this will block and timeout */ ret = request_firmware(&fw, fw_file, priv->i2c->dev.parent); if (ret) { - dev_err(&priv->i2c->dev, "%s: firmare file '%s' not found\n", + dev_err(&priv->i2c->dev, "%s: firmware file '%s' not found\n", KBUILD_MODNAME, fw_file); goto err; } diff --git a/drivers/media/dvb-frontends/si2168_priv.h b/drivers/media/dvb-frontends/si2168_priv.h index aadd1367673f..d7efce8043ed 100644 --- a/drivers/media/dvb-frontends/si2168_priv.h +++ b/drivers/media/dvb-frontends/si2168_priv.h @@ -40,7 +40,7 @@ struct si2168_dev { bool ts_clock_inv; }; -/* firmare command struct */ +/* firmware command struct */ #define SI2168_ARGLEN 30 struct si2168_cmd { u8 args[SI2168_ARGLEN]; diff --git a/drivers/media/dvb-frontends/tda10071_priv.h b/drivers/media/dvb-frontends/tda10071_priv.h index 420486192736..03f839c431e9 100644 --- a/drivers/media/dvb-frontends/tda10071_priv.h +++ b/drivers/media/dvb-frontends/tda10071_priv.h @@ -99,7 +99,7 @@ struct tda10071_reg_val_mask { #define CMD_BER_CONTROL 0x3e #define CMD_BER_UPDATE_COUNTERS 0x3f -/* firmare command struct */ +/* firmware command struct */ #define TDA10071_ARGLEN 30 struct tda10071_cmd { u8 args[TDA10071_ARGLEN]; diff --git a/drivers/media/tuners/msi001.c b/drivers/media/tuners/msi001.c index 26019e731993..74cfc3c98edb 100644 --- a/drivers/media/tuners/msi001.c +++ b/drivers/media/tuners/msi001.c @@ -408,7 +408,7 @@ static int msi001_s_ctrl(struct v4l2_ctrl *ctrl) s->mixer_gain->cur.val, s->if_gain->val); break; default: - dev_dbg(&s->spi->dev, "unkown control %d\n", ctrl->id); + dev_dbg(&s->spi->dev, "unknown control %d\n", ctrl->id); ret = -EINVAL; } diff --git a/drivers/mfd/si476x-i2c.c b/drivers/mfd/si476x-i2c.c index 0e4a76daf187..7f87c62d91b3 100644 --- a/drivers/mfd/si476x-i2c.c +++ b/drivers/mfd/si476x-i2c.c @@ -766,7 +766,7 @@ static int si476x_core_probe(struct i2c_client *client, sizeof(struct v4l2_rds_data), GFP_KERNEL); if (rval) { - dev_err(&client->dev, "Could not alloate the FIFO\n"); + dev_err(&client->dev, "Could not allocate the FIFO\n"); goto free_gpio; } mutex_init(&core->rds_drainer_status_lock); diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c index c4cb9a984a5f..40ea639fa413 100644 --- a/drivers/misc/mei/amthif.c +++ b/drivers/misc/mei/amthif.c @@ -19,7 +19,6 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/fcntl.h> -#include <linux/aio.h> #include <linux/ioctl.h> #include <linux/cdev.h> #include <linux/list.h> diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 3c019c0e60eb..47680c84801c 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -22,7 +22,6 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/fcntl.h> -#include <linux/aio.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/ioctl.h> diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index bd3039ab8f98..af44ee26075d 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -21,7 +21,6 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/fcntl.h> -#include <linux/aio.h> #include <linux/pci.h> #include <linux/poll.h> #include <linux/ioctl.h> diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index c84131e28625..f36c76f8b232 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1758,7 +1758,7 @@ static int mmc_runtime_suspend(struct mmc_host *host) err = _mmc_suspend(host, true); if (err) - pr_err("%s: error %d doing aggessive suspend\n", + pr_err("%s: error %d doing aggressive suspend\n", mmc_hostname(host), err); return err; @@ -1776,7 +1776,7 @@ static int mmc_runtime_resume(struct mmc_host *host) err = _mmc_resume(host); if (err) - pr_err("%s: error %d doing aggessive resume\n", + pr_err("%s: error %d doing aggressive resume\n", mmc_hostname(host), err); return 0; diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index ad4d43eae99d..31a9ef256d06 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -1157,7 +1157,7 @@ static int mmc_sd_runtime_suspend(struct mmc_host *host) err = _mmc_sd_suspend(host); if (err) - pr_err("%s: error %d doing aggessive suspend\n", + pr_err("%s: error %d doing aggressive suspend\n", mmc_hostname(host), err); return err; @@ -1175,7 +1175,7 @@ static int mmc_sd_runtime_resume(struct mmc_host *host) err = _mmc_sd_resume(host); if (err) - pr_err("%s: error %d doing aggessive resume\n", + pr_err("%s: error %d doing aggressive resume\n", mmc_hostname(host), err); return 0; diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c index 27f272ed502a..43fa16b5f510 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c @@ -1105,7 +1105,7 @@ int gpmi_is_ready(struct gpmi_nand_data *this, unsigned chip) mask = MX28_BF_GPMI_STAT_READY_BUSY(1 << chip); reg = readl(r->gpmi_regs + HW_GPMI_STAT); } else - dev_err(this->dev, "unknow arch.\n"); + dev_err(this->dev, "unknown arch.\n"); return reg & mask; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index cfe3c7695455..4c9678c8e139 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -592,7 +592,7 @@ int bnx2x_vf_mcast(struct bnx2x *bp, struct bnx2x_virtf *vf, mc = kzalloc(mc_num * sizeof(struct bnx2x_mcast_list_elem), GFP_KERNEL); if (!mc) { - BNX2X_ERR("Cannot Configure mulicasts due to lack of memory\n"); + BNX2X_ERR("Cannot Configure multicasts due to lack of memory\n"); return -ENOMEM; } } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index 3e0f705a4311..75ee9e4ced51 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -818,7 +818,7 @@ int qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *adapter, if (rv) netdev_err(adapter->netdev, - "Failed to set Rx coalescing parametrs\n"); + "Failed to set Rx coalescing parameters\n"); return rv; } diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index e22e602beef3..4c2b5a80f17c 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -614,7 +614,7 @@ static void recalibrate(struct dp83640_clock *clock) trigger = CAL_TRIGGER; cal_gpio = 1 + ptp_find_pin(clock->ptp_clock, PTP_PF_PHYSYNC, 0); if (cal_gpio < 1) { - pr_err("PHY calibration pin not avaible - PHY is not calibrated."); + pr_err("PHY calibration pin not available - PHY is not calibrated."); return; } diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index e71a2ce7a448..b97367d50717 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -3211,7 +3211,7 @@ static void airo_print_status(const char *devname, u16 status) airo_print_dbg(devname, "link lost (TSF sync lost)"); break; default: - airo_print_dbg(devname, "unknow status %x\n", status); + airo_print_dbg(devname, "unknown status %x\n", status); break; } break; @@ -3233,7 +3233,7 @@ static void airo_print_status(const char *devname, u16 status) case STAT_REASSOC: break; default: - airo_print_dbg(devname, "unknow status %x\n", status); + airo_print_dbg(devname, "unknown status %x\n", status); break; } } diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index d6d2f0f00caa..4ce433f1c1fb 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4857,7 +4857,7 @@ static void ath10k_sta_rc_update(struct ieee80211_hw *hw, bw = WMI_PEER_CHWIDTH_80MHZ; break; case IEEE80211_STA_RX_BW_160: - ath10k_warn(ar, "Invalid bandwith %d in rc update for %pM\n", + ath10k_warn(ar, "Invalid bandwidth %d in rc update for %pM\n", sta->bandwidth, sta->addr); bw = WMI_PEER_CHWIDTH_20MHZ; break; diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c index 69ed39731902..dbd894428be6 100644 --- a/drivers/net/wireless/ath/wcn36xx/smd.c +++ b/drivers/net/wireless/ath/wcn36xx/smd.c @@ -1701,7 +1701,7 @@ int wcn36xx_smd_keep_alive_req(struct wcn36xx *wcn, } else if (packet_type == WCN36XX_HAL_KEEP_ALIVE_UNSOLICIT_ARP_RSP) { /* TODO: it also support ARP response type */ } else { - wcn36xx_warn("unknow keep alive packet type %d\n", packet_type); + wcn36xx_warn("unknown keep alive packet type %d\n", packet_type); ret = -EINVAL; goto out; } diff --git a/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac.h b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac.h new file mode 100644 index 000000000000..a0da3248b942 --- /dev/null +++ b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2011 Broadcom Corporation + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#if !defined(__TRACE_BRCMSMAC_H) || defined(TRACE_HEADER_MULTI_READ) +#define __TRACE_BRCMSMAC_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM brcmsmac + +/* + * We define a tracepoint, its arguments, its printk format and its + * 'fast binary record' layout. + */ +TRACE_EVENT(brcms_timer, + /* TPPROTO is the prototype of the function called by this tracepoint */ + TP_PROTO(struct brcms_timer *t), + /* + * TPARGS(firstarg, p) are the parameters names, same as found in the + * prototype. + */ + TP_ARGS(t), + /* + * Fast binary tracing: define the trace record via TP_STRUCT__entry(). + * You can think about it like a regular C structure local variable + * definition. + */ + TP_STRUCT__entry( + __field(uint, ms) + __field(uint, set) + __field(uint, periodic) + ), + TP_fast_assign( + __entry->ms = t->ms; + __entry->set = t->set; + __entry->periodic = t->periodic; + ), + TP_printk( + "ms=%u set=%u periodic=%u", + __entry->ms, __entry->set, __entry->periodic + ) +); + +TRACE_EVENT(brcms_dpc, + TP_PROTO(unsigned long data), + TP_ARGS(data), + TP_STRUCT__entry( + __field(unsigned long, data) + ), + TP_fast_assign( + __entry->data = data; + ), + TP_printk( + "data=%p", + (void *)__entry->data + ) +); + +TRACE_EVENT(brcms_macintstatus, + TP_PROTO(const struct device *dev, int in_isr, u32 macintstatus, + u32 mask), + TP_ARGS(dev, in_isr, macintstatus, mask), + TP_STRUCT__entry( + __string(dev, dev_name(dev)) + __field(int, in_isr) + __field(u32, macintstatus) + __field(u32, mask) + ), + TP_fast_assign( + __assign_str(dev, dev_name(dev)); + __entry->in_isr = in_isr; + __entry->macintstatus = macintstatus; + __entry->mask = mask; + ), + TP_printk("[%s] in_isr=%d macintstatus=%#x mask=%#x", __get_str(dev), + __entry->in_isr, __entry->macintstatus, __entry->mask) +); +#endif /* __TRACE_BRCMSMAC_H */ + +#ifdef CONFIG_BRCM_TRACING + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE brcms_trace_brcmsmac +#include <trace/define_trace.h> + +#endif /* CONFIG_BRCM_TRACING */ diff --git a/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h new file mode 100644 index 000000000000..0e8a69ab909f --- /dev/null +++ b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2011 Broadcom Corporation + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#if !defined(__TRACE_BRCMSMAC_MSG_H) || defined(TRACE_HEADER_MULTI_READ) +#define __TRACE_BRCMSMAC_MSG_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM brcmsmac_msg + +#define MAX_MSG_LEN 100 + +DECLARE_EVENT_CLASS(brcms_msg_event, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf), + TP_STRUCT__entry( + __dynamic_array(char, msg, MAX_MSG_LEN) + ), + TP_fast_assign( + WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), + MAX_MSG_LEN, vaf->fmt, + *vaf->va) >= MAX_MSG_LEN); + ), + TP_printk("%s", __get_str(msg)) +); + +DEFINE_EVENT(brcms_msg_event, brcms_info, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); + +DEFINE_EVENT(brcms_msg_event, brcms_warn, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); + +DEFINE_EVENT(brcms_msg_event, brcms_err, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); + +DEFINE_EVENT(brcms_msg_event, brcms_crit, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); + +TRACE_EVENT(brcms_dbg, + TP_PROTO(u32 level, const char *func, struct va_format *vaf), + TP_ARGS(level, func, vaf), + TP_STRUCT__entry( + __field(u32, level) + __string(func, func) + __dynamic_array(char, msg, MAX_MSG_LEN) + ), + TP_fast_assign( + __entry->level = level; + __assign_str(func, func); + WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), + MAX_MSG_LEN, vaf->fmt, + *vaf->va) >= MAX_MSG_LEN); + ), + TP_printk("%s: %s", __get_str(func), __get_str(msg)) +); +#endif /* __TRACE_BRCMSMAC_MSG_H */ + +#ifdef CONFIG_BRCM_TRACING + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE brcms_trace_brcmsmac_msg +#include <trace/define_trace.h> + +#endif /* CONFIG_BRCM_TRACING */ diff --git a/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac_tx.h b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac_tx.h new file mode 100644 index 000000000000..cf2cc070f1e5 --- /dev/null +++ b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_brcmsmac_tx.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2011 Broadcom Corporation + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#if !defined(__TRACE_BRCMSMAC_TX_H) || defined(TRACE_HEADER_MULTI_READ) +#define __TRACE_BRCMSMAC_TX_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM brcmsmac_tx + +TRACE_EVENT(brcms_txdesc, + TP_PROTO(const struct device *dev, + void *txh, size_t txh_len), + TP_ARGS(dev, txh, txh_len), + TP_STRUCT__entry( + __string(dev, dev_name(dev)) + __dynamic_array(u8, txh, txh_len) + ), + TP_fast_assign( + __assign_str(dev, dev_name(dev)); + memcpy(__get_dynamic_array(txh), txh, txh_len); + ), + TP_printk("[%s] txdesc", __get_str(dev)) +); + +TRACE_EVENT(brcms_txstatus, + TP_PROTO(const struct device *dev, u16 framelen, u16 frameid, + u16 status, u16 lasttxtime, u16 sequence, u16 phyerr, + u16 ackphyrxsh), + TP_ARGS(dev, framelen, frameid, status, lasttxtime, sequence, phyerr, + ackphyrxsh), + TP_STRUCT__entry( + __string(dev, dev_name(dev)) + __field(u16, framelen) + __field(u16, frameid) + __field(u16, status) + __field(u16, lasttxtime) + __field(u16, sequence) + __field(u16, phyerr) + __field(u16, ackphyrxsh) + ), + TP_fast_assign( + __assign_str(dev, dev_name(dev)); + __entry->framelen = framelen; + __entry->frameid = frameid; + __entry->status = status; + __entry->lasttxtime = lasttxtime; + __entry->sequence = sequence; + __entry->phyerr = phyerr; + __entry->ackphyrxsh = ackphyrxsh; + ), + TP_printk("[%s] FrameId %#04x TxStatus %#04x LastTxTime %#04x " + "Seq %#04x PHYTxStatus %#04x RxAck %#04x", + __get_str(dev), __entry->frameid, __entry->status, + __entry->lasttxtime, __entry->sequence, __entry->phyerr, + __entry->ackphyrxsh) +); + +TRACE_EVENT(brcms_ampdu_session, + TP_PROTO(const struct device *dev, unsigned max_ampdu_len, + u16 max_ampdu_frames, u16 ampdu_len, u16 ampdu_frames, + u16 dma_len), + TP_ARGS(dev, max_ampdu_len, max_ampdu_frames, ampdu_len, ampdu_frames, + dma_len), + TP_STRUCT__entry( + __string(dev, dev_name(dev)) + __field(unsigned, max_ampdu_len) + __field(u16, max_ampdu_frames) + __field(u16, ampdu_len) + __field(u16, ampdu_frames) + __field(u16, dma_len) + ), + TP_fast_assign( + __assign_str(dev, dev_name(dev)); + __entry->max_ampdu_len = max_ampdu_len; + __entry->max_ampdu_frames = max_ampdu_frames; + __entry->ampdu_len = ampdu_len; + __entry->ampdu_frames = ampdu_frames; + __entry->dma_len = dma_len; + ), + TP_printk("[%s] ampdu session max_len=%u max_frames=%u len=%u frames=%u dma_len=%u", + __get_str(dev), __entry->max_ampdu_len, + __entry->max_ampdu_frames, __entry->ampdu_len, + __entry->ampdu_frames, __entry->dma_len) +); +#endif /* __TRACE_BRCMSMAC_TX_H */ + +#ifdef CONFIG_BRCM_TRACING + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE brcms_trace_brcmsmac_tx +#include <trace/define_trace.h> + +#endif /* CONFIG_BRCM_TRACING */ diff --git a/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_events.h b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_events.h index 871781e6a713..cbf2f06436fc 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_events.h +++ b/drivers/net/wireless/brcm80211/brcmsmac/brcms_trace_events.h @@ -14,9 +14,8 @@ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#if !defined(__TRACE_BRCMSMAC_H) || defined(TRACE_HEADER_MULTI_READ) - -#define __TRACE_BRCMSMAC_H +#ifndef __BRCMS_TRACE_EVENTS_H +#define __BRCMS_TRACE_EVENTS_H #include <linux/types.h> #include <linux/device.h> @@ -34,222 +33,8 @@ static inline void trace_ ## name(proto) {} static inline void trace_ ## name(proto) {} #endif -#undef TRACE_SYSTEM -#define TRACE_SYSTEM brcmsmac - -/* - * We define a tracepoint, its arguments, its printk format and its - * 'fast binary record' layout. - */ -TRACE_EVENT(brcms_timer, - /* TPPROTO is the prototype of the function called by this tracepoint */ - TP_PROTO(struct brcms_timer *t), - /* - * TPARGS(firstarg, p) are the parameters names, same as found in the - * prototype. - */ - TP_ARGS(t), - /* - * Fast binary tracing: define the trace record via TP_STRUCT__entry(). - * You can think about it like a regular C structure local variable - * definition. - */ - TP_STRUCT__entry( - __field(uint, ms) - __field(uint, set) - __field(uint, periodic) - ), - TP_fast_assign( - __entry->ms = t->ms; - __entry->set = t->set; - __entry->periodic = t->periodic; - ), - TP_printk( - "ms=%u set=%u periodic=%u", - __entry->ms, __entry->set, __entry->periodic - ) -); - -TRACE_EVENT(brcms_dpc, - TP_PROTO(unsigned long data), - TP_ARGS(data), - TP_STRUCT__entry( - __field(unsigned long, data) - ), - TP_fast_assign( - __entry->data = data; - ), - TP_printk( - "data=%p", - (void *)__entry->data - ) -); - -TRACE_EVENT(brcms_macintstatus, - TP_PROTO(const struct device *dev, int in_isr, u32 macintstatus, - u32 mask), - TP_ARGS(dev, in_isr, macintstatus, mask), - TP_STRUCT__entry( - __string(dev, dev_name(dev)) - __field(int, in_isr) - __field(u32, macintstatus) - __field(u32, mask) - ), - TP_fast_assign( - __assign_str(dev, dev_name(dev)); - __entry->in_isr = in_isr; - __entry->macintstatus = macintstatus; - __entry->mask = mask; - ), - TP_printk("[%s] in_isr=%d macintstatus=%#x mask=%#x", __get_str(dev), - __entry->in_isr, __entry->macintstatus, __entry->mask) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM brcmsmac_tx - -TRACE_EVENT(brcms_txdesc, - TP_PROTO(const struct device *dev, - void *txh, size_t txh_len), - TP_ARGS(dev, txh, txh_len), - TP_STRUCT__entry( - __string(dev, dev_name(dev)) - __dynamic_array(u8, txh, txh_len) - ), - TP_fast_assign( - __assign_str(dev, dev_name(dev)); - memcpy(__get_dynamic_array(txh), txh, txh_len); - ), - TP_printk("[%s] txdesc", __get_str(dev)) -); - -TRACE_EVENT(brcms_txstatus, - TP_PROTO(const struct device *dev, u16 framelen, u16 frameid, - u16 status, u16 lasttxtime, u16 sequence, u16 phyerr, - u16 ackphyrxsh), - TP_ARGS(dev, framelen, frameid, status, lasttxtime, sequence, phyerr, - ackphyrxsh), - TP_STRUCT__entry( - __string(dev, dev_name(dev)) - __field(u16, framelen) - __field(u16, frameid) - __field(u16, status) - __field(u16, lasttxtime) - __field(u16, sequence) - __field(u16, phyerr) - __field(u16, ackphyrxsh) - ), - TP_fast_assign( - __assign_str(dev, dev_name(dev)); - __entry->framelen = framelen; - __entry->frameid = frameid; - __entry->status = status; - __entry->lasttxtime = lasttxtime; - __entry->sequence = sequence; - __entry->phyerr = phyerr; - __entry->ackphyrxsh = ackphyrxsh; - ), - TP_printk("[%s] FrameId %#04x TxStatus %#04x LastTxTime %#04x " - "Seq %#04x PHYTxStatus %#04x RxAck %#04x", - __get_str(dev), __entry->frameid, __entry->status, - __entry->lasttxtime, __entry->sequence, __entry->phyerr, - __entry->ackphyrxsh) -); - -TRACE_EVENT(brcms_ampdu_session, - TP_PROTO(const struct device *dev, unsigned max_ampdu_len, - u16 max_ampdu_frames, u16 ampdu_len, u16 ampdu_frames, - u16 dma_len), - TP_ARGS(dev, max_ampdu_len, max_ampdu_frames, ampdu_len, ampdu_frames, - dma_len), - TP_STRUCT__entry( - __string(dev, dev_name(dev)) - __field(unsigned, max_ampdu_len) - __field(u16, max_ampdu_frames) - __field(u16, ampdu_len) - __field(u16, ampdu_frames) - __field(u16, dma_len) - ), - TP_fast_assign( - __assign_str(dev, dev_name(dev)); - __entry->max_ampdu_len = max_ampdu_len; - __entry->max_ampdu_frames = max_ampdu_frames; - __entry->ampdu_len = ampdu_len; - __entry->ampdu_frames = ampdu_frames; - __entry->dma_len = dma_len; - ), - TP_printk("[%s] ampdu session max_len=%u max_frames=%u len=%u frames=%u dma_len=%u", - __get_str(dev), __entry->max_ampdu_len, - __entry->max_ampdu_frames, __entry->ampdu_len, - __entry->ampdu_frames, __entry->dma_len) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM brcmsmac_msg - -#define MAX_MSG_LEN 100 - -DECLARE_EVENT_CLASS(brcms_msg_event, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf), - TP_STRUCT__entry( - __dynamic_array(char, msg, MAX_MSG_LEN) - ), - TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); - ), - TP_printk("%s", __get_str(msg)) -); - -DEFINE_EVENT(brcms_msg_event, brcms_info, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); - -DEFINE_EVENT(brcms_msg_event, brcms_warn, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); - -DEFINE_EVENT(brcms_msg_event, brcms_err, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); - -DEFINE_EVENT(brcms_msg_event, brcms_crit, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); - -TRACE_EVENT(brcms_dbg, - TP_PROTO(u32 level, const char *func, struct va_format *vaf), - TP_ARGS(level, func, vaf), - TP_STRUCT__entry( - __field(u32, level) - __string(func, func) - __dynamic_array(char, msg, MAX_MSG_LEN) - ), - TP_fast_assign( - __entry->level = level; - __assign_str(func, func); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); - ), - TP_printk("%s: %s", __get_str(func), __get_str(msg)) -); +#include "brcms_trace_brcmsmac.h" +#include "brcms_trace_brcmsmac_tx.h" +#include "brcms_trace_brcmsmac_msg.h" #endif /* __TRACE_BRCMSMAC_H */ - -#ifdef CONFIG_BRCM_TRACING - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE brcms_trace_events - -#include <trace/define_trace.h> - -#endif /* CONFIG_BRCM_TRACING */ diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace-data.h b/drivers/net/wireless/iwlwifi/iwl-devtrace-data.h new file mode 100644 index 000000000000..04e6649340b8 --- /dev/null +++ b/drivers/net/wireless/iwlwifi/iwl-devtrace-data.h @@ -0,0 +1,79 @@ +/****************************************************************************** + * + * Copyright(c) 2009 - 2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + * + * Contact Information: + * Intel Linux Wireless <ilw@linux.intel.com> + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + *****************************************************************************/ + +#if !defined(__IWLWIFI_DEVICE_TRACE_DATA) || defined(TRACE_HEADER_MULTI_READ) +#define __IWLWIFI_DEVICE_TRACE_DATA + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM iwlwifi_data + +TRACE_EVENT(iwlwifi_dev_tx_data, + TP_PROTO(const struct device *dev, + struct sk_buff *skb, + void *data, size_t data_len), + TP_ARGS(dev, skb, data, data_len), + TP_STRUCT__entry( + DEV_ENTRY + + __dynamic_array(u8, data, iwl_trace_data(skb) ? data_len : 0) + ), + TP_fast_assign( + DEV_ASSIGN; + if (iwl_trace_data(skb)) + memcpy(__get_dynamic_array(data), data, data_len); + ), + TP_printk("[%s] TX frame data", __get_str(dev)) +); + +TRACE_EVENT(iwlwifi_dev_rx_data, + TP_PROTO(const struct device *dev, + const struct iwl_trans *trans, + void *rxbuf, size_t len), + TP_ARGS(dev, trans, rxbuf, len), + TP_STRUCT__entry( + DEV_ENTRY + + __dynamic_array(u8, data, + len - iwl_rx_trace_len(trans, rxbuf, len)) + ), + TP_fast_assign( + size_t offs = iwl_rx_trace_len(trans, rxbuf, len); + DEV_ASSIGN; + if (offs < len) + memcpy(__get_dynamic_array(data), + ((u8 *)rxbuf) + offs, len - offs); + ), + TP_printk("[%s] RX frame data", __get_str(dev)) +); +#endif /* __IWLWIFI_DEVICE_TRACE_DATA */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE iwl-devtrace-data +#include <trace/define_trace.h> diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace-io.h b/drivers/net/wireless/iwlwifi/iwl-devtrace-io.h new file mode 100644 index 000000000000..f62c54485852 --- /dev/null +++ b/drivers/net/wireless/iwlwifi/iwl-devtrace-io.h @@ -0,0 +1,155 @@ +/****************************************************************************** + * + * Copyright(c) 2009 - 2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + * + * Contact Information: + * Intel Linux Wireless <ilw@linux.intel.com> + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + *****************************************************************************/ + +#if !defined(__IWLWIFI_DEVICE_TRACE_IO) || defined(TRACE_HEADER_MULTI_READ) +#define __IWLWIFI_DEVICE_TRACE_IO + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM iwlwifi_io + +TRACE_EVENT(iwlwifi_dev_ioread32, + TP_PROTO(const struct device *dev, u32 offs, u32 val), + TP_ARGS(dev, offs, val), + TP_STRUCT__entry( + DEV_ENTRY + __field(u32, offs) + __field(u32, val) + ), + TP_fast_assign( + DEV_ASSIGN; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] read io[%#x] = %#x", + __get_str(dev), __entry->offs, __entry->val) +); + +TRACE_EVENT(iwlwifi_dev_iowrite8, + TP_PROTO(const struct device *dev, u32 offs, u8 val), + TP_ARGS(dev, offs, val), + TP_STRUCT__entry( + DEV_ENTRY + __field(u32, offs) + __field(u8, val) + ), + TP_fast_assign( + DEV_ASSIGN; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] write io[%#x] = %#x)", + __get_str(dev), __entry->offs, __entry->val) +); + +TRACE_EVENT(iwlwifi_dev_iowrite32, + TP_PROTO(const struct device *dev, u32 offs, u32 val), + TP_ARGS(dev, offs, val), + TP_STRUCT__entry( + DEV_ENTRY + __field(u32, offs) + __field(u32, val) + ), + TP_fast_assign( + DEV_ASSIGN; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] write io[%#x] = %#x)", + __get_str(dev), __entry->offs, __entry->val) +); + +TRACE_EVENT(iwlwifi_dev_iowrite_prph32, + TP_PROTO(const struct device *dev, u32 offs, u32 val), + TP_ARGS(dev, offs, val), + TP_STRUCT__entry( + DEV_ENTRY + __field(u32, offs) + __field(u32, val) + ), + TP_fast_assign( + DEV_ASSIGN; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] write PRPH[%#x] = %#x)", + __get_str(dev), __entry->offs, __entry->val) +); + +TRACE_EVENT(iwlwifi_dev_ioread_prph32, + TP_PROTO(const struct device *dev, u32 offs, u32 val), + TP_ARGS(dev, offs, val), + TP_STRUCT__entry( + DEV_ENTRY + __field(u32, offs) + __field(u32, val) + ), + TP_fast_assign( + DEV_ASSIGN; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] read PRPH[%#x] = %#x", + __get_str(dev), __entry->offs, __entry->val) +); + +TRACE_EVENT(iwlwifi_dev_irq, + TP_PROTO(const struct device *dev), + TP_ARGS(dev), + TP_STRUCT__entry( + DEV_ENTRY + ), + TP_fast_assign( + DEV_ASSIGN; + ), + /* TP_printk("") doesn't compile */ + TP_printk("%d", 0) +); + +TRACE_EVENT(iwlwifi_dev_ict_read, + TP_PROTO(const struct device *dev, u32 index, u32 value), + TP_ARGS(dev, index, value), + TP_STRUCT__entry( + DEV_ENTRY + __field(u32, index) + __field(u32, value) + ), + TP_fast_assign( + DEV_ASSIGN; + __entry->index = index; + __entry->value = value; + ), + TP_printk("[%s] read ict[%d] = %#.8x", + __get_str(dev), __entry->index, __entry->value) +); +#endif /* __IWLWIFI_DEVICE_TRACE_IO */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE iwl-devtrace-io +#include <trace/define_trace.h> diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace-iwlwifi.h b/drivers/net/wireless/iwlwifi/iwl-devtrace-iwlwifi.h new file mode 100644 index 000000000000..6cb66a988271 --- /dev/null +++ b/drivers/net/wireless/iwlwifi/iwl-devtrace-iwlwifi.h @@ -0,0 +1,200 @@ +/****************************************************************************** + * + * Copyright(c) 2009 - 2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + * + * Contact Information: + * Intel Linux Wireless <ilw@linux.intel.com> + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + *****************************************************************************/ + +#if !defined(__IWLWIFI_DEVICE_TRACE_IWLWIFI) || defined(TRACE_HEADER_MULTI_READ) +#define __IWLWIFI_DEVICE_TRACE_IWLWIFI + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM iwlwifi + +TRACE_EVENT(iwlwifi_dev_hcmd, + TP_PROTO(const struct device *dev, + struct iwl_host_cmd *cmd, u16 total_size, + struct iwl_cmd_header *hdr), + TP_ARGS(dev, cmd, total_size, hdr), + TP_STRUCT__entry( + DEV_ENTRY + __dynamic_array(u8, hcmd, total_size) + __field(u32, flags) + ), + TP_fast_assign( + int i, offset = sizeof(*hdr); + + DEV_ASSIGN; + __entry->flags = cmd->flags; + memcpy(__get_dynamic_array(hcmd), hdr, sizeof(*hdr)); + + for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) { + if (!cmd->len[i]) + continue; + memcpy((u8 *)__get_dynamic_array(hcmd) + offset, + cmd->data[i], cmd->len[i]); + offset += cmd->len[i]; + } + ), + TP_printk("[%s] hcmd %#.2x (%ssync)", + __get_str(dev), ((u8 *)__get_dynamic_array(hcmd))[0], + __entry->flags & CMD_ASYNC ? "a" : "") +); + +TRACE_EVENT(iwlwifi_dev_rx, + TP_PROTO(const struct device *dev, const struct iwl_trans *trans, + void *rxbuf, size_t len), + TP_ARGS(dev, trans, rxbuf, len), + TP_STRUCT__entry( + DEV_ENTRY + __dynamic_array(u8, rxbuf, iwl_rx_trace_len(trans, rxbuf, len)) + ), + TP_fast_assign( + DEV_ASSIGN; + memcpy(__get_dynamic_array(rxbuf), rxbuf, + iwl_rx_trace_len(trans, rxbuf, len)); + ), + TP_printk("[%s] RX cmd %#.2x", + __get_str(dev), ((u8 *)__get_dynamic_array(rxbuf))[4]) +); + +TRACE_EVENT(iwlwifi_dev_tx, + TP_PROTO(const struct device *dev, struct sk_buff *skb, + void *tfd, size_t tfdlen, + void *buf0, size_t buf0_len, + void *buf1, size_t buf1_len), + TP_ARGS(dev, skb, tfd, tfdlen, buf0, buf0_len, buf1, buf1_len), + TP_STRUCT__entry( + DEV_ENTRY + + __field(size_t, framelen) + __dynamic_array(u8, tfd, tfdlen) + + /* + * Do not insert between or below these items, + * we want to keep the frame together (except + * for the possible padding). + */ + __dynamic_array(u8, buf0, buf0_len) + __dynamic_array(u8, buf1, iwl_trace_data(skb) ? 0 : buf1_len) + ), + TP_fast_assign( + DEV_ASSIGN; + __entry->framelen = buf0_len + buf1_len; + memcpy(__get_dynamic_array(tfd), tfd, tfdlen); + memcpy(__get_dynamic_array(buf0), buf0, buf0_len); + if (!iwl_trace_data(skb)) + memcpy(__get_dynamic_array(buf1), buf1, buf1_len); + ), + TP_printk("[%s] TX %.2x (%zu bytes)", + __get_str(dev), ((u8 *)__get_dynamic_array(buf0))[0], + __entry->framelen) +); + +TRACE_EVENT(iwlwifi_dev_ucode_error, + TP_PROTO(const struct device *dev, u32 desc, u32 tsf_low, + u32 data1, u32 data2, u32 line, u32 blink1, + u32 blink2, u32 ilink1, u32 ilink2, u32 bcon_time, + u32 gp1, u32 gp2, u32 gp3, u32 ucode_ver, u32 hw_ver, + u32 brd_ver), + TP_ARGS(dev, desc, tsf_low, data1, data2, line, + blink1, blink2, ilink1, ilink2, bcon_time, gp1, gp2, + gp3, ucode_ver, hw_ver, brd_ver), + TP_STRUCT__entry( + DEV_ENTRY + __field(u32, desc) + __field(u32, tsf_low) + __field(u32, data1) + __field(u32, data2) + __field(u32, line) + __field(u32, blink1) + __field(u32, blink2) + __field(u32, ilink1) + __field(u32, ilink2) + __field(u32, bcon_time) + __field(u32, gp1) + __field(u32, gp2) + __field(u32, gp3) + __field(u32, ucode_ver) + __field(u32, hw_ver) + __field(u32, brd_ver) + ), + TP_fast_assign( + DEV_ASSIGN; + __entry->desc = desc; + __entry->tsf_low = tsf_low; + __entry->data1 = data1; + __entry->data2 = data2; + __entry->line = line; + __entry->blink1 = blink1; + __entry->blink2 = blink2; + __entry->ilink1 = ilink1; + __entry->ilink2 = ilink2; + __entry->bcon_time = bcon_time; + __entry->gp1 = gp1; + __entry->gp2 = gp2; + __entry->gp3 = gp3; + __entry->ucode_ver = ucode_ver; + __entry->hw_ver = hw_ver; + __entry->brd_ver = brd_ver; + ), + TP_printk("[%s] #%02d %010u data 0x%08X 0x%08X line %u, " + "blink 0x%05X 0x%05X ilink 0x%05X 0x%05X " + "bcon_tm %010u gp 0x%08X 0x%08X 0x%08X uCode 0x%08X " + "hw 0x%08X brd 0x%08X", + __get_str(dev), __entry->desc, __entry->tsf_low, + __entry->data1, + __entry->data2, __entry->line, __entry->blink1, + __entry->blink2, __entry->ilink1, __entry->ilink2, + __entry->bcon_time, __entry->gp1, __entry->gp2, + __entry->gp3, __entry->ucode_ver, __entry->hw_ver, + __entry->brd_ver) +); + +TRACE_EVENT(iwlwifi_dev_ucode_event, + TP_PROTO(const struct device *dev, u32 time, u32 data, u32 ev), + TP_ARGS(dev, time, data, ev), + TP_STRUCT__entry( + DEV_ENTRY + + __field(u32, time) + __field(u32, data) + __field(u32, ev) + ), + TP_fast_assign( + DEV_ASSIGN; + __entry->time = time; + __entry->data = data; + __entry->ev = ev; + ), + TP_printk("[%s] EVT_LOGT:%010u:0x%08x:%04u", + __get_str(dev), __entry->time, __entry->data, __entry->ev) +); +#endif /* __IWLWIFI_DEVICE_TRACE_IWLWIFI */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE iwl-devtrace-iwlwifi +#include <trace/define_trace.h> diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace-msg.h b/drivers/net/wireless/iwlwifi/iwl-devtrace-msg.h new file mode 100644 index 000000000000..a3b3c2465f89 --- /dev/null +++ b/drivers/net/wireless/iwlwifi/iwl-devtrace-msg.h @@ -0,0 +1,97 @@ +/****************************************************************************** + * + * Copyright(c) 2009 - 2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + * + * Contact Information: + * Intel Linux Wireless <ilw@linux.intel.com> + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + *****************************************************************************/ + +#if !defined(__IWLWIFI_DEVICE_TRACE_MSG) || defined(TRACE_HEADER_MULTI_READ) +#define __IWLWIFI_DEVICE_TRACE_MSG + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM iwlwifi_msg + +#define MAX_MSG_LEN 110 + +DECLARE_EVENT_CLASS(iwlwifi_msg_event, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf), + TP_STRUCT__entry( + __dynamic_array(char, msg, MAX_MSG_LEN) + ), + TP_fast_assign( + WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), + MAX_MSG_LEN, vaf->fmt, + *vaf->va) >= MAX_MSG_LEN); + ), + TP_printk("%s", __get_str(msg)) +); + +DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_err, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); + +DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_warn, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); + +DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_info, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); + +DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_crit, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); + +TRACE_EVENT(iwlwifi_dbg, + TP_PROTO(u32 level, bool in_interrupt, const char *function, + struct va_format *vaf), + TP_ARGS(level, in_interrupt, function, vaf), + TP_STRUCT__entry( + __field(u32, level) + __field(u8, in_interrupt) + __string(function, function) + __dynamic_array(char, msg, MAX_MSG_LEN) + ), + TP_fast_assign( + __entry->level = level; + __entry->in_interrupt = in_interrupt; + __assign_str(function, function); + WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), + MAX_MSG_LEN, vaf->fmt, + *vaf->va) >= MAX_MSG_LEN); + ), + TP_printk("%s", __get_str(msg)) +); +#endif /* __IWLWIFI_DEVICE_TRACE_MSG */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE iwl-devtrace-msg +#include <trace/define_trace.h> diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace-ucode.h b/drivers/net/wireless/iwlwifi/iwl-devtrace-ucode.h new file mode 100644 index 000000000000..10839fae9cd9 --- /dev/null +++ b/drivers/net/wireless/iwlwifi/iwl-devtrace-ucode.h @@ -0,0 +1,81 @@ +/****************************************************************************** + * + * Copyright(c) 2009 - 2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + * + * Contact Information: + * Intel Linux Wireless <ilw@linux.intel.com> + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + *****************************************************************************/ + +#if !defined(__IWLWIFI_DEVICE_TRACE_UCODE) || defined(TRACE_HEADER_MULTI_READ) +#define __IWLWIFI_DEVICE_TRACE_UCODE + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM iwlwifi_ucode + +TRACE_EVENT(iwlwifi_dev_ucode_cont_event, + TP_PROTO(const struct device *dev, u32 time, u32 data, u32 ev), + TP_ARGS(dev, time, data, ev), + TP_STRUCT__entry( + DEV_ENTRY + + __field(u32, time) + __field(u32, data) + __field(u32, ev) + ), + TP_fast_assign( + DEV_ASSIGN; + __entry->time = time; + __entry->data = data; + __entry->ev = ev; + ), + TP_printk("[%s] EVT_LOGT:%010u:0x%08x:%04u", + __get_str(dev), __entry->time, __entry->data, __entry->ev) +); + +TRACE_EVENT(iwlwifi_dev_ucode_wrap_event, + TP_PROTO(const struct device *dev, u32 wraps, u32 n_entry, u32 p_entry), + TP_ARGS(dev, wraps, n_entry, p_entry), + TP_STRUCT__entry( + DEV_ENTRY + + __field(u32, wraps) + __field(u32, n_entry) + __field(u32, p_entry) + ), + TP_fast_assign( + DEV_ASSIGN; + __entry->wraps = wraps; + __entry->n_entry = n_entry; + __entry->p_entry = p_entry; + ), + TP_printk("[%s] wraps=#%02d n=0x%X p=0x%X", + __get_str(dev), __entry->wraps, __entry->n_entry, + __entry->p_entry) +); +#endif /* __IWLWIFI_DEVICE_TRACE_UCODE */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE iwl-devtrace-ucode +#include <trace/define_trace.h> diff --git a/drivers/net/wireless/iwlwifi/iwl-devtrace.h b/drivers/net/wireless/iwlwifi/iwl-devtrace.h index 78bd41bf34b0..b87acd6a229b 100644 --- a/drivers/net/wireless/iwlwifi/iwl-devtrace.h +++ b/drivers/net/wireless/iwlwifi/iwl-devtrace.h @@ -24,7 +24,7 @@ * *****************************************************************************/ -#if !defined(__IWLWIFI_DEVICE_TRACE) || defined(TRACE_HEADER_MULTI_READ) +#ifndef __IWLWIFI_DEVICE_TRACE #include <linux/skbuff.h> #include <linux/ieee80211.h> #include <net/cfg80211.h> @@ -80,436 +80,10 @@ static inline void trace_ ## name(proto) {} #define DEV_ENTRY __string(dev, dev_name(dev)) #define DEV_ASSIGN __assign_str(dev, dev_name(dev)) -#undef TRACE_SYSTEM -#define TRACE_SYSTEM iwlwifi_io +#include "iwl-devtrace-io.h" +#include "iwl-devtrace-ucode.h" +#include "iwl-devtrace-msg.h" +#include "iwl-devtrace-data.h" +#include "iwl-devtrace-iwlwifi.h" -TRACE_EVENT(iwlwifi_dev_ioread32, - TP_PROTO(const struct device *dev, u32 offs, u32 val), - TP_ARGS(dev, offs, val), - TP_STRUCT__entry( - DEV_ENTRY - __field(u32, offs) - __field(u32, val) - ), - TP_fast_assign( - DEV_ASSIGN; - __entry->offs = offs; - __entry->val = val; - ), - TP_printk("[%s] read io[%#x] = %#x", - __get_str(dev), __entry->offs, __entry->val) -); - -TRACE_EVENT(iwlwifi_dev_iowrite8, - TP_PROTO(const struct device *dev, u32 offs, u8 val), - TP_ARGS(dev, offs, val), - TP_STRUCT__entry( - DEV_ENTRY - __field(u32, offs) - __field(u8, val) - ), - TP_fast_assign( - DEV_ASSIGN; - __entry->offs = offs; - __entry->val = val; - ), - TP_printk("[%s] write io[%#x] = %#x)", - __get_str(dev), __entry->offs, __entry->val) -); - -TRACE_EVENT(iwlwifi_dev_iowrite32, - TP_PROTO(const struct device *dev, u32 offs, u32 val), - TP_ARGS(dev, offs, val), - TP_STRUCT__entry( - DEV_ENTRY - __field(u32, offs) - __field(u32, val) - ), - TP_fast_assign( - DEV_ASSIGN; - __entry->offs = offs; - __entry->val = val; - ), - TP_printk("[%s] write io[%#x] = %#x)", - __get_str(dev), __entry->offs, __entry->val) -); - -TRACE_EVENT(iwlwifi_dev_iowrite_prph32, - TP_PROTO(const struct device *dev, u32 offs, u32 val), - TP_ARGS(dev, offs, val), - TP_STRUCT__entry( - DEV_ENTRY - __field(u32, offs) - __field(u32, val) - ), - TP_fast_assign( - DEV_ASSIGN; - __entry->offs = offs; - __entry->val = val; - ), - TP_printk("[%s] write PRPH[%#x] = %#x)", - __get_str(dev), __entry->offs, __entry->val) -); - -TRACE_EVENT(iwlwifi_dev_ioread_prph32, - TP_PROTO(const struct device *dev, u32 offs, u32 val), - TP_ARGS(dev, offs, val), - TP_STRUCT__entry( - DEV_ENTRY - __field(u32, offs) - __field(u32, val) - ), - TP_fast_assign( - DEV_ASSIGN; - __entry->offs = offs; - __entry->val = val; - ), - TP_printk("[%s] read PRPH[%#x] = %#x", - __get_str(dev), __entry->offs, __entry->val) -); - -TRACE_EVENT(iwlwifi_dev_irq, - TP_PROTO(const struct device *dev), - TP_ARGS(dev), - TP_STRUCT__entry( - DEV_ENTRY - ), - TP_fast_assign( - DEV_ASSIGN; - ), - /* TP_printk("") doesn't compile */ - TP_printk("%d", 0) -); - -TRACE_EVENT(iwlwifi_dev_ict_read, - TP_PROTO(const struct device *dev, u32 index, u32 value), - TP_ARGS(dev, index, value), - TP_STRUCT__entry( - DEV_ENTRY - __field(u32, index) - __field(u32, value) - ), - TP_fast_assign( - DEV_ASSIGN; - __entry->index = index; - __entry->value = value; - ), - TP_printk("[%s] read ict[%d] = %#.8x", - __get_str(dev), __entry->index, __entry->value) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM iwlwifi_ucode - -TRACE_EVENT(iwlwifi_dev_ucode_cont_event, - TP_PROTO(const struct device *dev, u32 time, u32 data, u32 ev), - TP_ARGS(dev, time, data, ev), - TP_STRUCT__entry( - DEV_ENTRY - - __field(u32, time) - __field(u32, data) - __field(u32, ev) - ), - TP_fast_assign( - DEV_ASSIGN; - __entry->time = time; - __entry->data = data; - __entry->ev = ev; - ), - TP_printk("[%s] EVT_LOGT:%010u:0x%08x:%04u", - __get_str(dev), __entry->time, __entry->data, __entry->ev) -); - -TRACE_EVENT(iwlwifi_dev_ucode_wrap_event, - TP_PROTO(const struct device *dev, u32 wraps, u32 n_entry, u32 p_entry), - TP_ARGS(dev, wraps, n_entry, p_entry), - TP_STRUCT__entry( - DEV_ENTRY - - __field(u32, wraps) - __field(u32, n_entry) - __field(u32, p_entry) - ), - TP_fast_assign( - DEV_ASSIGN; - __entry->wraps = wraps; - __entry->n_entry = n_entry; - __entry->p_entry = p_entry; - ), - TP_printk("[%s] wraps=#%02d n=0x%X p=0x%X", - __get_str(dev), __entry->wraps, __entry->n_entry, - __entry->p_entry) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM iwlwifi_msg - -#define MAX_MSG_LEN 110 - -DECLARE_EVENT_CLASS(iwlwifi_msg_event, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf), - TP_STRUCT__entry( - __dynamic_array(char, msg, MAX_MSG_LEN) - ), - TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); - ), - TP_printk("%s", __get_str(msg)) -); - -DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_err, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); - -DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_warn, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); - -DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_info, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); - -DEFINE_EVENT(iwlwifi_msg_event, iwlwifi_crit, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); - -TRACE_EVENT(iwlwifi_dbg, - TP_PROTO(u32 level, bool in_interrupt, const char *function, - struct va_format *vaf), - TP_ARGS(level, in_interrupt, function, vaf), - TP_STRUCT__entry( - __field(u32, level) - __field(u8, in_interrupt) - __string(function, function) - __dynamic_array(char, msg, MAX_MSG_LEN) - ), - TP_fast_assign( - __entry->level = level; - __entry->in_interrupt = in_interrupt; - __assign_str(function, function); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); - ), - TP_printk("%s", __get_str(msg)) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM iwlwifi_data - -TRACE_EVENT(iwlwifi_dev_tx_data, - TP_PROTO(const struct device *dev, - struct sk_buff *skb, - void *data, size_t data_len), - TP_ARGS(dev, skb, data, data_len), - TP_STRUCT__entry( - DEV_ENTRY - - __dynamic_array(u8, data, iwl_trace_data(skb) ? data_len : 0) - ), - TP_fast_assign( - DEV_ASSIGN; - if (iwl_trace_data(skb)) - memcpy(__get_dynamic_array(data), data, data_len); - ), - TP_printk("[%s] TX frame data", __get_str(dev)) -); - -TRACE_EVENT(iwlwifi_dev_rx_data, - TP_PROTO(const struct device *dev, - const struct iwl_trans *trans, - void *rxbuf, size_t len), - TP_ARGS(dev, trans, rxbuf, len), - TP_STRUCT__entry( - DEV_ENTRY - - __dynamic_array(u8, data, - len - iwl_rx_trace_len(trans, rxbuf, len)) - ), - TP_fast_assign( - size_t offs = iwl_rx_trace_len(trans, rxbuf, len); - DEV_ASSIGN; - if (offs < len) - memcpy(__get_dynamic_array(data), - ((u8 *)rxbuf) + offs, len - offs); - ), - TP_printk("[%s] RX frame data", __get_str(dev)) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM iwlwifi - -TRACE_EVENT(iwlwifi_dev_hcmd, - TP_PROTO(const struct device *dev, - struct iwl_host_cmd *cmd, u16 total_size, - struct iwl_cmd_header *hdr), - TP_ARGS(dev, cmd, total_size, hdr), - TP_STRUCT__entry( - DEV_ENTRY - __dynamic_array(u8, hcmd, total_size) - __field(u32, flags) - ), - TP_fast_assign( - int i, offset = sizeof(*hdr); - - DEV_ASSIGN; - __entry->flags = cmd->flags; - memcpy(__get_dynamic_array(hcmd), hdr, sizeof(*hdr)); - - for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) { - if (!cmd->len[i]) - continue; - memcpy((u8 *)__get_dynamic_array(hcmd) + offset, - cmd->data[i], cmd->len[i]); - offset += cmd->len[i]; - } - ), - TP_printk("[%s] hcmd %#.2x (%ssync)", - __get_str(dev), ((u8 *)__get_dynamic_array(hcmd))[0], - __entry->flags & CMD_ASYNC ? "a" : "") -); - -TRACE_EVENT(iwlwifi_dev_rx, - TP_PROTO(const struct device *dev, const struct iwl_trans *trans, - void *rxbuf, size_t len), - TP_ARGS(dev, trans, rxbuf, len), - TP_STRUCT__entry( - DEV_ENTRY - __dynamic_array(u8, rxbuf, iwl_rx_trace_len(trans, rxbuf, len)) - ), - TP_fast_assign( - DEV_ASSIGN; - memcpy(__get_dynamic_array(rxbuf), rxbuf, - iwl_rx_trace_len(trans, rxbuf, len)); - ), - TP_printk("[%s] RX cmd %#.2x", - __get_str(dev), ((u8 *)__get_dynamic_array(rxbuf))[4]) -); - -TRACE_EVENT(iwlwifi_dev_tx, - TP_PROTO(const struct device *dev, struct sk_buff *skb, - void *tfd, size_t tfdlen, - void *buf0, size_t buf0_len, - void *buf1, size_t buf1_len), - TP_ARGS(dev, skb, tfd, tfdlen, buf0, buf0_len, buf1, buf1_len), - TP_STRUCT__entry( - DEV_ENTRY - - __field(size_t, framelen) - __dynamic_array(u8, tfd, tfdlen) - - /* - * Do not insert between or below these items, - * we want to keep the frame together (except - * for the possible padding). - */ - __dynamic_array(u8, buf0, buf0_len) - __dynamic_array(u8, buf1, iwl_trace_data(skb) ? 0 : buf1_len) - ), - TP_fast_assign( - DEV_ASSIGN; - __entry->framelen = buf0_len + buf1_len; - memcpy(__get_dynamic_array(tfd), tfd, tfdlen); - memcpy(__get_dynamic_array(buf0), buf0, buf0_len); - if (!iwl_trace_data(skb)) - memcpy(__get_dynamic_array(buf1), buf1, buf1_len); - ), - TP_printk("[%s] TX %.2x (%zu bytes)", - __get_str(dev), ((u8 *)__get_dynamic_array(buf0))[0], - __entry->framelen) -); - -TRACE_EVENT(iwlwifi_dev_ucode_error, - TP_PROTO(const struct device *dev, u32 desc, u32 tsf_low, - u32 data1, u32 data2, u32 line, u32 blink1, - u32 blink2, u32 ilink1, u32 ilink2, u32 bcon_time, - u32 gp1, u32 gp2, u32 gp3, u32 ucode_ver, u32 hw_ver, - u32 brd_ver), - TP_ARGS(dev, desc, tsf_low, data1, data2, line, - blink1, blink2, ilink1, ilink2, bcon_time, gp1, gp2, - gp3, ucode_ver, hw_ver, brd_ver), - TP_STRUCT__entry( - DEV_ENTRY - __field(u32, desc) - __field(u32, tsf_low) - __field(u32, data1) - __field(u32, data2) - __field(u32, line) - __field(u32, blink1) - __field(u32, blink2) - __field(u32, ilink1) - __field(u32, ilink2) - __field(u32, bcon_time) - __field(u32, gp1) - __field(u32, gp2) - __field(u32, gp3) - __field(u32, ucode_ver) - __field(u32, hw_ver) - __field(u32, brd_ver) - ), - TP_fast_assign( - DEV_ASSIGN; - __entry->desc = desc; - __entry->tsf_low = tsf_low; - __entry->data1 = data1; - __entry->data2 = data2; - __entry->line = line; - __entry->blink1 = blink1; - __entry->blink2 = blink2; - __entry->ilink1 = ilink1; - __entry->ilink2 = ilink2; - __entry->bcon_time = bcon_time; - __entry->gp1 = gp1; - __entry->gp2 = gp2; - __entry->gp3 = gp3; - __entry->ucode_ver = ucode_ver; - __entry->hw_ver = hw_ver; - __entry->brd_ver = brd_ver; - ), - TP_printk("[%s] #%02d %010u data 0x%08X 0x%08X line %u, " - "blink 0x%05X 0x%05X ilink 0x%05X 0x%05X " - "bcon_tm %010u gp 0x%08X 0x%08X 0x%08X uCode 0x%08X " - "hw 0x%08X brd 0x%08X", - __get_str(dev), __entry->desc, __entry->tsf_low, - __entry->data1, - __entry->data2, __entry->line, __entry->blink1, - __entry->blink2, __entry->ilink1, __entry->ilink2, - __entry->bcon_time, __entry->gp1, __entry->gp2, - __entry->gp3, __entry->ucode_ver, __entry->hw_ver, - __entry->brd_ver) -); - -TRACE_EVENT(iwlwifi_dev_ucode_event, - TP_PROTO(const struct device *dev, u32 time, u32 data, u32 ev), - TP_ARGS(dev, time, data, ev), - TP_STRUCT__entry( - DEV_ENTRY - - __field(u32, time) - __field(u32, data) - __field(u32, ev) - ), - TP_fast_assign( - DEV_ASSIGN; - __entry->time = time; - __entry->data = data; - __entry->ev = ev; - ), - TP_printk("[%s] EVT_LOGT:%010u:0x%08x:%04u", - __get_str(dev), __entry->time, __entry->data, __entry->ev) -); #endif /* __IWLWIFI_DEVICE_TRACE */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE iwl-devtrace -#include <trace/define_trace.h> diff --git a/drivers/net/wireless/rtlwifi/rtl8723be/dm.c b/drivers/net/wireless/rtlwifi/rtl8723be/dm.c index 2367e8f47a5b..e77c3a46c94a 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723be/dm.c +++ b/drivers/net/wireless/rtlwifi/rtl8723be/dm.c @@ -309,7 +309,7 @@ static void rtl8723be_dm_find_minimum_rssi(struct ieee80211_hw *hw) rtl_dm_dig->min_undec_pwdb_for_dm = rtlpriv->dm.entry_min_undec_sm_pwdb; RT_TRACE(rtlpriv, COMP_BB_POWERSAVING, DBG_LOUD, - "AP Ext Port or disconnet PWDB = 0x%x\n", + "AP Ext Port or disconnect PWDB = 0x%x\n", rtl_dm_dig->min_undec_pwdb_for_dm); } RT_TRACE(rtlpriv, COMP_DIG, DBG_LOUD, "MinUndecoratedPWDBForDM =%d\n", diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c b/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c index 0b2082dc48f1..342678d2ed42 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c @@ -873,7 +873,7 @@ static void rtl8821ae_dm_dig(struct ieee80211_hw *hw) if (rtlpriv->falsealm_cnt.cnt_all > 10000) { RT_TRACE(rtlpriv, COMP_DIG, DBG_LOUD, - "Abnornally false alarm case.\n"); + "Abnormally false alarm case.\n"); if (dm_digtable->large_fa_hit != 3) dm_digtable->large_fa_hit++; diff --git a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c index caa153133754..a656d9e83343 100644 --- a/drivers/parisc/eisa_enumerator.c +++ b/drivers/parisc/eisa_enumerator.c @@ -357,7 +357,7 @@ static int parse_slot_config(int slot, } if (flags & HPEE_FUNCTION_INFO_CFG_FREE_FORM) { /* I have no idea how to handle this */ - printk("function %d have free-form confgiuration, skipping ", + printk("function %d have free-form configuration, skipping ", num_func); pos = p0 + function_len; continue; diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c index 1806b24faa14..23db4c9ac76c 100644 --- a/drivers/pinctrl/nomadik/pinctrl-abx500.c +++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c @@ -466,7 +466,7 @@ static int abx500_set_mode(struct pinctrl_dev *pctldev, struct gpio_chip *chip, break; default: - dev_dbg(pct->dev, "unknow alt_setting %d\n", alt_setting); + dev_dbg(pct->dev, "unknown alt_setting %d\n", alt_setting); return -EINVAL; } diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index ae7c2ba440cf..af4f85a66b39 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -213,7 +213,7 @@ static int hid_rtc_read_time(struct device *dev, struct rtc_time *tm) /* get a report with all values through requesting one value */ sensor_hub_input_attr_get_raw_value(time_state->common_attributes.hsdev, HID_USAGE_SENSOR_TIME, hid_time_addresses[0], - time_state->info[0].report_id); + time_state->info[0].report_id, SENSOR_HUB_SYNC); /* wait for all values (event) */ ret = wait_for_completion_killable_timeout( &time_state->comp_last_time, HZ*6); diff --git a/drivers/scsi/be2iscsi/be_cmds.c b/drivers/scsi/be2iscsi/be_cmds.c index 80d97f3d2ed9..1028760b8a22 100644 --- a/drivers/scsi/be2iscsi/be_cmds.c +++ b/drivers/scsi/be2iscsi/be_cmds.c @@ -237,7 +237,7 @@ int beiscsi_mccq_compl(struct beiscsi_hba *phba, beiscsi_log(phba, KERN_WARNING, BEISCSI_LOG_INIT | BEISCSI_LOG_EH | BEISCSI_LOG_CONFIG, - "BC_%d : Insufficent Buffer Error " + "BC_%d : Insufficient Buffer Error " "Resp_Len : %d Actual_Resp_Len : %d\n", mbx_resp_hdr->response_length, mbx_resp_hdr->actual_resp_len); diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c index 0045742fab7d..dad959fcf6d8 100644 --- a/drivers/scsi/ch.c +++ b/drivers/scsi/ch.c @@ -339,7 +339,7 @@ ch_readconfig(scsi_changer *ch) ch->firsts[CHET_DT], ch->counts[CHET_DT]); } else { - VPRINTK(KERN_INFO, "reading element address assigment page failed!\n"); + VPRINTK(KERN_INFO, "reading element address assignment page failed!\n"); } /* vendor specific element types */ diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index 488c3929f19a..0cccd6033feb 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -186,7 +186,7 @@ static int _osd_get_print_system_info(struct osd_dev *od, if (unlikely(len > sizeof(odi->systemid))) { OSD_ERR("OSD Target error: OSD_SYSTEM_ID too long(%d). " - "device idetification might not work\n", len); + "device identification might not work\n", len); len = sizeof(odi->systemid); } odi->systemid_len = len; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index e59f25bff7ab..5bb57c5282c9 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -5364,7 +5364,7 @@ qla2x00_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr) blob = qla2x00_request_firmware(vha); if (!blob) { ql_log(ql_log_info, vha, 0x0083, - "Fimware image unavailable.\n"); + "Firmware image unavailable.\n"); ql_log(ql_log_info, vha, 0x0084, "Firmware images can be retrieved from: "QLA_FW_URL ".\n"); return QLA_FUNCTION_FAILED; @@ -5467,7 +5467,7 @@ qla24xx_load_risc_blob(scsi_qla_host_t *vha, uint32_t *srisc_addr) blob = qla2x00_request_firmware(vha); if (!blob) { ql_log(ql_log_warn, vha, 0x0090, - "Fimware image unavailable.\n"); + "Firmware image unavailable.\n"); ql_log(ql_log_warn, vha, 0x0091, "Firmware images can be retrieved from: " QLA_FW_URL ".\n"); diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 5c2e0317f1c0..ca3804e34833 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -788,7 +788,7 @@ qla25xx_create_rsp_que(struct qla_hw_data *ha, uint16_t options, rsp->msix = &ha->msix_entries[que_id + 1]; else ql_log(ql_log_warn, base_vha, 0x00e3, - "MSIX not enalbled.\n"); + "MSIX not enabled.\n"); ha->rsp_q_map[que_id] = rsp; rsp->rid = rid; diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 54cb2ac9339b..7d2b18f2675c 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -1274,7 +1274,7 @@ qla82xx_pinit_from_rom(scsi_qla_host_t *vha) if (off == ADDR_ERROR) { ql_log(ql_log_fatal, vha, 0x0116, - "Unknow addr: 0x%08lx.\n", buf[i].addr); + "Unknown addr: 0x%08lx.\n", buf[i].addr); continue; } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index cce1cbc1a927..5319b3cb219e 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -446,11 +446,11 @@ static int qla25xx_setup_mode(struct scsi_qla_host *vha) } ha->flags.cpu_affinity_enabled = 1; ql_dbg(ql_dbg_multiq, vha, 0xc007, - "CPU affinity mode enalbed, " + "CPU affinity mode enabled, " "no. of response queues:%d no. of request queues:%d.\n", ha->max_rsp_queues, ha->max_req_queues); ql_dbg(ql_dbg_init, vha, 0x00e9, - "CPU affinity mode enalbed, " + "CPU affinity mode enabled, " "no. of response queues:%d no. of request queues:%d.\n", ha->max_rsp_queues, ha->max_req_queues); } @@ -4102,7 +4102,7 @@ qla83xx_schedule_work(scsi_qla_host_t *base_vha, int work_code) break; default: ql_log(ql_log_warn, base_vha, 0xb05f, - "Unknow work-code=0x%x.\n", work_code); + "Unknown work-code=0x%x.\n", work_code); } return; @@ -4702,7 +4702,7 @@ qla83xx_idc_state_handler(scsi_qla_host_t *base_vha) break; default: ql_log(ql_log_warn, base_vha, 0xb071, - "Unknow Device State: %x.\n", dev_state); + "Unknown Device State: %x.\n", dev_state); qla83xx_idc_unlock(base_vha, 0); qla8xxx_dev_failed_handler(base_vha); rval = QLA_FUNCTION_FAILED; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 2270bd51f9c2..9d7b7db75e4b 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -33,7 +33,6 @@ static int sg_version_num = 30536; /* 2 digits for each component */ #include <linux/sched.h> #include <linux/string.h> #include <linux/mm.h> -#include <linux/aio.h> #include <linux/errno.h> #include <linux/mtio.h> #include <linux/ioctl.h> @@ -51,6 +50,7 @@ static int sg_version_num = 30536; /* 2 digits for each component */ #include <linux/mutex.h> #include <linux/atomic.h> #include <linux/ratelimit.h> +#include <linux/uio.h> #include "scsi.h" #include <scsi/scsi_dbg.h> @@ -1745,17 +1745,14 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) } if (iov_count) { - int size = sizeof(struct iovec) * iov_count; - struct iovec *iov; + struct iovec *iov = NULL; struct iov_iter i; - iov = memdup_user(hp->dxferp, size); - if (IS_ERR(iov)) - return PTR_ERR(iov); + res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i); + if (res < 0) + return res; - iov_iter_init(&i, rw, iov, iov_count, - min_t(size_t, hp->dxfer_len, - iov_length(iov, iov_count))); + iov_iter_truncate(&i, hp->dxfer_len); res = blk_rq_map_user_iov(q, rq, md, &i, GFP_ATOMIC); kfree(iov); diff --git a/drivers/staging/unisys/include/timskmod.h b/drivers/staging/unisys/include/timskmod.h index 5a933d7bf39f..cde2494ad896 100644 --- a/drivers/staging/unisys/include/timskmod.h +++ b/drivers/staging/unisys/include/timskmod.h @@ -46,7 +46,6 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/fcntl.h> -#include <linux/aio.h> #include <linux/workqueue.h> #include <linux/kthread.h> #include <linux/seq_file.h> diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c index 967b2c2b7cf1..0655fecf8240 100644 --- a/drivers/tty/goldfish.c +++ b/drivers/tty/goldfish.c @@ -229,7 +229,6 @@ static int goldfish_tty_probe(struct platform_device *pdev) { struct goldfish_tty *qtty; int ret = -EINVAL; - int i; struct resource *r; struct device *ttydev; void __iomem *base; @@ -293,7 +292,6 @@ static int goldfish_tty_probe(struct platform_device *pdev) mutex_unlock(&goldfish_tty_lock); return 0; - tty_unregister_device(goldfish_tty_driver, i); err_tty_register_device_failed: free_irq(irq, pdev); err_request_irq_failed: diff --git a/drivers/tty/ipwireless/hardware.c b/drivers/tty/ipwireless/hardware.c index 5c77e1eac4ee..ad7031a4f3c4 100644 --- a/drivers/tty/ipwireless/hardware.c +++ b/drivers/tty/ipwireless/hardware.c @@ -1455,7 +1455,7 @@ static void __handle_setup_get_version_rsp(struct ipw_hardware *hw) return; } - set_RTS(hw, PRIO_SETUP, channel_idx, + ret = set_RTS(hw, PRIO_SETUP, channel_idx, (hw->control_lines [channel_idx] & IPW_CONTROL_LINE_RTS) != 0); if (ret) { diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 175c9956cbe3..a12315a78248 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -23,6 +23,7 @@ #include <linux/export.h> #include <linux/hid.h> #include <linux/module.h> +#include <linux/uio.h> #include <asm/unaligned.h> #include <linux/usb/composite.h> @@ -655,9 +656,10 @@ static void ffs_user_copy_worker(struct work_struct *work) unuse_mm(io_data->mm); } - aio_complete(io_data->kiocb, ret, ret); + io_data->kiocb->ki_complete(io_data->kiocb, ret, ret); - if (io_data->ffs->ffs_eventfd && !io_data->kiocb->ki_eventfd) + if (io_data->ffs->ffs_eventfd && + !(io_data->kiocb->ki_flags & IOCB_EVENTFD)) eventfd_signal(io_data->ffs->ffs_eventfd, 1); usb_ep_free_request(io_data->ep, io_data->req); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 200f9a584064..662ef2c1c62b 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -26,6 +26,7 @@ #include <linux/poll.h> #include <linux/mmu_context.h> #include <linux/aio.h> +#include <linux/uio.h> #include <linux/device.h> #include <linux/moduleparam.h> @@ -469,7 +470,7 @@ static void ep_user_copy_worker(struct work_struct *work) ret = -EFAULT; /* completing the iocb can drop the ctx and mm, don't touch mm after */ - aio_complete(iocb, ret, ret); + iocb->ki_complete(iocb, ret, ret); kfree(priv->buf); kfree(priv->to_free); @@ -497,7 +498,8 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) kfree(priv); iocb->private = NULL; /* aio_complete() reports bytes-transferred _and_ faults */ - aio_complete(iocb, req->actual ? req->actual : req->status, + + iocb->ki_complete(iocb, req->actual ? req->actual : req->status, req->status); } else { /* ep_copy_to_user() won't report both; we hide some faults */ diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h index dde3959b7a33..59c05653b2ea 100644 --- a/drivers/usb/host/xhci-trace.h +++ b/drivers/usb/host/xhci-trace.h @@ -14,6 +14,13 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM xhci-hcd +/* + * The TRACE_SYSTEM_VAR defaults to TRACE_SYSTEM, but must be a + * legitimate C variable. It is not exported to user space. + */ +#undef TRACE_SYSTEM_VAR +#define TRACE_SYSTEM_VAR xhci_hcd + #if !defined(__XHCI_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define __XHCI_TRACE_H diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index eb14e055ea83..ff1a5bac4200 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -33,7 +33,7 @@ #include <linux/pagemap.h> #include <linux/idr.h> #include <linux/sched.h> -#include <linux/aio.h> +#include <linux/uio.h> #include <net/9p/9p.h> #include <net/9p/client.h> diff --git a/fs/Makefile b/fs/Makefile index a88ac4838c9e..cb92fd4c3172 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -118,6 +118,7 @@ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ obj-$(CONFIG_CACHEFILES) += cachefiles/ obj-$(CONFIG_DEBUG_FS) += debugfs/ +obj-$(CONFIG_TRACING) += tracefs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ diff --git a/fs/affs/file.c b/fs/affs/file.c index a91795e01a7f..3aa7eb66547e 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -12,7 +12,7 @@ * affs regular file handling primitives */ -#include <linux/aio.h> +#include <linux/uio.h> #include "affs.h" static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext); diff --git a/fs/afs/write.c b/fs/afs/write.c index c13cb08964ed..0714abcd7f32 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -14,7 +14,6 @@ #include <linux/pagemap.h> #include <linux/writeback.h> #include <linux/pagevec.h> -#include <linux/aio.h> #include "internal.h" static int afs_write_back_from_locked_page(struct afs_writeback *wb, @@ -151,6 +151,38 @@ struct kioctx { unsigned id; }; +/* + * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either + * cancelled or completed (this makes a certain amount of sense because + * successful cancellation - io_cancel() - does deliver the completion to + * userspace). + * + * And since most things don't implement kiocb cancellation and we'd really like + * kiocb completion to be lockless when possible, we use ki_cancel to + * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED + * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel(). + */ +#define KIOCB_CANCELLED ((void *) (~0ULL)) + +struct aio_kiocb { + struct kiocb common; + + struct kioctx *ki_ctx; + kiocb_cancel_fn *ki_cancel; + + struct iocb __user *ki_user_iocb; /* user's aiocb */ + __u64 ki_user_data; /* user's data for completion */ + + struct list_head ki_list; /* the aio core uses this + * for cancellation */ + + /* + * If the aio_resfd field of the userspace iocb is not zero, + * this is the underlying eventfd context to deliver events to. + */ + struct eventfd_ctx *ki_eventfd; +}; + /*------ sysctl variables----*/ static DEFINE_SPINLOCK(aio_nr_lock); unsigned long aio_nr; /* current system wide number of aio requests */ @@ -220,7 +252,7 @@ static int __init aio_setup(void) if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); - kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); + kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); @@ -484,8 +516,9 @@ static int aio_setup_ring(struct kioctx *ctx) #define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) #define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) -void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) +void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) { + struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common); struct kioctx *ctx = req->ki_ctx; unsigned long flags; @@ -500,7 +533,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) } EXPORT_SYMBOL(kiocb_set_cancel_fn); -static int kiocb_cancel(struct kiocb *kiocb) +static int kiocb_cancel(struct aio_kiocb *kiocb) { kiocb_cancel_fn *old, *cancel; @@ -518,7 +551,7 @@ static int kiocb_cancel(struct kiocb *kiocb) cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); } while (cancel != old); - return cancel(kiocb); + return cancel(&kiocb->common); } static void free_ioctx(struct work_struct *work) @@ -554,13 +587,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref) static void free_ioctx_users(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, users); - struct kiocb *req; + struct aio_kiocb *req; spin_lock_irq(&ctx->ctx_lock); while (!list_empty(&ctx->active_reqs)) { req = list_first_entry(&ctx->active_reqs, - struct kiocb, ki_list); + struct aio_kiocb, ki_list); list_del_init(&req->ki_list); kiocb_cancel(req); @@ -786,22 +819,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, return 0; } -/* wait_on_sync_kiocb: - * Waits on the given sync kiocb to complete. - */ -ssize_t wait_on_sync_kiocb(struct kiocb *req) -{ - while (!req->ki_ctx) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (req->ki_ctx) - break; - io_schedule(); - } - __set_current_state(TASK_RUNNING); - return req->ki_user_data; -} -EXPORT_SYMBOL(wait_on_sync_kiocb); - /* * exit_aio: called when the last user of mm goes away. At this point, there is * no way for any new requests to be submited or any of the io_* syscalls to be @@ -956,9 +973,9 @@ static void user_refill_reqs_available(struct kioctx *ctx) * Allocate a slot for an aio request. * Returns NULL if no requests are free. */ -static inline struct kiocb *aio_get_req(struct kioctx *ctx) +static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) { - struct kiocb *req; + struct aio_kiocb *req; if (!get_reqs_available(ctx)) { user_refill_reqs_available(ctx); @@ -979,10 +996,10 @@ out_put: return NULL; } -static void kiocb_free(struct kiocb *req) +static void kiocb_free(struct aio_kiocb *req) { - if (req->ki_filp) - fput(req->ki_filp); + if (req->common.ki_filp) + fput(req->common.ki_filp); if (req->ki_eventfd != NULL) eventfd_ctx_put(req->ki_eventfd); kmem_cache_free(kiocb_cachep, req); @@ -1018,8 +1035,9 @@ out: /* aio_complete * Called when the io request on the given iocb is complete. */ -void aio_complete(struct kiocb *iocb, long res, long res2) +static void aio_complete(struct kiocb *kiocb, long res, long res2) { + struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common); struct kioctx *ctx = iocb->ki_ctx; struct aio_ring *ring; struct io_event *ev_page, *event; @@ -1033,13 +1051,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) * ref, no other paths have a way to get another ref * - the sync task helpfully left a reference to itself in the iocb */ - if (is_sync_kiocb(iocb)) { - iocb->ki_user_data = res; - smp_wmb(); - iocb->ki_ctx = ERR_PTR(-EXDEV); - wake_up_process(iocb->ki_obj.tsk); - return; - } + BUG_ON(is_sync_kiocb(kiocb)); if (iocb->ki_list.next) { unsigned long flags; @@ -1065,7 +1077,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); event = ev_page + pos % AIO_EVENTS_PER_PAGE; - event->obj = (u64)(unsigned long)iocb->ki_obj.user; + event->obj = (u64)(unsigned long)iocb->ki_user_iocb; event->data = iocb->ki_user_data; event->res = res; event->res2 = res2; @@ -1074,7 +1086,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", - ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data, + ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data, res, res2); /* after flagging the request as done, we @@ -1121,7 +1133,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) percpu_ref_put(&ctx->reqs); } -EXPORT_SYMBOL(aio_complete); /* aio_read_events_ring * Pull an event off of the ioctx's event ring. Returns the number of @@ -1349,46 +1360,19 @@ typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, unsigned long, loff_t); typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); -static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, - int rw, char __user *buf, - unsigned long *nr_segs, - struct iovec **iovec, - bool compat) +static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len, + struct iovec **iovec, + bool compat, + struct iov_iter *iter) { - ssize_t ret; - - *nr_segs = kiocb->ki_nbytes; - #ifdef CONFIG_COMPAT if (compat) - ret = compat_rw_copy_check_uvector(rw, + return compat_import_iovec(rw, (struct compat_iovec __user *)buf, - *nr_segs, UIO_FASTIOV, *iovec, iovec); - else + len, UIO_FASTIOV, iovec, iter); #endif - ret = rw_copy_check_uvector(rw, - (struct iovec __user *)buf, - *nr_segs, UIO_FASTIOV, *iovec, iovec); - if (ret < 0) - return ret; - - /* ki_nbytes now reflect bytes instead of segs */ - kiocb->ki_nbytes = ret; - return 0; -} - -static ssize_t aio_setup_single_vector(struct kiocb *kiocb, - int rw, char __user *buf, - unsigned long *nr_segs, - struct iovec *iovec) -{ - if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes))) - return -EFAULT; - - iovec->iov_base = buf; - iovec->iov_len = kiocb->ki_nbytes; - *nr_segs = 1; - return 0; + return import_iovec(rw, (struct iovec __user *)buf, + len, UIO_FASTIOV, iovec, iter); } /* @@ -1396,11 +1380,10 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb, * Performs the initial checks and io submission. */ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, - char __user *buf, bool compat) + char __user *buf, size_t len, bool compat) { struct file *file = req->ki_filp; ssize_t ret; - unsigned long nr_segs; int rw; fmode_t mode; aio_rw_op *rw_op; @@ -1431,21 +1414,22 @@ rw_common: if (!rw_op && !iter_op) return -EINVAL; - ret = (opcode == IOCB_CMD_PREADV || - opcode == IOCB_CMD_PWRITEV) - ? aio_setup_vectored_rw(req, rw, buf, &nr_segs, - &iovec, compat) - : aio_setup_single_vector(req, rw, buf, &nr_segs, - iovec); + if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) + ret = aio_setup_vectored_rw(rw, buf, len, + &iovec, compat, &iter); + else { + ret = import_single_range(rw, buf, len, iovec, &iter); + iovec = NULL; + } if (!ret) - ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); + ret = rw_verify_area(rw, file, &req->ki_pos, + iov_iter_count(&iter)); if (ret < 0) { - if (iovec != inline_vecs) - kfree(iovec); + kfree(iovec); return ret; } - req->ki_nbytes = ret; + len = ret; /* XXX: move/kill - rw_verify_area()? */ /* This matches the pread()/pwrite() logic */ @@ -1458,14 +1442,14 @@ rw_common: file_start_write(file); if (iter_op) { - iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes); ret = iter_op(req, &iter); } else { - ret = rw_op(req, iovec, nr_segs, req->ki_pos); + ret = rw_op(req, iter.iov, iter.nr_segs, req->ki_pos); } if (rw == WRITE) file_end_write(file); + kfree(iovec); break; case IOCB_CMD_FDSYNC: @@ -1487,9 +1471,6 @@ rw_common: return -EINVAL; } - if (iovec != inline_vecs) - kfree(iovec); - if (ret != -EIOCBQUEUED) { /* * There's no easy way to restart the syscall since other AIO's @@ -1508,7 +1489,7 @@ rw_common: static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb, bool compat) { - struct kiocb *req; + struct aio_kiocb *req; ssize_t ret; /* enforce forwards compatibility on users */ @@ -1531,11 +1512,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, if (unlikely(!req)) return -EAGAIN; - req->ki_filp = fget(iocb->aio_fildes); - if (unlikely(!req->ki_filp)) { + req->common.ki_filp = fget(iocb->aio_fildes); + if (unlikely(!req->common.ki_filp)) { ret = -EBADF; goto out_put_req; } + req->common.ki_pos = iocb->aio_offset; + req->common.ki_complete = aio_complete; + req->common.ki_flags = 0; if (iocb->aio_flags & IOCB_FLAG_RESFD) { /* @@ -1550,6 +1534,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_eventfd = NULL; goto out_put_req; } + + req->common.ki_flags |= IOCB_EVENTFD; } ret = put_user(KIOCB_KEY, &user_iocb->aio_key); @@ -1558,13 +1544,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, goto out_put_req; } - req->ki_obj.user = user_iocb; + req->ki_user_iocb = user_iocb; req->ki_user_data = iocb->aio_data; - req->ki_pos = iocb->aio_offset; - req->ki_nbytes = iocb->aio_nbytes; - ret = aio_run_iocb(req, iocb->aio_lio_opcode, + ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode, (char __user *)(unsigned long)iocb->aio_buf, + iocb->aio_nbytes, compat); if (ret) goto out_put_req; @@ -1651,10 +1636,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, /* lookup_kiocb * Finds a given iocb for cancellation. */ -static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, - u32 key) +static struct aio_kiocb * +lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key) { - struct list_head *pos; + struct aio_kiocb *kiocb; assert_spin_locked(&ctx->ctx_lock); @@ -1662,9 +1647,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, return NULL; /* TODO: use a hash or array, this sucks. */ - list_for_each(pos, &ctx->active_reqs) { - struct kiocb *kiocb = list_kiocb(pos); - if (kiocb->ki_obj.user == iocb) + list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { + if (kiocb->ki_user_iocb == iocb) return kiocb; } return NULL; @@ -1684,7 +1668,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result) { struct kioctx *ctx; - struct kiocb *kiocb; + struct aio_kiocb *kiocb; u32 key; int ret; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 90bc079d9982..fdcb4d69f430 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -15,6 +15,7 @@ #include <linux/buffer_head.h> #include <linux/vfs.h> #include <linux/writeback.h> +#include <linux/uio.h> #include <asm/uaccess.h> #include "bfs.h" diff --git a/fs/block_dev.c b/fs/block_dev.c index 975266be67d3..2e522aed6584 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -27,7 +27,6 @@ #include <linux/namei.h> #include <linux/log2.h> #include <linux/cleancache.h> -#include <linux/aio.h> #include <asm/uaccess.h> #include "internal.h" diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 30982bbd31c3..aee18f84e315 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -24,7 +24,6 @@ #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/mpage.h> -#include <linux/aio.h> #include <linux/falloc.h> #include <linux/swap.h> #include <linux/writeback.h> @@ -32,6 +31,7 @@ #include <linux/compat.h> #include <linux/slab.h> #include <linux/btrfs.h> +#include <linux/uio.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d2e732d7af52..686331f22b15 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -32,7 +32,6 @@ #include <linux/writeback.h> #include <linux/statfs.h> #include <linux/compat.h> -#include <linux/aio.h> #include <linux/bit_spinlock.h> #include <linux/xattr.h> #include <linux/posix_acl.h> @@ -43,6 +42,7 @@ #include <linux/btrfs.h> #include <linux/blkdev.h> #include <linux/posix_acl_xattr.h> +#include <linux/uio.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d533075a823d..139f2fea91a0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -7,7 +7,6 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/writeback.h> -#include <linux/aio.h> #include <linux/falloc.h> #include "super.h" @@ -808,7 +807,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *filp = iocb->ki_filp; struct ceph_file_info *fi = filp->private_data; - size_t len = iocb->ki_nbytes; + size_t len = iov_iter_count(to); struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); struct page *pinned_page = NULL; diff --git a/fs/dcache.c b/fs/dcache.c index c71e3732e53b..d99736a63e3c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2690,7 +2690,7 @@ static int __d_unalias(struct inode *inode, struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL, *m2 = NULL; - int ret = -EBUSY; + int ret = -ESTALE; /* If alias and dentry share a parent, then no extra locks required */ if (alias->d_parent == dentry->d_parent) diff --git a/fs/direct-io.c b/fs/direct-io.c index e181b6b2e297..6fb00e3f1059 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -37,7 +37,6 @@ #include <linux/uio.h> #include <linux/atomic.h> #include <linux/prefetch.h> -#include <linux/aio.h> /* * How many user pages to map in one call to get_user_pages(). This determines @@ -265,7 +264,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, ret = err; } - aio_complete(dio->iocb, ret, 0); + dio->iocb->ki_complete(dio->iocb, ret, 0); } kmem_cache_free(dio_cache, dio); @@ -1056,7 +1055,7 @@ static inline int drop_refcount(struct dio *dio) * operation. AIO can if it was a broken operation described above or * in fact if all the bios race to complete before we get here. In * that case dio_complete() translates the EIOCBQUEUED into the proper - * return code that the caller will hand to aio_complete(). + * return code that the caller will hand to ->complete(). * * This is managed by the bio_lock instead of being an atomic_t so that * completion paths can drop their ref and use the remaining count to diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index fd39bad6f1bd..79675089443d 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -31,7 +31,6 @@ #include <linux/security.h> #include <linux/compat.h> #include <linux/fs_stack.h> -#include <linux/aio.h> #include "ecryptfs_kernel.h" /** @@ -52,12 +51,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, struct file *file = iocb->ki_filp; rc = generic_file_read_iter(iocb, to); - /* - * Even though this is a async interface, we need to wait - * for IO to finish to update atime - */ - if (-EIOCBQUEUED == rc) - rc = wait_on_sync_kiocb(iocb); if (rc >= 0) { path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); touch_atime(path); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 6434bc000125..df9d6afbc5d5 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -31,7 +31,7 @@ #include <linux/mpage.h> #include <linux/fiemap.h> #include <linux/namei.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "ext2.h" #include "acl.h" #include "xattr.h" diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2c6ccc49ba27..db07ffbe7c85 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -27,7 +27,7 @@ #include <linux/writeback.h> #include <linux/mpage.h> #include <linux/namei.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "ext3.h" #include "xattr.h" #include "acl.h" diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 33a09da16c9c..598abbbe6786 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -23,9 +23,9 @@ #include <linux/jbd2.h> #include <linux/mount.h> #include <linux/path.h> -#include <linux/aio.h> #include <linux/quotaops.h> #include <linux/pagevec.h> +#include <linux/uio.h> #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 45fe924f82bc..740c7871c117 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -20,9 +20,9 @@ * (sct@redhat.com), 1993, 1998 */ -#include <linux/aio.h> #include "ext4_jbd2.h" #include "truncate.h" +#include <linux/uio.h> #include <trace/events/ext4.h> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5cb9a212b86f..a3f451370bef 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -37,7 +37,6 @@ #include <linux/printk.h> #include <linux/slab.h> #include <linux/ratelimit.h> -#include <linux/aio.h> #include <linux/bitops.h> #include "ext4_jbd2.h" diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index b24a2541a9ba..464984261e69 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -18,7 +18,6 @@ #include <linux/pagevec.h> #include <linux/mpage.h> #include <linux/namei.h> -#include <linux/aio.h> #include <linux/uio.h> #include <linux/bio.h> #include <linux/workqueue.h> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 985ed023a750..497f8515d205 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -12,12 +12,12 @@ #include <linux/f2fs_fs.h> #include <linux/buffer_head.h> #include <linux/mpage.h> -#include <linux/aio.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/blkdev.h> #include <linux/bio.h> #include <linux/prefetch.h> +#include <linux/uio.h> #include "f2fs.h" #include "node.h" diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 497c7c5263c7..8521207de229 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -19,7 +19,6 @@ #include <linux/mpage.h> #include <linux/buffer_head.h> #include <linux/mount.h> -#include <linux/aio.h> #include <linux/vfs.h> #include <linux/parser.h> #include <linux/uio.h> diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 28d0c7abba1c..b3fa05032234 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -38,7 +38,6 @@ #include <linux/device.h> #include <linux/file.h> #include <linux/fs.h> -#include <linux/aio.h> #include <linux/kdev_t.h> #include <linux/kthread.h> #include <linux/list.h> @@ -48,6 +47,7 @@ #include <linux/slab.h> #include <linux/stat.h> #include <linux/module.h> +#include <linux/uio.h> #include "fuse_i.h" diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 39706c57ad3c..95a2797eef66 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -19,7 +19,6 @@ #include <linux/pipe_fs_i.h> #include <linux/swap.h> #include <linux/splice.h> -#include <linux/aio.h> MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS("devname:fuse"); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c01ec3bdcfd8..ff102cbf16ea 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -15,8 +15,8 @@ #include <linux/module.h> #include <linux/compat.h> #include <linux/swap.h> -#include <linux/aio.h> #include <linux/falloc.h> +#include <linux/uio.h> static const struct file_operations fuse_direct_io_file_operations; @@ -528,6 +528,17 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) } } +static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io) +{ + if (io->err) + return io->err; + + if (io->bytes >= 0 && io->write) + return -EIO; + + return io->bytes < 0 ? io->size : io->bytes; +} + /** * In case of short read, the caller sets 'pos' to the position of * actual end of fuse request in IO request. Otherwise, if bytes_requested @@ -546,6 +557,7 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) */ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) { + bool is_sync = is_sync_kiocb(io->iocb); int left; spin_lock(&io->lock); @@ -555,30 +567,24 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) io->bytes = pos; left = --io->reqs; + if (!left && is_sync) + complete(io->done); spin_unlock(&io->lock); - if (!left) { - long res; + if (!left && !is_sync) { + ssize_t res = fuse_get_res_by_io(io); - if (io->err) - res = io->err; - else if (io->bytes >= 0 && io->write) - res = -EIO; - else { - res = io->bytes < 0 ? io->size : io->bytes; + if (res >= 0) { + struct inode *inode = file_inode(io->iocb->ki_filp); + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); - if (!is_sync_kiocb(io->iocb)) { - struct inode *inode = file_inode(io->iocb->ki_filp); - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_inode *fi = get_fuse_inode(inode); - - spin_lock(&fc->lock); - fi->attr_version = ++fc->attr_version; - spin_unlock(&fc->lock); - } + spin_lock(&fc->lock); + fi->attr_version = ++fc->attr_version; + spin_unlock(&fc->lock); } - aio_complete(io->iocb, res, 0); + io->iocb->ki_complete(io->iocb, res, 0); kfree(io); } } @@ -2801,6 +2807,7 @@ static ssize_t fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { + DECLARE_COMPLETION_ONSTACK(wait); ssize_t ret = 0; struct file *file = iocb->ki_filp; struct fuse_file *ff = file->private_data; @@ -2852,6 +2859,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) io->async = false; + if (io->async && is_sync_kiocb(iocb)) + io->done = &wait; + if (rw == WRITE) ret = __fuse_direct_write(io, iter, &pos); else @@ -2864,11 +2874,12 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, if (!is_sync_kiocb(iocb)) return -EIOCBQUEUED; - ret = wait_on_sync_kiocb(iocb); - } else { - kfree(io); + wait_for_completion(&wait); + ret = fuse_get_res_by_io(io); } + kfree(io); + if (rw == WRITE) { if (ret > 0) fuse_write_update_size(inode, pos); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1cdfb07c1376..7354dc142a50 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -263,6 +263,7 @@ struct fuse_io_priv { int err; struct kiocb *iocb; struct file *file; + struct completion *done; }; /** diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 7b3143064af1..1be3b061c05c 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -110,11 +110,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); if (error) goto out; - - if (acl) - set_cached_acl(inode, type, acl); - else - forget_cached_acl(inode, type); + set_cached_acl(inode, type, acl); out: kfree(data); return error; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 4ad4f94edebe..a6e6990aea39 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -20,7 +20,7 @@ #include <linux/swap.h> #include <linux/gfs2_ondisk.h> #include <linux/backing-dev.h> -#include <linux/aio.h> +#include <linux/uio.h> #include <trace/events/writeback.h> #include "gfs2.h" @@ -671,12 +671,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (alloc_required) { struct gfs2_alloc_parms ap = { .aflags = 0, }; - error = gfs2_quota_lock_check(ip); + requested = data_blocks + ind_blocks; + ap.target = requested; + error = gfs2_quota_lock_check(ip, &ap); if (error) goto out_unlock; - requested = data_blocks + ind_blocks; - ap.target = requested; error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_qunlock; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index f0b945ab853e..61296ecbd0e2 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1224,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size) if (gfs2_is_stuffed(ip) && (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { - error = gfs2_quota_lock_check(ip); + error = gfs2_quota_lock_check(ip, &ap); if (error) return error; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 3e32bb8e2d7e..8ec43ab5babf 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -25,7 +25,6 @@ #include <asm/uaccess.h> #include <linux/dlm.h> #include <linux/dlm_plock.h> -#include <linux/aio.h> #include <linux/delay.h> #include "gfs2.h" @@ -429,11 +428,11 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out_unlock; - ret = gfs2_quota_lock_check(ip); - if (ret) - goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); ap.target = data_blocks + ind_blocks; + ret = gfs2_quota_lock_check(ip, &ap); + if (ret) + goto out_unlock; ret = gfs2_inplace_reserve(ip, &ap); if (ret) goto out_quota_unlock; @@ -765,22 +764,30 @@ out: brelse(dibh); return error; } - -static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, - unsigned int *data_blocks, unsigned int *ind_blocks) +/** + * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of + * blocks, determine how many bytes can be written. + * @ip: The inode in question. + * @len: Max cap of bytes. What we return in *len must be <= this. + * @data_blocks: Compute and return the number of data blocks needed + * @ind_blocks: Compute and return the number of indirect blocks needed + * @max_blocks: The total blocks available to work with. + * + * Returns: void, but @len, @data_blocks and @ind_blocks are filled in. + */ +static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len, + unsigned int *data_blocks, unsigned int *ind_blocks, + unsigned int max_blocks) { + loff_t max = *len; const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned int max_blocks = ip->i_rgd->rd_free_clone; unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); for (tmp = max_data; tmp > sdp->sd_diptrs;) { tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); max_data -= tmp; } - /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, - so it might end up with fewer data blocks */ - if (max_data <= *data_blocks) - return; + *data_blocks = max_data; *ind_blocks = max_blocks - max_data; *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; @@ -797,7 +804,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks = 0, ind_blocks = 0, rblocks; - loff_t bytes, max_bytes; + loff_t bytes, max_bytes, max_blks = UINT_MAX; int error; const loff_t pos = offset; const loff_t count = len; @@ -819,6 +826,9 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t gfs2_size_hint(file, offset, len); + gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks); + ap.min_target = data_blocks + ind_blocks; + while (len > 0) { if (len < bytes) bytes = len; @@ -827,27 +837,41 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t offset += bytes; continue; } - error = gfs2_quota_lock_check(ip); + + /* We need to determine how many bytes we can actually + * fallocate without exceeding quota or going over the + * end of the fs. We start off optimistically by assuming + * we can write max_bytes */ + max_bytes = (len > max_chunk_size) ? max_chunk_size : len; + + /* Since max_bytes is most likely a theoretical max, we + * calculate a more realistic 'bytes' to serve as a good + * starting point for the number of bytes we may be able + * to write */ + gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); + ap.target = data_blocks + ind_blocks; + + error = gfs2_quota_lock_check(ip, &ap); if (error) return error; -retry: - gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); + /* ap.allowed tells us how many blocks quota will allow + * us to write. Check if this reduces max_blks */ + if (ap.allowed && ap.allowed < max_blks) + max_blks = ap.allowed; - ap.target = data_blocks + ind_blocks; error = gfs2_inplace_reserve(ip, &ap); - if (error) { - if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { - bytes >>= 1; - bytes &= bsize_mask; - if (bytes == 0) - bytes = sdp->sd_sb.sb_bsize; - goto retry; - } + if (error) goto out_qunlock; - } - max_bytes = bytes; - calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, - &max_bytes, &data_blocks, &ind_blocks); + + /* check if the selected rgrp limits our max_blks further */ + if (ap.allowed && ap.allowed < max_blks) + max_blks = ap.allowed; + + /* Almost done. Calculate bytes that can be written using + * max_blks. We also recompute max_bytes, data_blocks and + * ind_blocks */ + calc_max_reserv(ip, &max_bytes, &data_blocks, + &ind_blocks, max_blks); rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); @@ -931,6 +955,22 @@ out_uninit: return ret; } +static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, + size_t len, unsigned int flags) +{ + int error; + struct gfs2_inode *ip = GFS2_I(out->f_mapping->host); + + error = gfs2_rs_alloc(ip); + if (error) + return (ssize_t)error; + + gfs2_size_hint(out, *ppos, len); + + return iter_file_splice_write(pipe, out, ppos, len, flags); +} + #ifdef CONFIG_GFS2_FS_LOCKING_DLM /** @@ -1077,7 +1117,7 @@ const struct file_operations gfs2_file_fops = { .lock = gfs2_lock, .flock = gfs2_flock, .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, + .splice_write = gfs2_file_splice_write, .setlease = simple_nosetlease, .fallocate = gfs2_fallocate, }; @@ -1107,7 +1147,7 @@ const struct file_operations gfs2_file_fops_nolock = { .release = gfs2_release, .fsync = gfs2_fsync, .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, + .splice_write = gfs2_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, }; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f42dffba056a..0fa8062f85a7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2047,34 +2047,41 @@ static const struct file_operations gfs2_sbstats_fops = { int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) { - sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); - if (!sdp->debugfs_dir) - return -ENOMEM; - sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", - S_IFREG | S_IRUGO, - sdp->debugfs_dir, sdp, - &gfs2_glocks_fops); - if (!sdp->debugfs_dentry_glocks) + struct dentry *dent; + + dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root); + if (IS_ERR_OR_NULL(dent)) + goto fail; + sdp->debugfs_dir = dent; + + dent = debugfs_create_file("glocks", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_glocks_fops); + if (IS_ERR_OR_NULL(dent)) goto fail; + sdp->debugfs_dentry_glocks = dent; - sdp->debugfs_dentry_glstats = debugfs_create_file("glstats", - S_IFREG | S_IRUGO, - sdp->debugfs_dir, sdp, - &gfs2_glstats_fops); - if (!sdp->debugfs_dentry_glstats) + dent = debugfs_create_file("glstats", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_glstats_fops); + if (IS_ERR_OR_NULL(dent)) goto fail; + sdp->debugfs_dentry_glstats = dent; - sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats", - S_IFREG | S_IRUGO, - sdp->debugfs_dir, sdp, - &gfs2_sbstats_fops); - if (!sdp->debugfs_dentry_sbstats) + dent = debugfs_create_file("sbstats", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_sbstats_fops); + if (IS_ERR_OR_NULL(dent)) goto fail; + sdp->debugfs_dentry_sbstats = dent; return 0; fail: gfs2_delete_debugfs_file(sdp); - return -ENOMEM; + return dent ? PTR_ERR(dent) : -ENOMEM; } void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) @@ -2100,6 +2107,8 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) int gfs2_register_debugfs(void) { gfs2_root = debugfs_create_dir("gfs2", NULL); + if (IS_ERR(gfs2_root)) + return PTR_ERR(gfs2_root); return gfs2_root ? 0 : -ENOMEM; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 7a2dbbc0d634..58b75abf6ab2 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -301,8 +301,10 @@ struct gfs2_blkreserv { * to the allocation code. */ struct gfs2_alloc_parms { - u32 target; + u64 target; + u32 min_target; u32 aflags; + u64 allowed; }; enum { diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 73c72253faac..08bc84d7e768 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -382,7 +382,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; int error; - error = gfs2_quota_lock_check(ip); + error = gfs2_quota_lock_check(ip, &ap); if (error) goto out; @@ -525,7 +525,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, int error; if (da->nr_blocks) { - error = gfs2_quota_lock_check(dip); + error = gfs2_quota_lock_check(dip, &ap); if (error) goto fail_quota_locks; @@ -953,7 +953,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (da.nr_blocks) { struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; - error = gfs2_quota_lock_check(dip); + error = gfs2_quota_lock_check(dip, &ap); if (error) goto out_gunlock; @@ -1470,7 +1470,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (da.nr_blocks) { struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; - error = gfs2_quota_lock_check(ndip); + error = gfs2_quota_lock_check(ndip, &ap); if (error) goto out_gunlock; @@ -1669,6 +1669,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) kuid_t ouid, nuid; kgid_t ogid, ngid; int error; + struct gfs2_alloc_parms ap; ouid = inode->i_uid; ogid = inode->i_gid; @@ -1696,9 +1697,11 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (error) goto out; + ap.target = gfs2_get_inode_blocks(&ip->i_inode); + if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { - error = gfs2_quota_check(ip, nuid, ngid); + error = gfs2_quota_check(ip, nuid, ngid, &ap); if (error) goto out_gunlock_q; } @@ -1713,9 +1716,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { - u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); - gfs2_quota_change(ip, -blocks, ouid, ogid); - gfs2_quota_change(ip, blocks, nuid, ngid); + gfs2_quota_change(ip, -ap.target, ouid, ogid); + gfs2_quota_change(ip, ap.target, nuid, ngid); } out_end_trans: diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 3aa17d4d1cfc..5c27e48aa76f 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -923,6 +923,9 @@ restart: if (error) return error; + if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) + force_refresh = FORCE; + qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { @@ -974,11 +977,8 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) sizeof(struct gfs2_quota_data *), sort_qd, NULL); for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { - int force = NO_FORCE; qd = ip->i_res->rs_qa_qd[x]; - if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) - force = FORCE; - error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]); + error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]); if (error) break; } @@ -1094,14 +1094,33 @@ static int print_message(struct gfs2_quota_data *qd, char *type) return 0; } -int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) +/** + * gfs2_quota_check - check if allocating new blocks will exceed quota + * @ip: The inode for which this check is being performed + * @uid: The uid to check against + * @gid: The gid to check against + * @ap: The allocation parameters. ap->target contains the requested + * blocks. ap->min_target, if set, contains the minimum blks + * requested. + * + * Returns: 0 on success. + * min_req = ap->min_target ? ap->min_target : ap->target; + * quota must allow atleast min_req blks for success and + * ap->allowed is set to the number of blocks allowed + * + * -EDQUOT otherwise, quota violation. ap->allowed is set to number + * of blocks available. + */ +int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, + struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qd; - s64 value; + s64 value, warn, limit; unsigned int x; int error = 0; + ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */ if (!test_bit(GIF_QD_LOCKED, &ip->i_flags)) return 0; @@ -1115,30 +1134,37 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) qid_eq(qd->qd_id, make_kqid_gid(gid)))) continue; + warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn); + limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit); value = (s64)be64_to_cpu(qd->qd_qb.qb_value); spin_lock(&qd_lock); value += qd->qd_change; spin_unlock(&qd_lock); - if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { - print_message(qd, "exceeded"); - quota_send_warning(qd->qd_id, - sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); - - error = -EDQUOT; - break; - } else if (be64_to_cpu(qd->qd_qb.qb_warn) && - (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value && + if (limit > 0 && (limit - value) < ap->allowed) + ap->allowed = limit - value; + /* If we can't meet the target */ + if (limit && limit < (value + (s64)ap->target)) { + /* If no min_target specified or we don't meet + * min_target, return -EDQUOT */ + if (!ap->min_target || ap->min_target > ap->allowed) { + print_message(qd, "exceeded"); + quota_send_warning(qd->qd_id, + sdp->sd_vfs->s_dev, + QUOTA_NL_BHARDWARN); + error = -EDQUOT; + break; + } + } else if (warn && warn < value && time_after_eq(jiffies, qd->qd_last_warn + - gfs2_tune_get(sdp, - gt_quota_warn_period) * HZ)) { + gfs2_tune_get(sdp, gt_quota_warn_period) + * HZ)) { quota_send_warning(qd->qd_id, sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); error = print_message(qd, "warning"); qd->qd_last_warn = jiffies; } } - return error; } diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 55d506eb3c4a..ad04b3acae2b 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -24,7 +24,8 @@ extern void gfs2_quota_unhold(struct gfs2_inode *ip); extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); extern void gfs2_quota_unlock(struct gfs2_inode *ip); -extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); +extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, + struct gfs2_alloc_parms *ap); extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, kuid_t uid, kgid_t gid); @@ -37,7 +38,8 @@ extern int gfs2_quotad(void *data); extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); -static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) +static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, + struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); int ret; @@ -48,7 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; - ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); + ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap); if (ret) gfs2_quota_unlock(ip); return ret; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9150207f365c..6af2396a317c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1946,10 +1946,18 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) * @ip: the inode to reserve space for * @ap: the allocation parameters * - * Returns: errno + * We try our best to find an rgrp that has at least ap->target blocks + * available. After a couple of passes (loops == 2), the prospects of finding + * such an rgrp diminish. At this stage, we return the first rgrp that has + * atleast ap->min_target blocks available. Either way, we set ap->allowed to + * the number of blocks available in the chosen rgrp. + * + * Returns: 0 on success, + * -ENOMEM if a suitable rgrp can't be found + * errno otherwise */ -int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap) +int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; @@ -2012,7 +2020,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a /* Skip unuseable resource groups */ if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) || - (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) + (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) @@ -2027,11 +2035,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a goto check_rgrp; /* If rgrp has enough free space, use it */ - if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) { + if (rs->rs_rbm.rgd->rd_free_clone >= ap->target || + (loops == 2 && ap->min_target && + rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) { ip->i_rgd = rs->rs_rbm.rgd; + ap->allowed = ip->i_rgd->rd_free_clone; return 0; } - check_rgrp: /* Check for unlinked inodes which can be reclaimed */ if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b104f4af3afd..68972ecfbb01 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -41,7 +41,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); #define GFS2_AF_ORLOV 1 -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap); +extern int gfs2_inplace_reserve(struct gfs2_inode *ip, + struct gfs2_alloc_parms *ap); extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 0b81f783f787..fd260ce8869a 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -732,7 +732,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (error) return error; - error = gfs2_quota_lock_check(ip); + error = gfs2_quota_lock_check(ip, &ap); if (error) return error; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index d0929bc81782..98d4ea45bb70 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -14,7 +14,7 @@ #include <linux/pagemap.h> #include <linux/mpage.h> #include <linux/sched.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "hfs_fs.h" #include "btree.h" diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 0cf786f2d046..f541196d4ee9 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -14,7 +14,7 @@ #include <linux/pagemap.h> #include <linux/mpage.h> #include <linux/sched.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index d72817ac51f6..762c7a3cf43d 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -195,7 +195,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat /* unchecked xdatum is chained with c->xattr_unchecked */ list_del_init(&xd->xindex); - dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n", + dbg_xattr("success on verifying xdatum (xid=%u, version=%u)\n", xd->xid, xd->version); return 0; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index bd3df1ca3c9b..3197aed10614 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -22,8 +22,8 @@ #include <linux/buffer_head.h> #include <linux/pagemap.h> #include <linux/quotaops.h> +#include <linux/uio.h> #include <linux/writeback.h> -#include <linux/aio.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 5d30c56ae075..4cd9798f4948 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -102,7 +102,7 @@ void jfs_error(struct super_block *sb, const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - pr_err("ERROR: (device %s): %pf: %pV\n", + pr_err("ERROR: (device %s): %ps: %pV\n", sb->s_id, __builtin_return_address(0), &vaf); va_end(args); diff --git a/fs/namei.c b/fs/namei.c index c83145af4bfc..76fb76a0818b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -119,15 +119,14 @@ * PATH_MAX includes the nul terminator --RR. */ -#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) +#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) struct filename * getname_flags(const char __user *filename, int flags, int *empty) { - struct filename *result, *err; - int len; - long max; + struct filename *result; char *kname; + int len; result = audit_reusename(filename); if (result) @@ -136,22 +135,18 @@ getname_flags(const char __user *filename, int flags, int *empty) result = __getname(); if (unlikely(!result)) return ERR_PTR(-ENOMEM); - result->refcnt = 1; /* * First, try to embed the struct filename inside the names_cache * allocation */ - kname = (char *)result + sizeof(*result); + kname = (char *)result->iname; result->name = kname; - result->separate = false; - max = EMBEDDED_NAME_MAX; -recopy: - len = strncpy_from_user(kname, filename, max); + len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX); if (unlikely(len < 0)) { - err = ERR_PTR(len); - goto error; + __putname(result); + return ERR_PTR(len); } /* @@ -160,43 +155,49 @@ recopy: * names_cache allocation for the pathname, and re-do the copy from * userland. */ - if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) { + if (unlikely(len == EMBEDDED_NAME_MAX)) { + const size_t size = offsetof(struct filename, iname[1]); kname = (char *)result; - result = kzalloc(sizeof(*result), GFP_KERNEL); - if (!result) { - err = ERR_PTR(-ENOMEM); - result = (struct filename *)kname; - goto error; + /* + * size is chosen that way we to guarantee that + * result->iname[0] is within the same object and that + * kname can't be equal to result->iname, no matter what. + */ + result = kzalloc(size, GFP_KERNEL); + if (unlikely(!result)) { + __putname(kname); + return ERR_PTR(-ENOMEM); } result->name = kname; - result->separate = true; - result->refcnt = 1; - max = PATH_MAX; - goto recopy; + len = strncpy_from_user(kname, filename, PATH_MAX); + if (unlikely(len < 0)) { + __putname(kname); + kfree(result); + return ERR_PTR(len); + } + if (unlikely(len == PATH_MAX)) { + __putname(kname); + kfree(result); + return ERR_PTR(-ENAMETOOLONG); + } } + result->refcnt = 1; /* The empty path is special. */ if (unlikely(!len)) { if (empty) *empty = 1; - err = ERR_PTR(-ENOENT); - if (!(flags & LOOKUP_EMPTY)) - goto error; + if (!(flags & LOOKUP_EMPTY)) { + putname(result); + return ERR_PTR(-ENOENT); + } } - err = ERR_PTR(-ENAMETOOLONG); - if (unlikely(len >= PATH_MAX)) - goto error; - result->uptr = filename; result->aname = NULL; audit_getname(result); return result; - -error: - putname(result); - return err; } struct filename * @@ -216,8 +217,7 @@ getname_kernel(const char * filename) return ERR_PTR(-ENOMEM); if (len <= EMBEDDED_NAME_MAX) { - result->name = (char *)(result) + sizeof(*result); - result->separate = false; + result->name = (char *)result->iname; } else if (len <= PATH_MAX) { struct filename *tmp; @@ -227,7 +227,6 @@ getname_kernel(const char * filename) return ERR_PTR(-ENOMEM); } tmp->name = (char *)result; - tmp->separate = true; result = tmp; } else { __putname(result); @@ -249,7 +248,7 @@ void putname(struct filename *name) if (--name->refcnt > 0) return; - if (name->separate) { + if (name->name != name->iname) { __putname(name->name); kfree(name); } else @@ -1851,10 +1850,11 @@ static int link_path_walk(const char *name, struct nameidata *nd) return err; } -static int path_init(int dfd, const char *name, unsigned int flags, +static int path_init(int dfd, const struct filename *name, unsigned int flags, struct nameidata *nd) { int retval = 0; + const char *s = name->name; nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; @@ -1863,7 +1863,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (flags & LOOKUP_ROOT) { struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; - if (*name) { + if (*s) { if (!d_can_lookup(root)) return -ENOTDIR; retval = inode_permission(inode, MAY_EXEC); @@ -1885,7 +1885,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->root.mnt = NULL; nd->m_seq = read_seqbegin(&mount_lock); - if (*name=='/') { + if (*s == '/') { if (flags & LOOKUP_RCU) { rcu_read_lock(); nd->seq = set_root_rcu(nd); @@ -1919,7 +1919,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, dentry = f.file->f_path.dentry; - if (*name) { + if (*s) { if (!d_can_lookup(dentry)) { fdput(f); return -ENOTDIR; @@ -1949,7 +1949,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, return -ECHILD; done: current->total_link_count = 0; - return link_path_walk(name, nd); + return link_path_walk(s, nd); } static void path_cleanup(struct nameidata *nd) @@ -1972,7 +1972,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path) } /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ -static int path_lookupat(int dfd, const char *name, +static int path_lookupat(int dfd, const struct filename *name, unsigned int flags, struct nameidata *nd) { struct path path; @@ -2027,31 +2027,17 @@ static int path_lookupat(int dfd, const char *name, static int filename_lookup(int dfd, struct filename *name, unsigned int flags, struct nameidata *nd) { - int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd); + int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); if (unlikely(retval == -ECHILD)) - retval = path_lookupat(dfd, name->name, flags, nd); + retval = path_lookupat(dfd, name, flags, nd); if (unlikely(retval == -ESTALE)) - retval = path_lookupat(dfd, name->name, - flags | LOOKUP_REVAL, nd); + retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); if (likely(!retval)) audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); return retval; } -static int do_path_lookup(int dfd, const char *name, - unsigned int flags, struct nameidata *nd) -{ - struct filename *filename = getname_kernel(name); - int retval = PTR_ERR(filename); - - if (!IS_ERR(filename)) { - retval = filename_lookup(dfd, filename, flags, nd); - putname(filename); - } - return retval; -} - /* does lookup, returns the object with parent locked */ struct dentry *kern_path_locked(const char *name, struct path *path) { @@ -2089,9 +2075,15 @@ out: int kern_path(const char *name, unsigned int flags, struct path *path) { struct nameidata nd; - int res = do_path_lookup(AT_FDCWD, name, flags, &nd); - if (!res) - *path = nd.path; + struct filename *filename = getname_kernel(name); + int res = PTR_ERR(filename); + + if (!IS_ERR(filename)) { + res = filename_lookup(AT_FDCWD, filename, flags, &nd); + putname(filename); + if (!res) + *path = nd.path; + } return res; } EXPORT_SYMBOL(kern_path); @@ -2108,15 +2100,22 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, const char *name, unsigned int flags, struct path *path) { - struct nameidata nd; - int err; - nd.root.dentry = dentry; - nd.root.mnt = mnt; + struct filename *filename = getname_kernel(name); + int err = PTR_ERR(filename); + BUG_ON(flags & LOOKUP_PARENT); - /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ - err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd); - if (!err) - *path = nd.path; + + /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */ + if (!IS_ERR(filename)) { + struct nameidata nd; + nd.root.dentry = dentry; + nd.root.mnt = mnt; + err = filename_lookup(AT_FDCWD, filename, + flags | LOOKUP_ROOT, &nd); + if (!err) + *path = nd.path; + putname(filename); + } return err; } EXPORT_SYMBOL(vfs_path_lookup); @@ -2138,9 +2137,7 @@ static struct dentry *lookup_hash(struct nameidata *nd) * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should - * not be called by generic code. Also note that by using this function the - * nameidata argument is passed to the filesystem methods and a filesystem - * using this helper needs to be prepared for that. + * not be called by generic code. */ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { @@ -2341,7 +2338,8 @@ out: * Returns 0 and "path" will be valid on success; Returns error otherwise. */ static int -path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) +path_mountpoint(int dfd, const struct filename *name, struct path *path, + unsigned int flags) { struct nameidata nd; int err; @@ -2370,20 +2368,20 @@ out: } static int -filename_mountpoint(int dfd, struct filename *s, struct path *path, +filename_mountpoint(int dfd, struct filename *name, struct path *path, unsigned int flags) { int error; - if (IS_ERR(s)) - return PTR_ERR(s); - error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); + if (IS_ERR(name)) + return PTR_ERR(name); + error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU); if (unlikely(error == -ECHILD)) - error = path_mountpoint(dfd, s->name, path, flags); + error = path_mountpoint(dfd, name, path, flags); if (unlikely(error == -ESTALE)) - error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); + error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL); if (likely(!error)) - audit_inode(s, path->dentry, 0); - putname(s); + audit_inode(name, path->dentry, 0); + putname(name); return error; } @@ -3156,7 +3154,7 @@ static int do_tmpfile(int dfd, struct filename *pathname, static const struct qstr name = QSTR_INIT("/", 1); struct dentry *dentry, *child; struct inode *dir; - int error = path_lookupat(dfd, pathname->name, + int error = path_lookupat(dfd, pathname, flags | LOOKUP_DIRECTORY, nd); if (unlikely(error)) return error; @@ -3229,7 +3227,7 @@ static struct file *path_openat(int dfd, struct filename *pathname, goto out; } - error = path_init(dfd, pathname->name, flags, nd); + error = path_init(dfd, pathname, flags, nd); if (unlikely(error)) goto out; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e907c8cf732e..c3929fb2ab26 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -265,7 +265,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t return -EINVAL; #else - VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); + VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); if (rw == READ) return nfs_file_direct_read(iocb, iter, pos); @@ -393,7 +393,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) long res = (long) dreq->error; if (!res) res = (long) dreq->count; - aio_complete(dreq->iocb, res, 0); + dreq->iocb->ki_complete(dreq->iocb, res, 0); } complete_all(&dreq->completion); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index e679d24c39d3..37b15582e0de 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -26,7 +26,6 @@ #include <linux/nfs_mount.h> #include <linux/mm.h> #include <linux/pagemap.h> -#include <linux/aio.h> #include <linux/gfp.h> #include <linux/swap.h> diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 8b5969538f39..ab4987bc637f 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -26,7 +26,7 @@ #include <linux/mpage.h> #include <linux/pagemap.h> #include <linux/writeback.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "nilfs.h" #include "btnode.h" #include "segment.h" diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index 36ae529511c4..2ff263e6d363 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o -ccflags-y := -DNTFS_VERSION=\"2.1.31\" +ccflags-y := -DNTFS_VERSION=\"2.1.32\" ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 1da9b2d184dc..c1da78dad1af 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1,7 +1,7 @@ /* * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. + * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc. * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -28,7 +28,6 @@ #include <linux/swap.h> #include <linux/uio.h> #include <linux/writeback.h> -#include <linux/aio.h> #include <asm/page.h> #include <asm/uaccess.h> @@ -329,62 +328,168 @@ err_out: return err; } -/** - * ntfs_fault_in_pages_readable - - * - * Fault a number of userspace pages into pagetables. - * - * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes - * with more than two userspace pages as well as handling the single page case - * elegantly. - * - * If you find this difficult to understand, then think of the while loop being - * the following code, except that we do without the integer variable ret: - * - * do { - * ret = __get_user(c, uaddr); - * uaddr += PAGE_SIZE; - * } while (!ret && uaddr < end); - * - * Note, the final __get_user() may well run out-of-bounds of the user buffer, - * but _not_ out-of-bounds of the page the user buffer belongs to, and since - * this is only a read and not a write, and since it is still in the same page, - * it should not matter and this makes the code much simpler. - */ -static inline void ntfs_fault_in_pages_readable(const char __user *uaddr, - int bytes) +static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, + size_t *count) { - const char __user *end; - volatile char c; - - /* Set @end to the first byte outside the last page we care about. */ - end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes); - - while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end)) - ; -} - -/** - * ntfs_fault_in_pages_readable_iovec - - * - * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs. - */ -static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov, - size_t iov_ofs, int bytes) -{ - do { - const char __user *buf; - unsigned len; + loff_t pos; + s64 end, ll; + ssize_t err; + unsigned long flags; + struct inode *vi = file_inode(file); + ntfs_inode *base_ni, *ni = NTFS_I(vi); + ntfs_volume *vol = ni->vol; - buf = iov->iov_base + iov_ofs; - len = iov->iov_len - iov_ofs; - if (len > bytes) - len = bytes; - ntfs_fault_in_pages_readable(buf, len); - bytes -= len; - iov++; - iov_ofs = 0; - } while (bytes); + ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " + "0x%llx, count 0x%lx.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), + (unsigned long long)*ppos, (unsigned long)*count); + /* We can write back this queue in page reclaim. */ + current->backing_dev_info = inode_to_bdi(vi); + err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode)); + if (unlikely(err)) + goto out; + /* + * All checks have passed. Before we start doing any writing we want + * to abort any totally illegal writes. + */ + BUG_ON(NInoMstProtected(ni)); + BUG_ON(ni->type != AT_DATA); + /* If file is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + /* Only $DATA attributes can be encrypted. */ + /* + * Reminder for later: Encrypted files are _always_ + * non-resident so that the content can always be encrypted. + */ + ntfs_debug("Denying write access to encrypted file."); + err = -EACCES; + goto out; + } + if (NInoCompressed(ni)) { + /* Only unnamed $DATA attribute can be compressed. */ + BUG_ON(ni->name_len); + /* + * Reminder for later: If resident, the data is not actually + * compressed. Only on the switch to non-resident does + * compression kick in. This is in contrast to encrypted files + * (see above). + */ + ntfs_error(vi->i_sb, "Writing to compressed files is not " + "implemented yet. Sorry."); + err = -EOPNOTSUPP; + goto out; + } + if (*count == 0) + goto out; + base_ni = ni; + if (NInoAttr(ni)) + base_ni = ni->ext.base_ntfs_ino; + err = file_remove_suid(file); + if (unlikely(err)) + goto out; + /* + * Our ->update_time method always succeeds thus file_update_time() + * cannot fail either so there is no need to check the return code. + */ + file_update_time(file); + pos = *ppos; + /* The first byte after the last cluster being written to. */ + end = (pos + *count + vol->cluster_size_mask) & + ~(u64)vol->cluster_size_mask; + /* + * If the write goes beyond the allocated size, extend the allocation + * to cover the whole of the write, rounded up to the nearest cluster. + */ + read_lock_irqsave(&ni->size_lock, flags); + ll = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (end > ll) { + /* + * Extend the allocation without changing the data size. + * + * Note we ensure the allocation is big enough to at least + * write some data but we do not require the allocation to be + * complete, i.e. it may be partial. + */ + ll = ntfs_attr_extend_allocation(ni, end, -1, pos); + if (likely(ll >= 0)) { + BUG_ON(pos >= ll); + /* If the extension was partial truncate the write. */ + if (end > ll) { + ntfs_debug("Truncating write to inode 0x%lx, " + "attribute type 0x%x, because " + "the allocation was only " + "partially extended.", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type)); + *count = ll - pos; + } + } else { + err = ll; + read_lock_irqsave(&ni->size_lock, flags); + ll = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + /* Perform a partial write if possible or fail. */ + if (pos < ll) { + ntfs_debug("Truncating write to inode 0x%lx " + "attribute type 0x%x, because " + "extending the allocation " + "failed (error %d).", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type), + (int)-err); + *count = ll - pos; + } else { + if (err != -ENOSPC) + ntfs_error(vi->i_sb, "Cannot perform " + "write to inode " + "0x%lx, attribute " + "type 0x%x, because " + "extending the " + "allocation failed " + "(error %ld).", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type), + (long)-err); + else + ntfs_debug("Cannot perform write to " + "inode 0x%lx, " + "attribute type 0x%x, " + "because there is not " + "space left.", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type)); + goto out; + } + } + } + /* + * If the write starts beyond the initialized size, extend it up to the + * beginning of the write and initialize all non-sparse space between + * the old initialized size and the new one. This automatically also + * increments the vfs inode->i_size to keep it above or equal to the + * initialized_size. + */ + read_lock_irqsave(&ni->size_lock, flags); + ll = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (pos > ll) { + /* + * Wait for ongoing direct i/o to complete before proceeding. + * New direct i/o cannot start as we hold i_mutex. + */ + inode_dio_wait(vi); + err = ntfs_attr_extend_initialized(ni, pos); + if (unlikely(err < 0)) + ntfs_error(vi->i_sb, "Cannot perform write to inode " + "0x%lx, attribute type 0x%x, because " + "extending the initialized size " + "failed (error %d).", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), + (int)-err); + } +out: + return err; } /** @@ -421,8 +526,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, goto err_out; } } - err = add_to_page_cache_lru(*cached_page, mapping, index, - GFP_KERNEL); + err = add_to_page_cache_lru(*cached_page, mapping, + index, GFP_KERNEL); if (unlikely(err)) { if (err == -EEXIST) continue; @@ -1268,180 +1373,6 @@ rl_not_mapped_enoent: return err; } -/* - * Copy as much as we can into the pages and return the number of bytes which - * were successfully copied. If a fault is encountered then clear the pages - * out to (ofs + bytes) and return the number of bytes which were copied. - */ -static inline size_t ntfs_copy_from_user(struct page **pages, - unsigned nr_pages, unsigned ofs, const char __user *buf, - size_t bytes) -{ - struct page **last_page = pages + nr_pages; - char *addr; - size_t total = 0; - unsigned len; - int left; - - do { - len = PAGE_CACHE_SIZE - ofs; - if (len > bytes) - len = bytes; - addr = kmap_atomic(*pages); - left = __copy_from_user_inatomic(addr + ofs, buf, len); - kunmap_atomic(addr); - if (unlikely(left)) { - /* Do it the slow way. */ - addr = kmap(*pages); - left = __copy_from_user(addr + ofs, buf, len); - kunmap(*pages); - if (unlikely(left)) - goto err_out; - } - total += len; - bytes -= len; - if (!bytes) - break; - buf += len; - ofs = 0; - } while (++pages < last_page); -out: - return total; -err_out: - total += len - left; - /* Zero the rest of the target like __copy_from_user(). */ - while (++pages < last_page) { - bytes -= len; - if (!bytes) - break; - len = PAGE_CACHE_SIZE; - if (len > bytes) - len = bytes; - zero_user(*pages, 0, len); - } - goto out; -} - -static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr, - const struct iovec *iov, size_t iov_ofs, size_t bytes) -{ - size_t total = 0; - - while (1) { - const char __user *buf = iov->iov_base + iov_ofs; - unsigned len; - size_t left; - - len = iov->iov_len - iov_ofs; - if (len > bytes) - len = bytes; - left = __copy_from_user_inatomic(vaddr, buf, len); - total += len; - bytes -= len; - vaddr += len; - if (unlikely(left)) { - total -= left; - break; - } - if (!bytes) - break; - iov++; - iov_ofs = 0; - } - return total; -} - -static inline void ntfs_set_next_iovec(const struct iovec **iovp, - size_t *iov_ofsp, size_t bytes) -{ - const struct iovec *iov = *iovp; - size_t iov_ofs = *iov_ofsp; - - while (bytes) { - unsigned len; - - len = iov->iov_len - iov_ofs; - if (len > bytes) - len = bytes; - bytes -= len; - iov_ofs += len; - if (iov->iov_len == iov_ofs) { - iov++; - iov_ofs = 0; - } - } - *iovp = iov; - *iov_ofsp = iov_ofs; -} - -/* - * This has the same side-effects and return value as ntfs_copy_from_user(). - * The difference is that on a fault we need to memset the remainder of the - * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s - * single-segment behaviour. - * - * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when - * atomic and when not atomic. This is ok because it calls - * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In - * fact, the only difference between __copy_from_user_inatomic() and - * __copy_from_user() is that the latter calls might_sleep() and the former - * should not zero the tail of the buffer on error. And on many architectures - * __copy_from_user_inatomic() is just defined to __copy_from_user() so it - * makes no difference at all on those architectures. - */ -static inline size_t ntfs_copy_from_user_iovec(struct page **pages, - unsigned nr_pages, unsigned ofs, const struct iovec **iov, - size_t *iov_ofs, size_t bytes) -{ - struct page **last_page = pages + nr_pages; - char *addr; - size_t copied, len, total = 0; - - do { - len = PAGE_CACHE_SIZE - ofs; - if (len > bytes) - len = bytes; - addr = kmap_atomic(*pages); - copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, - *iov, *iov_ofs, len); - kunmap_atomic(addr); - if (unlikely(copied != len)) { - /* Do it the slow way. */ - addr = kmap(*pages); - copied = __ntfs_copy_from_user_iovec_inatomic(addr + - ofs, *iov, *iov_ofs, len); - if (unlikely(copied != len)) - goto err_out; - kunmap(*pages); - } - total += len; - ntfs_set_next_iovec(iov, iov_ofs, len); - bytes -= len; - if (!bytes) - break; - ofs = 0; - } while (++pages < last_page); -out: - return total; -err_out: - BUG_ON(copied > len); - /* Zero the rest of the target like __copy_from_user(). */ - memset(addr + ofs + copied, 0, len - copied); - kunmap(*pages); - total += copied; - ntfs_set_next_iovec(iov, iov_ofs, copied); - while (++pages < last_page) { - bytes -= len; - if (!bytes) - break; - len = PAGE_CACHE_SIZE; - if (len > bytes) - len = bytes; - zero_user(*pages, 0, len); - } - goto out; -} - static inline void ntfs_flush_dcache_pages(struct page **pages, unsigned nr_pages) { @@ -1762,86 +1693,83 @@ err_out: return err; } -static void ntfs_write_failed(struct address_space *mapping, loff_t to) +/* + * Copy as much as we can into the pages and return the number of bytes which + * were successfully copied. If a fault is encountered then clear the pages + * out to (ofs + bytes) and return the number of bytes which were copied. + */ +static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages, + unsigned ofs, struct iov_iter *i, size_t bytes) { - struct inode *inode = mapping->host; + struct page **last_page = pages + nr_pages; + size_t total = 0; + struct iov_iter data = *i; + unsigned len, copied; - if (to > inode->i_size) { - truncate_pagecache(inode, inode->i_size); - ntfs_truncate_vfs(inode); - } + do { + len = PAGE_CACHE_SIZE - ofs; + if (len > bytes) + len = bytes; + copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs, + len); + total += copied; + bytes -= copied; + if (!bytes) + break; + iov_iter_advance(&data, copied); + if (copied < len) + goto err; + ofs = 0; + } while (++pages < last_page); +out: + return total; +err: + /* Zero the rest of the target like __copy_from_user(). */ + len = PAGE_CACHE_SIZE - copied; + do { + if (len > bytes) + len = bytes; + zero_user(*pages, copied, len); + bytes -= len; + copied = 0; + len = PAGE_CACHE_SIZE; + } while (++pages < last_page); + goto out; } /** - * ntfs_file_buffered_write - - * - * Locking: The vfs is holding ->i_mutex on the inode. + * ntfs_perform_write - perform buffered write to a file + * @file: file to write to + * @i: iov_iter with data to write + * @pos: byte offset in file at which to begin writing to */ -static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, - const struct iovec *iov, unsigned long nr_segs, - loff_t pos, loff_t *ppos, size_t count) +static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i, + loff_t pos) { - struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *vi = mapping->host; ntfs_inode *ni = NTFS_I(vi); ntfs_volume *vol = ni->vol; struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; struct page *cached_page = NULL; - char __user *buf = NULL; - s64 end, ll; VCN last_vcn; LCN lcn; - unsigned long flags; - size_t bytes, iov_ofs = 0; /* Offset in the current iovec. */ - ssize_t status, written; + size_t bytes; + ssize_t status, written = 0; unsigned nr_pages; - int err; - ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " - "pos 0x%llx, count 0x%lx.", - vi->i_ino, (unsigned)le32_to_cpu(ni->type), - (unsigned long long)pos, (unsigned long)count); - if (unlikely(!count)) - return 0; - BUG_ON(NInoMstProtected(ni)); - /* - * If the attribute is not an index root and it is encrypted or - * compressed, we cannot write to it yet. Note we need to check for - * AT_INDEX_ALLOCATION since this is the type of both directory and - * index inodes. - */ - if (ni->type != AT_INDEX_ALLOCATION) { - /* If file is encrypted, deny access, just like NT4. */ - if (NInoEncrypted(ni)) { - /* - * Reminder for later: Encrypted files are _always_ - * non-resident so that the content can always be - * encrypted. - */ - ntfs_debug("Denying write access to encrypted file."); - return -EACCES; - } - if (NInoCompressed(ni)) { - /* Only unnamed $DATA attribute can be compressed. */ - BUG_ON(ni->type != AT_DATA); - BUG_ON(ni->name_len); - /* - * Reminder for later: If resident, the data is not - * actually compressed. Only on the switch to non- - * resident does compression kick in. This is in - * contrast to encrypted files (see above). - */ - ntfs_error(vi->i_sb, "Writing to compressed files is " - "not implemented yet. Sorry."); - return -EOPNOTSUPP; - } - } + ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " + "0x%llx, count 0x%lx.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), + (unsigned long long)pos, + (unsigned long)iov_iter_count(i)); /* * If a previous ntfs_truncate() failed, repeat it and abort if it * fails again. */ if (unlikely(NInoTruncateFailed(ni))) { + int err; + inode_dio_wait(vi); err = ntfs_truncate(vi); if (err || NInoTruncateFailed(ni)) { @@ -1855,81 +1783,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, return err; } } - /* The first byte after the write. */ - end = pos + count; - /* - * If the write goes beyond the allocated size, extend the allocation - * to cover the whole of the write, rounded up to the nearest cluster. - */ - read_lock_irqsave(&ni->size_lock, flags); - ll = ni->allocated_size; - read_unlock_irqrestore(&ni->size_lock, flags); - if (end > ll) { - /* Extend the allocation without changing the data size. */ - ll = ntfs_attr_extend_allocation(ni, end, -1, pos); - if (likely(ll >= 0)) { - BUG_ON(pos >= ll); - /* If the extension was partial truncate the write. */ - if (end > ll) { - ntfs_debug("Truncating write to inode 0x%lx, " - "attribute type 0x%x, because " - "the allocation was only " - "partially extended.", - vi->i_ino, (unsigned) - le32_to_cpu(ni->type)); - end = ll; - count = ll - pos; - } - } else { - err = ll; - read_lock_irqsave(&ni->size_lock, flags); - ll = ni->allocated_size; - read_unlock_irqrestore(&ni->size_lock, flags); - /* Perform a partial write if possible or fail. */ - if (pos < ll) { - ntfs_debug("Truncating write to inode 0x%lx, " - "attribute type 0x%x, because " - "extending the allocation " - "failed (error code %i).", - vi->i_ino, (unsigned) - le32_to_cpu(ni->type), err); - end = ll; - count = ll - pos; - } else { - ntfs_error(vol->sb, "Cannot perform write to " - "inode 0x%lx, attribute type " - "0x%x, because extending the " - "allocation failed (error " - "code %i).", vi->i_ino, - (unsigned) - le32_to_cpu(ni->type), err); - return err; - } - } - } - written = 0; - /* - * If the write starts beyond the initialized size, extend it up to the - * beginning of the write and initialize all non-sparse space between - * the old initialized size and the new one. This automatically also - * increments the vfs inode->i_size to keep it above or equal to the - * initialized_size. - */ - read_lock_irqsave(&ni->size_lock, flags); - ll = ni->initialized_size; - read_unlock_irqrestore(&ni->size_lock, flags); - if (pos > ll) { - err = ntfs_attr_extend_initialized(ni, pos); - if (err < 0) { - ntfs_error(vol->sb, "Cannot perform write to inode " - "0x%lx, attribute type 0x%x, because " - "extending the initialized size " - "failed (error code %i).", vi->i_ino, - (unsigned)le32_to_cpu(ni->type), err); - status = err; - goto err_out; - } - } /* * Determine the number of pages per cluster for non-resident * attributes. @@ -1937,10 +1790,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, nr_pages = 1; if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni)) nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT; - /* Finally, perform the actual write. */ last_vcn = -1; - if (likely(nr_segs == 1)) - buf = iov->iov_base; do { VCN vcn; pgoff_t idx, start_idx; @@ -1965,10 +1815,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, vol->cluster_size_bits, false); up_read(&ni->runlist.lock); if (unlikely(lcn < LCN_HOLE)) { - status = -EIO; if (lcn == LCN_ENOMEM) status = -ENOMEM; - else + else { + status = -EIO; ntfs_error(vol->sb, "Cannot " "perform write to " "inode 0x%lx, " @@ -1977,6 +1827,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, "is corrupt.", vi->i_ino, (unsigned) le32_to_cpu(ni->type)); + } break; } if (lcn == LCN_HOLE) { @@ -1989,8 +1840,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, } } } - if (bytes > count) - bytes = count; + if (bytes > iov_iter_count(i)) + bytes = iov_iter_count(i); +again: /* * Bring in the user page(s) that we will copy from _first_. * Otherwise there is a nasty deadlock on copying from the same @@ -1999,10 +1851,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, * pages being swapped out between us bringing them into memory * and doing the actual copying. */ - if (likely(nr_segs == 1)) - ntfs_fault_in_pages_readable(buf, bytes); - else - ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); + if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) { + status = -EFAULT; + break; + } /* Get and lock @do_pages starting at index @start_idx. */ status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, pages, &cached_page); @@ -2018,56 +1870,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, status = ntfs_prepare_pages_for_non_resident_write( pages, do_pages, pos, bytes); if (unlikely(status)) { - loff_t i_size; - do { unlock_page(pages[--do_pages]); page_cache_release(pages[do_pages]); } while (do_pages); - /* - * The write preparation may have instantiated - * allocated space outside i_size. Trim this - * off again. We can ignore any errors in this - * case as we will just be waisting a bit of - * allocated space, which is not a disaster. - */ - i_size = i_size_read(vi); - if (pos + bytes > i_size) { - ntfs_write_failed(mapping, pos + bytes); - } break; } } u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index; - if (likely(nr_segs == 1)) { - copied = ntfs_copy_from_user(pages + u, do_pages - u, - ofs, buf, bytes); - buf += copied; - } else - copied = ntfs_copy_from_user_iovec(pages + u, - do_pages - u, ofs, &iov, &iov_ofs, - bytes); + copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs, + i, bytes); ntfs_flush_dcache_pages(pages + u, do_pages - u); - status = ntfs_commit_pages_after_write(pages, do_pages, pos, - bytes); - if (likely(!status)) { - written += copied; - count -= copied; - pos += copied; - if (unlikely(copied != bytes)) - status = -EFAULT; + status = 0; + if (likely(copied == bytes)) { + status = ntfs_commit_pages_after_write(pages, do_pages, + pos, bytes); + if (!status) + status = bytes; } do { unlock_page(pages[--do_pages]); page_cache_release(pages[do_pages]); } while (do_pages); - if (unlikely(status)) + if (unlikely(status < 0)) break; - balance_dirty_pages_ratelimited(mapping); + copied = status; cond_resched(); - } while (count); -err_out: - *ppos = pos; + if (unlikely(!copied)) { + size_t sc; + + /* + * We failed to copy anything. Fall back to single + * segment length write. + * + * This is needed to avoid possible livelock in the + * case that all segments in the iov cannot be copied + * at once without a pagefault. + */ + sc = iov_iter_single_seg_count(i); + if (bytes > sc) + bytes = sc; + goto again; + } + iov_iter_advance(i, copied); + pos += copied; + written += copied; + balance_dirty_pages_ratelimited(mapping); + if (fatal_signal_pending(current)) { + status = -EINTR; + break; + } + } while (iov_iter_count(i)); if (cached_page) page_cache_release(cached_page); ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", @@ -2077,59 +1930,56 @@ err_out: } /** - * ntfs_file_aio_write_nolock - + * ntfs_file_write_iter_nolock - write data to a file + * @iocb: IO state structure (file, offset, etc.) + * @from: iov_iter with data to write + * + * Basically the same as __generic_file_write_iter() except that it ends + * up calling ntfs_perform_write() instead of generic_perform_write() and that + * O_DIRECT is not implemented. */ -static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, - const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) +static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb, + struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - loff_t pos; - size_t count; /* after file limit checks */ - ssize_t written, err; + loff_t pos = iocb->ki_pos; + ssize_t written = 0; + ssize_t err; + size_t count = iov_iter_count(from); - count = iov_length(iov, nr_segs); - pos = *ppos; - /* We can write back this queue in page reclaim. */ - current->backing_dev_info = inode_to_bdi(inode); - written = 0; - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); - if (err) - goto out; - if (!count) - goto out; - err = file_remove_suid(file); - if (err) - goto out; - err = file_update_time(file); - if (err) - goto out; - written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, - count); -out: + err = ntfs_prepare_file_for_write(file, &pos, &count); + if (count && !err) { + iov_iter_truncate(from, count); + written = ntfs_perform_write(file, from, pos); + if (likely(written >= 0)) + iocb->ki_pos = pos + written; + } current->backing_dev_info = NULL; return written ? written : err; } /** - * ntfs_file_aio_write - + * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock() + * @iocb: IO state structure + * @from: iov_iter with data to write + * + * Basically the same as generic_file_write_iter() except that it ends up + * calling ntfs_file_write_iter_nolock() instead of + * __generic_file_write_iter(). */ -static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; + struct inode *vi = file_inode(file); ssize_t ret; - BUG_ON(iocb->ki_pos != pos); - - mutex_lock(&inode->i_mutex); - ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); - mutex_unlock(&inode->i_mutex); + mutex_lock(&vi->i_mutex); + ret = ntfs_file_write_iter_nolock(iocb, from); + mutex_unlock(&vi->i_mutex); if (ret > 0) { - int err = generic_write_sync(file, iocb->ki_pos - ret, ret); + ssize_t err; + + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } @@ -2197,37 +2047,17 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end, #endif /* NTFS_RW */ const struct file_operations ntfs_file_ops = { - .llseek = generic_file_llseek, /* Seek inside file. */ - .read = new_sync_read, /* Read from file. */ - .read_iter = generic_file_read_iter, /* Async read from file. */ + .llseek = generic_file_llseek, + .read = new_sync_read, + .read_iter = generic_file_read_iter, #ifdef NTFS_RW - .write = do_sync_write, /* Write to file. */ - .aio_write = ntfs_file_aio_write, /* Async write to file. */ - /*.release = ,*/ /* Last file is closed. See - fs/ext2/file.c:: - ext2_release_file() for - how to use this to discard - preallocated space for - write opened files. */ - .fsync = ntfs_file_fsync, /* Sync a file to disk. */ - /*.aio_fsync = ,*/ /* Sync all outstanding async - i/o operations on a - kiocb. */ + .write = new_sync_write, + .write_iter = ntfs_file_write_iter, + .fsync = ntfs_file_fsync, #endif /* NTFS_RW */ - /*.ioctl = ,*/ /* Perform function on the - mounted filesystem. */ - .mmap = generic_file_mmap, /* Mmap file. */ - .open = ntfs_file_open, /* Open file. */ - .splice_read = generic_file_splice_read /* Zero-copy data send with - the data source being on - the ntfs partition. We do - not need to care about the - data destination. */ - /*.sendpage = ,*/ /* Zero-copy data send with - the data destination being - on the ntfs partition. We - do not need to care about - the data source. */ + .mmap = generic_file_mmap, + .open = ntfs_file_open, + .splice_read = generic_file_splice_read, }; const struct inode_operations ntfs_file_inode_ops = { diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 898b9949d363..1d0c21df0d80 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -28,7 +28,6 @@ #include <linux/quotaops.h> #include <linux/slab.h> #include <linux/log2.h> -#include <linux/aio.h> #include "aops.h" #include "attrib.h" diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1b0463a92b17..8d2bc840c288 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -29,6 +29,7 @@ #include <linux/mpage.h> #include <linux/quotaops.h> #include <linux/blkdev.h> +#include <linux/uio.h> #include <cluster/masklog.h> diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 6cae155d54df..dd59599b022d 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -22,7 +22,7 @@ #ifndef OCFS2_AOPS_H #define OCFS2_AOPS_H -#include <linux/aio.h> +#include <linux/fs.h> handle_t *ocfs2_start_walk_page_trans(struct inode *inode, struct page *page, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ba1790e52ff2..91f03ce98108 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2280,7 +2280,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, file->f_path.dentry->d_name.name, (unsigned int)from->nr_segs); /* GRRRRR */ - if (iocb->ki_nbytes == 0) + if (count == 0) return 0; appending = file->f_flags & O_APPEND ? 1 : 0; @@ -2330,8 +2330,7 @@ relock: } can_do_direct = direct_io; - ret = ocfs2_prepare_inode_for_write(file, ppos, - iocb->ki_nbytes, appending, + ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending, &can_do_direct, &has_refcount); if (ret < 0) { mlog_errno(ret); @@ -2339,8 +2338,7 @@ relock: } if (direct_io && !is_sync_kiocb(iocb)) - unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes, - *ppos); + unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos); /* * We can't complete the direct I/O as requested, fall back to diff --git a/fs/open.c b/fs/open.c index 33f9cbf2610b..6a83c47d5904 100644 --- a/fs/open.c +++ b/fs/open.c @@ -570,6 +570,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group) uid = make_kuid(current_user_ns(), user); gid = make_kgid(current_user_ns(), group); +retry_deleg: newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { if (!uid_valid(uid)) @@ -586,7 +587,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group) if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; -retry_deleg: mutex_lock(&inode->i_mutex); error = security_path_chown(path, uid, gid); if (!error) @@ -988,9 +988,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, return ERR_PTR(err); if (flags & O_CREAT) return ERR_PTR(-EINVAL); - if (!filename && (flags & O_DIRECTORY)) - if (!dentry->d_inode->i_op->lookup) - return ERR_PTR(-ENOTDIR); return do_file_open_root(dentry, mnt, filename, &op); } EXPORT_SYMBOL(file_open_root); diff --git a/fs/pipe.c b/fs/pipe.c index 21981e58e2a6..2d084f2d0b83 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -21,7 +21,6 @@ #include <linux/audit.h> #include <linux/syscalls.h> #include <linux/fcntl.h> -#include <linux/aio.h> #include <asm/uaccess.h> #include <asm/ioctls.h> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 39d1373128e9..44a549beeafa 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -539,6 +539,9 @@ static int ramoops_probe(struct platform_device *pdev) mem_address = pdata->mem_address; record_size = pdata->record_size; dump_oops = pdata->dump_oops; + ramoops_console_size = pdata->console_size; + ramoops_pmsg_size = pdata->pmsg_size; + ramoops_ftrace_size = pdata->ftrace_size; pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n", cxt->size, (unsigned long long)cxt->phys_addr, diff --git a/fs/read_write.c b/fs/read_write.c index 8e1b68786d66..69128b378646 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -9,7 +9,6 @@ #include <linux/fcntl.h> #include <linux/file.h> #include <linux/uio.h> -#include <linux/aio.h> #include <linux/fsnotify.h> #include <linux/security.h> #include <linux/export.h> @@ -343,13 +342,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos) init_sync_kiocb(&kiocb, file); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = iov_iter_count(iter); iter->type |= READ; ret = file->f_op->read_iter(&kiocb, iter); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - + BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; return ret; @@ -366,13 +362,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) init_sync_kiocb(&kiocb, file); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = iov_iter_count(iter); iter->type |= WRITE; ret = file->f_op->write_iter(&kiocb, iter); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - + BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; return ret; @@ -426,11 +419,9 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = len; ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -446,12 +437,10 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = len; iov_iter_init(&iter, READ, &iov, 1, len); ret = filp->f_op->read_iter(&kiocb, &iter); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -510,11 +499,9 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = len; ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -530,12 +517,10 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = len; iov_iter_init(&iter, WRITE, &iov, 1, len); ret = filp->f_op->write_iter(&kiocb, &iter); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -710,60 +695,47 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) } EXPORT_SYMBOL(iov_shorten); -static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, - unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) +static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, + loff_t *ppos, iter_fn_t fn) { struct kiocb kiocb; - struct iov_iter iter; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = len; - iov_iter_init(&iter, rw, iov, nr_segs, len); - ret = fn(&kiocb, &iter); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); + ret = fn(&kiocb, iter); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } -static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) +static ssize_t do_sync_readv_writev(struct file *filp, struct iov_iter *iter, + loff_t *ppos, iov_fn_t fn) { struct kiocb kiocb; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - kiocb.ki_nbytes = len; - ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); + ret = fn(&kiocb, iter->iov, iter->nr_segs, kiocb.ki_pos); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } /* Do it by hand, with file-ops */ -static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, - unsigned long nr_segs, loff_t *ppos, io_fn_t fn) +static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, + loff_t *ppos, io_fn_t fn) { - struct iovec *vector = iov; ssize_t ret = 0; - while (nr_segs > 0) { - void __user *base; - size_t len; + while (iov_iter_count(iter)) { + struct iovec iovec = iov_iter_iovec(iter); ssize_t nr; - base = vector->iov_base; - len = vector->iov_len; - vector++; - nr_segs--; - - nr = fn(filp, base, len, ppos); + nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos); if (nr < 0) { if (!ret) @@ -771,8 +743,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, break; } ret += nr; - if (nr != len) + if (nr != iovec.iov_len) break; + iov_iter_advance(iter, nr); } return ret; @@ -863,17 +836,20 @@ static ssize_t do_readv_writev(int type, struct file *file, size_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; + struct iov_iter iter; ssize_t ret; io_fn_t fn; iov_fn_t fnv; iter_fn_t iter_fn; - ret = rw_copy_check_uvector(type, uvector, nr_segs, - ARRAY_SIZE(iovstack), iovstack, &iov); - if (ret <= 0) - goto out; + ret = import_iovec(type, uvector, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (ret < 0) + return ret; - tot_len = ret; + tot_len = iov_iter_count(&iter); + if (!tot_len) + goto out; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; @@ -891,20 +867,17 @@ static ssize_t do_readv_writev(int type, struct file *file, } if (iter_fn) - ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, - pos, iter_fn); + ret = do_iter_readv_writev(file, &iter, pos, iter_fn); else if (fnv) - ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, - pos, fnv); + ret = do_sync_readv_writev(file, &iter, pos, fnv); else - ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); + ret = do_loop_readv_writev(file, &iter, pos, fn); if (type != READ) file_end_write(file); out: - if (iov != iovstack) - kfree(iov); + kfree(iov); if ((ret + (type == READ)) > 0) { if (type == READ) fsnotify_access(file); @@ -1043,17 +1016,20 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, compat_ssize_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; + struct iov_iter iter; ssize_t ret; io_fn_t fn; iov_fn_t fnv; iter_fn_t iter_fn; - ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, - UIO_FASTIOV, iovstack, &iov); - if (ret <= 0) - goto out; + ret = compat_import_iovec(type, uvector, nr_segs, + UIO_FASTIOV, &iov, &iter); + if (ret < 0) + return ret; - tot_len = ret; + tot_len = iov_iter_count(&iter); + if (!tot_len) + goto out; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; @@ -1071,20 +1047,17 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, } if (iter_fn) - ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, - pos, iter_fn); + ret = do_iter_readv_writev(file, &iter, pos, iter_fn); else if (fnv) - ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, - pos, fnv); + ret = do_sync_readv_writev(file, &iter, pos, fnv); else - ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); + ret = do_loop_readv_writev(file, &iter, pos, fn); if (type != READ) file_end_write(file); out: - if (iov != iovstack) - kfree(iov); + kfree(iov); if ((ret + (type == READ)) > 0) { if (type == READ) fsnotify_access(file); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index e72401e1f995..9312b7842e03 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -18,7 +18,7 @@ #include <linux/writeback.h> #include <linux/quotaops.h> #include <linux/swap.h> -#include <linux/aio.h> +#include <linux/uio.h> int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); diff --git a/fs/splice.c b/fs/splice.c index 7968da96bebb..41cbb16299e0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -32,7 +32,6 @@ #include <linux/gfp.h> #include <linux/socket.h> #include <linux/compat.h> -#include <linux/aio.h> #include "internal.h" /* @@ -1534,34 +1533,29 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - ssize_t count; pipe = get_pipe_info(file); if (!pipe) return -EBADF; - ret = rw_copy_check_uvector(READ, uiov, nr_segs, - ARRAY_SIZE(iovstack), iovstack, &iov); - if (ret <= 0) - goto out; - - count = ret; - iov_iter_init(&iter, READ, iov, nr_segs, count); + ret = import_iovec(READ, uiov, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (ret < 0) + return ret; + sd.total_len = iov_iter_count(&iter); sd.len = 0; - sd.total_len = count; sd.flags = flags; sd.u.data = &iter; sd.pos = 0; - pipe_lock(pipe); - ret = __splice_from_pipe(pipe, &sd, pipe_to_user); - pipe_unlock(pipe); - -out: - if (iov != iovstack) - kfree(iov); + if (sd.total_len) { + pipe_lock(pipe); + ret = __splice_from_pipe(pipe, &sd, pipe_to_user); + pipe_unlock(pipe); + } + kfree(iov); return ret; } diff --git a/fs/stat.c b/fs/stat.c index ae0c3cef9927..19636af5e75c 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -66,7 +66,7 @@ int vfs_getattr(struct path *path, struct kstat *stat) { int retval; - retval = security_inode_getattr(path->mnt, path->dentry); + retval = security_inode_getattr(path); if (retval) return retval; return vfs_getattr_nosec(path, stat); diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile new file mode 100644 index 000000000000..82fa35b656c4 --- /dev/null +++ b/fs/tracefs/Makefile @@ -0,0 +1,4 @@ +tracefs-objs := inode.o + +obj-$(CONFIG_TRACING) += tracefs.o + diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c new file mode 100644 index 000000000000..d92bdf3b079a --- /dev/null +++ b/fs/tracefs/inode.c @@ -0,0 +1,650 @@ +/* + * inode.c - part of tracefs, a pseudo file system for activating tracing + * + * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com> + * + * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * tracefs is the file system that is used by the tracing infrastructure. + * + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/kobject.h> +#include <linux/namei.h> +#include <linux/tracefs.h> +#include <linux/fsnotify.h> +#include <linux/seq_file.h> +#include <linux/parser.h> +#include <linux/magic.h> +#include <linux/slab.h> + +#define TRACEFS_DEFAULT_MODE 0700 + +static struct vfsmount *tracefs_mount; +static int tracefs_mount_count; +static bool tracefs_registered; + +static ssize_t default_read_file(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return 0; +} + +static ssize_t default_write_file(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + return count; +} + +static const struct file_operations tracefs_file_operations = { + .read = default_read_file, + .write = default_write_file, + .open = simple_open, + .llseek = noop_llseek, +}; + +static struct tracefs_dir_ops { + int (*mkdir)(const char *name); + int (*rmdir)(const char *name); +} tracefs_ops; + +static char *get_dname(struct dentry *dentry) +{ + const char *dname; + char *name; + int len = dentry->d_name.len; + + dname = dentry->d_name.name; + name = kmalloc(len + 1, GFP_KERNEL); + if (!name) + return NULL; + memcpy(name, dname, len); + name[len] = 0; + return name; +} + +static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode) +{ + char *name; + int ret; + + name = get_dname(dentry); + if (!name) + return -ENOMEM; + + /* + * The mkdir call can call the generic functions that create + * the files within the tracefs system. It is up to the individual + * mkdir routine to handle races. + */ + mutex_unlock(&inode->i_mutex); + ret = tracefs_ops.mkdir(name); + mutex_lock(&inode->i_mutex); + + kfree(name); + + return ret; +} + +static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry) +{ + char *name; + int ret; + + name = get_dname(dentry); + if (!name) + return -ENOMEM; + + /* + * The rmdir call can call the generic functions that create + * the files within the tracefs system. It is up to the individual + * rmdir routine to handle races. + * This time we need to unlock not only the parent (inode) but + * also the directory that is being deleted. + */ + mutex_unlock(&inode->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); + + ret = tracefs_ops.rmdir(name); + + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + mutex_lock(&dentry->d_inode->i_mutex); + + kfree(name); + + return ret; +} + +static const struct inode_operations tracefs_dir_inode_operations = { + .lookup = simple_lookup, + .mkdir = tracefs_syscall_mkdir, + .rmdir = tracefs_syscall_rmdir, +}; + +static struct inode *tracefs_get_inode(struct super_block *sb) +{ + struct inode *inode = new_inode(sb); + if (inode) { + inode->i_ino = get_next_ino(); + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + } + return inode; +} + +struct tracefs_mount_opts { + kuid_t uid; + kgid_t gid; + umode_t mode; +}; + +enum { + Opt_uid, + Opt_gid, + Opt_mode, + Opt_err +}; + +static const match_table_t tokens = { + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_mode, "mode=%o"}, + {Opt_err, NULL} +}; + +struct tracefs_fs_info { + struct tracefs_mount_opts mount_opts; +}; + +static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) +{ + substring_t args[MAX_OPT_ARGS]; + int option; + int token; + kuid_t uid; + kgid_t gid; + char *p; + + opts->mode = TRACEFS_DEFAULT_MODE; + + while ((p = strsep(&data, ",")) != NULL) { + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], &option)) + return -EINVAL; + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) + return -EINVAL; + opts->uid = uid; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return -EINVAL; + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) + return -EINVAL; + opts->gid = gid; + break; + case Opt_mode: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->mode = option & S_IALLUGO; + break; + /* + * We might like to report bad mount options here; + * but traditionally tracefs has ignored all mount options + */ + } + } + + return 0; +} + +static int tracefs_apply_options(struct super_block *sb) +{ + struct tracefs_fs_info *fsi = sb->s_fs_info; + struct inode *inode = sb->s_root->d_inode; + struct tracefs_mount_opts *opts = &fsi->mount_opts; + + inode->i_mode &= ~S_IALLUGO; + inode->i_mode |= opts->mode; + + inode->i_uid = opts->uid; + inode->i_gid = opts->gid; + + return 0; +} + +static int tracefs_remount(struct super_block *sb, int *flags, char *data) +{ + int err; + struct tracefs_fs_info *fsi = sb->s_fs_info; + + sync_filesystem(sb); + err = tracefs_parse_options(data, &fsi->mount_opts); + if (err) + goto fail; + + tracefs_apply_options(sb); + +fail: + return err; +} + +static int tracefs_show_options(struct seq_file *m, struct dentry *root) +{ + struct tracefs_fs_info *fsi = root->d_sb->s_fs_info; + struct tracefs_mount_opts *opts = &fsi->mount_opts; + + if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, opts->uid)); + if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, opts->gid)); + if (opts->mode != TRACEFS_DEFAULT_MODE) + seq_printf(m, ",mode=%o", opts->mode); + + return 0; +} + +static const struct super_operations tracefs_super_operations = { + .statfs = simple_statfs, + .remount_fs = tracefs_remount, + .show_options = tracefs_show_options, +}; + +static int trace_fill_super(struct super_block *sb, void *data, int silent) +{ + static struct tree_descr trace_files[] = {{""}}; + struct tracefs_fs_info *fsi; + int err; + + save_mount_options(sb, data); + + fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL); + sb->s_fs_info = fsi; + if (!fsi) { + err = -ENOMEM; + goto fail; + } + + err = tracefs_parse_options(data, &fsi->mount_opts); + if (err) + goto fail; + + err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files); + if (err) + goto fail; + + sb->s_op = &tracefs_super_operations; + + tracefs_apply_options(sb); + + return 0; + +fail: + kfree(fsi); + sb->s_fs_info = NULL; + return err; +} + +static struct dentry *trace_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + return mount_single(fs_type, flags, data, trace_fill_super); +} + +static struct file_system_type trace_fs_type = { + .owner = THIS_MODULE, + .name = "tracefs", + .mount = trace_mount, + .kill_sb = kill_litter_super, +}; +MODULE_ALIAS_FS("tracefs"); + +static struct dentry *start_creating(const char *name, struct dentry *parent) +{ + struct dentry *dentry; + int error; + + pr_debug("tracefs: creating file '%s'\n",name); + + error = simple_pin_fs(&trace_fs_type, &tracefs_mount, + &tracefs_mount_count); + if (error) + return ERR_PTR(error); + + /* If the parent is not specified, we create it in the root. + * We need the root dentry to do this, which is in the super + * block. A pointer to that is in the struct vfsmount that we + * have around. + */ + if (!parent) + parent = tracefs_mount->mnt_root; + + mutex_lock(&parent->d_inode->i_mutex); + dentry = lookup_one_len(name, parent, strlen(name)); + if (!IS_ERR(dentry) && dentry->d_inode) { + dput(dentry); + dentry = ERR_PTR(-EEXIST); + } + if (IS_ERR(dentry)) + mutex_unlock(&parent->d_inode->i_mutex); + return dentry; +} + +static struct dentry *failed_creating(struct dentry *dentry) +{ + mutex_unlock(&dentry->d_parent->d_inode->i_mutex); + dput(dentry); + simple_release_fs(&tracefs_mount, &tracefs_mount_count); + return NULL; +} + +static struct dentry *end_creating(struct dentry *dentry) +{ + mutex_unlock(&dentry->d_parent->d_inode->i_mutex); + return dentry; +} + +/** + * tracefs_create_file - create a file in the tracefs filesystem + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * file will be created in the root of the tracefs filesystem. + * @data: a pointer to something that the caller will want to get to later + * on. The inode.i_private pointer will point to this value on + * the open() call. + * @fops: a pointer to a struct file_operations that should be used for + * this file. + * + * This is the basic "create a file" function for tracefs. It allows for a + * wide range of flexibility in creating a file, or a directory (if you want + * to create a directory, the tracefs_create_dir() function is + * recommended to be used instead.) + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the tracefs_remove() function when the file is + * to be removed (no automatic cleanup happens if your module is unloaded, + * you are responsible here.) If an error occurs, %NULL will be returned. + * + * If tracefs is not enabled in the kernel, the value -%ENODEV will be + * returned. + */ +struct dentry *tracefs_create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) +{ + struct dentry *dentry; + struct inode *inode; + + if (!(mode & S_IFMT)) + mode |= S_IFREG; + BUG_ON(!S_ISREG(mode)); + dentry = start_creating(name, parent); + + if (IS_ERR(dentry)) + return NULL; + + inode = tracefs_get_inode(dentry->d_sb); + if (unlikely(!inode)) + return failed_creating(dentry); + + inode->i_mode = mode; + inode->i_fop = fops ? fops : &tracefs_file_operations; + inode->i_private = data; + d_instantiate(dentry, inode); + fsnotify_create(dentry->d_parent->d_inode, dentry); + return end_creating(dentry); +} + +static struct dentry *__create_dir(const char *name, struct dentry *parent, + const struct inode_operations *ops) +{ + struct dentry *dentry = start_creating(name, parent); + struct inode *inode; + + if (IS_ERR(dentry)) + return NULL; + + inode = tracefs_get_inode(dentry->d_sb); + if (unlikely(!inode)) + return failed_creating(dentry); + + inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + inode->i_op = ops; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inc_nlink(inode); + d_instantiate(dentry, inode); + inc_nlink(dentry->d_parent->d_inode); + fsnotify_mkdir(dentry->d_parent->d_inode, dentry); + return end_creating(dentry); +} + +/** + * tracefs_create_dir - create a directory in the tracefs filesystem + * @name: a pointer to a string containing the name of the directory to + * create. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * directory will be created in the root of the tracefs filesystem. + * + * This function creates a directory in tracefs with the given name. + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the tracefs_remove() function when the file is + * to be removed. If an error occurs, %NULL will be returned. + * + * If tracing is not enabled in the kernel, the value -%ENODEV will be + * returned. + */ +struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) +{ + return __create_dir(name, parent, &simple_dir_inode_operations); +} + +/** + * tracefs_create_instance_dir - create the tracing instances directory + * @name: The name of the instances directory to create + * @parent: The parent directory that the instances directory will exist + * @mkdir: The function to call when a mkdir is performed. + * @rmdir: The function to call when a rmdir is performed. + * + * Only one instances directory is allowed. + * + * The instances directory is special as it allows for mkdir and rmdir to + * to be done by userspace. When a mkdir or rmdir is performed, the inode + * locks are released and the methhods passed in (@mkdir and @rmdir) are + * called without locks and with the name of the directory being created + * within the instances directory. + * + * Returns the dentry of the instances directory. + */ +struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent, + int (*mkdir)(const char *name), + int (*rmdir)(const char *name)) +{ + struct dentry *dentry; + + /* Only allow one instance of the instances directory. */ + if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir)) + return NULL; + + dentry = __create_dir(name, parent, &tracefs_dir_inode_operations); + if (!dentry) + return NULL; + + tracefs_ops.mkdir = mkdir; + tracefs_ops.rmdir = rmdir; + + return dentry; +} + +static inline int tracefs_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + +static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) +{ + int ret = 0; + + if (tracefs_positive(dentry)) { + if (dentry->d_inode) { + dget(dentry); + switch (dentry->d_inode->i_mode & S_IFMT) { + case S_IFDIR: + ret = simple_rmdir(parent->d_inode, dentry); + break; + default: + simple_unlink(parent->d_inode, dentry); + break; + } + if (!ret) + d_delete(dentry); + dput(dentry); + } + } + return ret; +} + +/** + * tracefs_remove - removes a file or directory from the tracefs filesystem + * @dentry: a pointer to a the dentry of the file or directory to be + * removed. + * + * This function removes a file or directory in tracefs that was previously + * created with a call to another tracefs function (like + * tracefs_create_file() or variants thereof.) + */ +void tracefs_remove(struct dentry *dentry) +{ + struct dentry *parent; + int ret; + + if (IS_ERR_OR_NULL(dentry)) + return; + + parent = dentry->d_parent; + if (!parent || !parent->d_inode) + return; + + mutex_lock(&parent->d_inode->i_mutex); + ret = __tracefs_remove(dentry, parent); + mutex_unlock(&parent->d_inode->i_mutex); + if (!ret) + simple_release_fs(&tracefs_mount, &tracefs_mount_count); +} + +/** + * tracefs_remove_recursive - recursively removes a directory + * @dentry: a pointer to a the dentry of the directory to be removed. + * + * This function recursively removes a directory tree in tracefs that + * was previously created with a call to another tracefs function + * (like tracefs_create_file() or variants thereof.) + */ +void tracefs_remove_recursive(struct dentry *dentry) +{ + struct dentry *child, *parent; + + if (IS_ERR_OR_NULL(dentry)) + return; + + parent = dentry->d_parent; + if (!parent || !parent->d_inode) + return; + + parent = dentry; + down: + mutex_lock(&parent->d_inode->i_mutex); + loop: + /* + * The parent->d_subdirs is protected by the d_lock. Outside that + * lock, the child can be unlinked and set to be freed which can + * use the d_u.d_child as the rcu head and corrupt this list. + */ + spin_lock(&parent->d_lock); + list_for_each_entry(child, &parent->d_subdirs, d_child) { + if (!tracefs_positive(child)) + continue; + + /* perhaps simple_empty(child) makes more sense */ + if (!list_empty(&child->d_subdirs)) { + spin_unlock(&parent->d_lock); + mutex_unlock(&parent->d_inode->i_mutex); + parent = child; + goto down; + } + + spin_unlock(&parent->d_lock); + + if (!__tracefs_remove(child, parent)) + simple_release_fs(&tracefs_mount, &tracefs_mount_count); + + /* + * The parent->d_lock protects agaist child from unlinking + * from d_subdirs. When releasing the parent->d_lock we can + * no longer trust that the next pointer is valid. + * Restart the loop. We'll skip this one with the + * tracefs_positive() check. + */ + goto loop; + } + spin_unlock(&parent->d_lock); + + mutex_unlock(&parent->d_inode->i_mutex); + child = parent; + parent = parent->d_parent; + mutex_lock(&parent->d_inode->i_mutex); + + if (child != dentry) + /* go up */ + goto loop; + + if (!__tracefs_remove(child, parent)) + simple_release_fs(&tracefs_mount, &tracefs_mount_count); + mutex_unlock(&parent->d_inode->i_mutex); +} + +/** + * tracefs_initialized - Tells whether tracefs has been registered + */ +bool tracefs_initialized(void) +{ + return tracefs_registered; +} + +static struct kobject *trace_kobj; + +static int __init tracefs_init(void) +{ + int retval; + + trace_kobj = kobject_create_and_add("tracing", kernel_kobj); + if (!trace_kobj) + return -EINVAL; + + retval = register_filesystem(&trace_fs_type); + if (!retval) + tracefs_registered = true; + + return retval; +} +core_initcall(tracefs_init); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index e627c0acf626..c3d15fe83403 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -50,7 +50,6 @@ */ #include "ubifs.h" -#include <linux/aio.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/slab.h> diff --git a/fs/udf/file.c b/fs/udf/file.c index 08f3555fbeac..7f885cc8b0b7 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -34,7 +34,7 @@ #include <linux/errno.h> #include <linux/pagemap.h> #include <linux/buffer_head.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "udf_i.h" #include "udf_sb.h" @@ -122,7 +122,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); int err, pos; - size_t count = iocb->ki_nbytes; + size_t count = iov_iter_count(from); struct udf_inode_info *iinfo = UDF_I(inode); mutex_lock(&inode->i_mutex); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index a445d599098d..9c1fbd23913d 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -38,7 +38,7 @@ #include <linux/slab.h> #include <linux/crc-itu-t.h> #include <linux/mpage.h> -#include <linux/aio.h> +#include <linux/uio.h> #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3a9b7a1b8704..4f8cdc59bc38 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -31,7 +31,6 @@ #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" -#include <linux/aio.h> #include <linux/gfp.h> #include <linux/mpage.h> #include <linux/pagevec.h> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a2e1cb8a568b..f44212fae653 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -38,7 +38,6 @@ #include "xfs_icache.h" #include "xfs_pnfs.h" -#include <linux/aio.h> #include <linux/dcache.h> #include <linux/falloc.h> #include <linux/pagevec.h> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ac78910d7416..f8e8b34dc427 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -124,7 +124,10 @@ #define FTRACE_EVENTS() . = ALIGN(8); \ VMLINUX_SYMBOL(__start_ftrace_events) = .; \ *(_ftrace_events) \ - VMLINUX_SYMBOL(__stop_ftrace_events) = .; + VMLINUX_SYMBOL(__stop_ftrace_events) = .; \ + VMLINUX_SYMBOL(__start_ftrace_enum_maps) = .; \ + *(_ftrace_enum_map) \ + VMLINUX_SYMBOL(__stop_ftrace_enum_maps) = .; #else #define FTRACE_EVENTS() #endif diff --git a/include/linux/aio.h b/include/linux/aio.h index d9c92daa3944..9eb42dbc5582 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -1,86 +1,23 @@ #ifndef __LINUX__AIO_H #define __LINUX__AIO_H -#include <linux/list.h> -#include <linux/workqueue.h> #include <linux/aio_abi.h> -#include <linux/uio.h> -#include <linux/rcupdate.h> - -#include <linux/atomic.h> struct kioctx; struct kiocb; +struct mm_struct; #define KIOCB_KEY 0 -/* - * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either - * cancelled or completed (this makes a certain amount of sense because - * successful cancellation - io_cancel() - does deliver the completion to - * userspace). - * - * And since most things don't implement kiocb cancellation and we'd really like - * kiocb completion to be lockless when possible, we use ki_cancel to - * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED - * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel(). - */ -#define KIOCB_CANCELLED ((void *) (~0ULL)) - typedef int (kiocb_cancel_fn)(struct kiocb *); -struct kiocb { - struct file *ki_filp; - struct kioctx *ki_ctx; /* NULL for sync ops */ - kiocb_cancel_fn *ki_cancel; - void *private; - - union { - void __user *user; - struct task_struct *tsk; - } ki_obj; - - __u64 ki_user_data; /* user's data for completion */ - loff_t ki_pos; - size_t ki_nbytes; /* copy of iocb->aio_nbytes */ - - struct list_head ki_list; /* the aio core uses this - * for cancellation */ - - /* - * If the aio_resfd field of the userspace iocb is not zero, - * this is the underlying eventfd context to deliver events to. - */ - struct eventfd_ctx *ki_eventfd; -}; - -static inline bool is_sync_kiocb(struct kiocb *kiocb) -{ - return kiocb->ki_ctx == NULL; -} - -static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) -{ - *kiocb = (struct kiocb) { - .ki_ctx = NULL, - .ki_filp = filp, - .ki_obj.tsk = current, - }; -} - /* prototypes */ #ifdef CONFIG_AIO -extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); -extern void aio_complete(struct kiocb *iocb, long res, long res2); -struct mm_struct; extern void exit_aio(struct mm_struct *mm); extern long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user *__user *iocbpp, bool compat); void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else -static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } -static inline void aio_complete(struct kiocb *iocb, long res, long res2) { } -struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } static inline long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user * __user *iocbpp, @@ -89,11 +26,6 @@ static inline void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) { } #endif /* CONFIG_AIO */ -static inline struct kiocb *list_kiocb(struct list_head *h) -{ - return list_entry(h, struct kiocb, ki_list); -} - /* for sysctl: */ extern unsigned long aio_nr; extern unsigned long aio_max_nr; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bbfceb756452..c2e21113ecc0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -113,8 +113,6 @@ struct bpf_prog_type_list { enum bpf_prog_type type; }; -void bpf_register_prog_type(struct bpf_prog_type_list *tl); - struct bpf_prog; struct bpf_prog_aux { @@ -129,11 +127,25 @@ struct bpf_prog_aux { }; #ifdef CONFIG_BPF_SYSCALL +void bpf_register_prog_type(struct bpf_prog_type_list *tl); + void bpf_prog_put(struct bpf_prog *prog); +struct bpf_prog *bpf_prog_get(u32 ufd); #else -static inline void bpf_prog_put(struct bpf_prog *prog) {} +static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) +{ +} + +static inline struct bpf_prog *bpf_prog_get(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void bpf_prog_put(struct bpf_prog *prog) +{ +} #endif -struct bpf_prog *bpf_prog_get(u32 ufd); + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 37b81bd51ec0..2821838256b4 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -10,6 +10,8 @@ #ifdef CONFIG_CONTEXT_TRACKING extern void context_tracking_cpu_set(int cpu); +extern void context_tracking_enter(enum ctx_state state); +extern void context_tracking_exit(enum ctx_state state); extern void context_tracking_user_enter(void); extern void context_tracking_user_exit(void); extern void __context_tracking_task_switch(struct task_struct *prev, @@ -35,7 +37,8 @@ static inline enum ctx_state exception_enter(void) return 0; prev_ctx = this_cpu_read(context_tracking.state); - context_tracking_user_exit(); + if (prev_ctx != CONTEXT_KERNEL) + context_tracking_exit(prev_ctx); return prev_ctx; } @@ -43,8 +46,8 @@ static inline enum ctx_state exception_enter(void) static inline void exception_exit(enum ctx_state prev_ctx) { if (context_tracking_is_enabled()) { - if (prev_ctx == IN_USER) - context_tracking_user_enter(); + if (prev_ctx != CONTEXT_KERNEL) + context_tracking_enter(prev_ctx); } } @@ -78,10 +81,16 @@ static inline void guest_enter(void) vtime_guest_enter(current); else current->flags |= PF_VCPU; + + if (context_tracking_is_enabled()) + context_tracking_enter(CONTEXT_GUEST); } static inline void guest_exit(void) { + if (context_tracking_is_enabled()) + context_tracking_exit(CONTEXT_GUEST); + if (vtime_accounting_enabled()) vtime_guest_exit(current); else diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 97a81225d037..6b7b96a32b75 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -13,8 +13,9 @@ struct context_tracking { */ bool active; enum ctx_state { - IN_KERNEL = 0, - IN_USER, + CONTEXT_KERNEL = 0, + CONTEXT_USER, + CONTEXT_GUEST, } state; }; @@ -34,11 +35,13 @@ static inline bool context_tracking_cpu_is_enabled(void) static inline bool context_tracking_in_user(void) { - return __this_cpu_read(context_tracking.state) == IN_USER; + return __this_cpu_read(context_tracking.state) == CONTEXT_USER; } #else static inline bool context_tracking_in_user(void) { return false; } static inline bool context_tracking_active(void) { return false; } +static inline bool context_tracking_is_enabled(void) { return false; } +static inline bool context_tracking_cpu_is_enabled(void) { return false; } #endif /* CONFIG_CONTEXT_TRACKING */ #endif diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 4260e8594bd7..c0fb6b1b4712 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -73,6 +73,7 @@ enum { /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, CPU_PRI_MIGRATION = 10, + CPU_PRI_SMPBOOT = 9, /* bring up workqueues before normal notifiers and down after */ CPU_PRI_WORKQUEUE_UP = 5, CPU_PRI_WORKQUEUE_DOWN = -5, @@ -95,6 +96,10 @@ enum { * Called on the new cpu, just before * enabling interrupts. Must not sleep, * must not fail */ +#define CPU_DYING_IDLE 0x000B /* CPU (unsigned)v dying, reached + * idle loop. */ +#define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly, + * perhaps due to preemption. */ /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend * operation in progress @@ -161,6 +166,7 @@ static inline void __unregister_cpu_notifier(struct notifier_block *nb) } #endif +void smpboot_thread_init(void); int cpu_up(unsigned int cpu); void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); @@ -208,6 +214,10 @@ static inline void cpu_notifier_register_done(void) { } +static inline void smpboot_thread_init(void) +{ +} + #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; @@ -271,4 +281,14 @@ void arch_cpu_idle_enter(void); void arch_cpu_idle_exit(void); void arch_cpu_idle_dead(void); +DECLARE_PER_CPU(bool, cpu_dead_idle); + +int cpu_report_state(int cpu); +int cpu_check_up_prepare(int cpu); +void cpu_set_state_online(int cpu); +#ifdef CONFIG_HOTPLUG_CPU +bool cpu_wait_death(unsigned int cpu, int seconds); +bool cpu_report_death(void); +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + #endif /* _LINUX_CPU_H_ */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 52cc4492cb3a..d502e5436c84 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -314,6 +314,28 @@ struct page; struct address_space; struct writeback_control; +#define IOCB_EVENTFD (1 << 0) + +struct kiocb { + struct file *ki_filp; + loff_t ki_pos; + void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); + void *private; + int ki_flags; +}; + +static inline bool is_sync_kiocb(struct kiocb *kiocb) +{ + return kiocb->ki_complete == NULL; +} + +static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) +{ + *kiocb = (struct kiocb) { + .ki_filp = filp, + }; +} + /* * "descriptor" for what we're up to with a read. * This allows us to use the same read code yet @@ -2145,7 +2167,7 @@ struct filename { const __user char *uptr; /* original userland pointer */ struct audit_names *aname; int refcnt; - bool separate; /* should "name" be freed? */ + const char iname[]; }; extern long vfs_truncate(struct path *, loff_t); diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index c674ee8f7fca..46e83c2156c6 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -13,6 +13,7 @@ struct trace_array; struct trace_buffer; struct tracer; struct dentry; +struct bpf_prog; struct trace_print_flags { unsigned long mask; @@ -202,7 +203,7 @@ enum trace_reg { struct ftrace_event_call; struct ftrace_event_class { - char *system; + const char *system; void *probe; #ifdef CONFIG_PERF_EVENTS void *perf_probe; @@ -252,6 +253,7 @@ enum { TRACE_EVENT_FL_WAS_ENABLED_BIT, TRACE_EVENT_FL_USE_CALL_FILTER_BIT, TRACE_EVENT_FL_TRACEPOINT_BIT, + TRACE_EVENT_FL_KPROBE_BIT, }; /* @@ -265,6 +267,7 @@ enum { * it is best to clear the buffers that used it). * USE_CALL_FILTER - For ftrace internal events, don't use file filter * TRACEPOINT - Event is a tracepoint + * KPROBE - Event is a kprobe */ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), @@ -274,6 +277,7 @@ enum { TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT), TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), + TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), }; struct ftrace_event_call { @@ -285,7 +289,7 @@ struct ftrace_event_call { struct tracepoint *tp; }; struct trace_event event; - const char *print_fmt; + char *print_fmt; struct event_filter *filter; void *mod; void *data; @@ -303,6 +307,7 @@ struct ftrace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; + struct bpf_prog *prog; int (*perf_perm)(struct ftrace_event_call *, struct perf_event *); @@ -548,6 +553,15 @@ event_trigger_unlock_commit_regs(struct ftrace_event_file *file, event_triggers_post_call(file, tt); } +#ifdef CONFIG_BPF_SYSCALL +unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +#else +static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +{ + return 1; +} +#endif + enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 4173a8fdad9e..0408421d885f 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -49,19 +49,43 @@ struct hid_sensor_hub_attribute_info { }; /** + * struct sensor_hub_pending - Synchronous read pending information + * @status: Pending status true/false. + * @ready: Completion synchronization data. + * @usage_id: Usage id for physical device, E.g. Gyro usage id. + * @attr_usage_id: Usage Id of a field, E.g. X-AXIS for a gyro. + * @raw_size: Response size for a read request. + * @raw_data: Place holder for received response. + */ +struct sensor_hub_pending { + bool status; + struct completion ready; + u32 usage_id; + u32 attr_usage_id; + int raw_size; + u8 *raw_data; +}; + +/** * struct hid_sensor_hub_device - Stores the hub instance data * @hdev: Stores the hid instance. * @vendor_id: Vendor id of hub device. * @product_id: Product id of hub device. + * @usage: Usage id for this hub device instance. * @start_collection_index: Starting index for a phy type collection * @end_collection_index: Last index for a phy type collection + * @mutex: synchronizing mutex. + * @pending: Holds information of pending sync read request. */ struct hid_sensor_hub_device { struct hid_device *hdev; u32 vendor_id; u32 product_id; + u32 usage; int start_collection_index; int end_collection_index; + struct mutex mutex; + struct sensor_hub_pending pending; }; /** @@ -152,40 +176,51 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev, * @usage_id: Attribute usage id of parent physical device as per spec * @attr_usage_id: Attribute usage id as per spec * @report_id: Report id to look for +* @flag: Synchronous or asynchronous read * -* Issues a synchronous read request for an input attribute. Returns -* data upto 32 bits. Since client can get events, so this call should -* not be used for data paths, this will impact performance. +* Issues a synchronous or asynchronous read request for an input attribute. +* Returns data upto 32 bits. */ +enum sensor_hub_read_flags { + SENSOR_HUB_SYNC, + SENSOR_HUB_ASYNC, +}; + int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, - u32 usage_id, - u32 attr_usage_id, u32 report_id); + u32 usage_id, + u32 attr_usage_id, u32 report_id, + enum sensor_hub_read_flags flag +); + /** * sensor_hub_set_feature() - Feature set request * @hsdev: Hub device instance. * @report_id: Report id to look for * @field_index: Field index inside a report -* @value: Value to set +* @buffer_size: size of the buffer +* @buffer: buffer to use in the feature set * * Used to set a field in feature report. For example this can set polling * interval, sensitivity, activate/deactivate state. */ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, - u32 field_index, s32 value); + u32 field_index, int buffer_size, void *buffer); /** * sensor_hub_get_feature() - Feature get request * @hsdev: Hub device instance. * @report_id: Report id to look for * @field_index: Field index inside a report -* @value: Place holder for return value +* @buffer_size: size of the buffer +* @buffer: buffer to copy output * * Used to get a field in feature report. For example this can get polling -* interval, sensitivity, activate/deactivate state. +* interval, sensitivity, activate/deactivate state. On success it returns +* number of bytes copied to buffer. On failure, it returns value < 0. */ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, - u32 field_index, s32 *value); + u32 field_index, int buffer_size, void *buffer); /* hid-sensor-attributes */ diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 109f0e633e01..f2ee90aed0c2 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -21,6 +21,8 @@ #define HID_MAX_PHY_DEVICES 0xFF +#define HID_USAGE_SENSOR_COLLECTION 0x200001 + /* Accel 3D (200073) */ #define HID_USAGE_SENSOR_ACCEL_3D 0x200073 #define HID_USAGE_SENSOR_DATA_ACCELERATION 0x200452 diff --git a/include/linux/hid.h b/include/linux/hid.h index f94cf28e4b7c..176b43670e5d 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -159,6 +159,7 @@ struct hid_item { #define HID_UP_LED 0x00080000 #define HID_UP_BUTTON 0x00090000 #define HID_UP_ORDINAL 0x000a0000 +#define HID_UP_TELEPHONY 0x000b0000 #define HID_UP_CONSUMER 0x000c0000 #define HID_UP_DIGITIZER 0x000d0000 #define HID_UP_PID 0x000f0000 @@ -269,6 +270,7 @@ struct hid_item { #define HID_DG_DEVICEINDEX 0x000d0053 #define HID_DG_CONTACTCOUNT 0x000d0054 #define HID_DG_CONTACTMAX 0x000d0055 +#define HID_DG_BUTTONTYPE 0x000d0059 #define HID_DG_BARRELSWITCH2 0x000d005a #define HID_DG_TOOLSERIALNUMBER 0x000d005b diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 82af5d0b996e..ad45054309a0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -779,7 +779,8 @@ static inline void kvm_guest_enter(void) * one time slice). Lets treat guest mode as quiescent state, just like * we do with user-mode execution. */ - rcu_virt_note_context_switch(smp_processor_id()); + if (!context_tracking_cpu_is_enabled()) + rcu_virt_note_context_switch(smp_processor_id()); } static inline void kvm_guest_exit(void) diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 95023fd8b00d..ee6dbb39a809 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -123,10 +123,10 @@ struct klp_patch { enum klp_state state; }; -extern int klp_register_patch(struct klp_patch *); -extern int klp_unregister_patch(struct klp_patch *); -extern int klp_enable_patch(struct klp_patch *); -extern int klp_disable_patch(struct klp_patch *); +int klp_register_patch(struct klp_patch *); +int klp_unregister_patch(struct klp_patch *); +int klp_enable_patch(struct klp_patch *); +int klp_disable_patch(struct klp_patch *); #endif /* CONFIG_LIVEPATCH */ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 74ab23176e9b..066ba4157541 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -531,8 +531,13 @@ do { \ # define might_lock_read(lock) do { } while (0) #endif -#ifdef CONFIG_PROVE_RCU +#ifdef CONFIG_LOCKDEP void lockdep_rcu_suspicious(const char *file, const int line, const char *s); +#else +static inline void +lockdep_rcu_suspicious(const char *file, const int line, const char *s) +{ +} #endif #endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/module.h b/include/linux/module.h index b03485bcb82a..c883b86ea964 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -338,6 +338,8 @@ struct module { #ifdef CONFIG_EVENT_TRACING struct ftrace_event_call **trace_events; unsigned int num_trace_events; + struct trace_enum_map **trace_enums; + unsigned int num_trace_enums; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD unsigned int num_ftrace_callsites; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2b621982938d..61992cf2e977 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -53,6 +53,7 @@ struct perf_guest_info_callbacks { #include <linux/sysfs.h> #include <linux/perf_regs.h> #include <linux/workqueue.h> +#include <linux/cgroup.h> #include <asm/local.h> struct perf_callchain_entry { @@ -118,10 +119,19 @@ struct hw_perf_event { struct hrtimer hrtimer; }; struct { /* tracepoint */ - struct task_struct *tp_target; /* for tp_event->class */ struct list_head tp_list; }; + struct { /* intel_cqm */ + int cqm_state; + int cqm_rmid; + struct list_head cqm_events_entry; + struct list_head cqm_groups_entry; + struct list_head cqm_group_entry; + }; + struct { /* itrace */ + int itrace_started; + }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ /* @@ -129,12 +139,12 @@ struct hw_perf_event { * problem hw_breakpoint has with context * creation and event initalization. */ - struct task_struct *bp_target; struct arch_hw_breakpoint info; struct list_head bp_list; }; #endif }; + struct task_struct *target; int state; local64_t prev_count; u64 sample_period; @@ -166,6 +176,11 @@ struct perf_event; * pmu::capabilities flags */ #define PERF_PMU_CAP_NO_INTERRUPT 0x01 +#define PERF_PMU_CAP_NO_NMI 0x02 +#define PERF_PMU_CAP_AUX_NO_SG 0x04 +#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08 +#define PERF_PMU_CAP_EXCLUSIVE 0x10 +#define PERF_PMU_CAP_ITRACE 0x20 /** * struct pmu - generic performance monitoring unit @@ -186,6 +201,7 @@ struct pmu { int * __percpu pmu_disable_count; struct perf_cpu_context * __percpu pmu_cpu_context; + atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */ int task_ctx_nr; int hrtimer_interval_ms; @@ -262,9 +278,32 @@ struct pmu { int (*event_idx) (struct perf_event *event); /*optional */ /* - * flush branch stack on context-switches (needed in cpu-wide mode) + * context-switches callback + */ + void (*sched_task) (struct perf_event_context *ctx, + bool sched_in); + /* + * PMU specific data size + */ + size_t task_ctx_size; + + + /* + * Return the count value for a counter. + */ + u64 (*count) (struct perf_event *event); /*optional*/ + + /* + * Set up pmu-private data structures for an AUX area */ - void (*flush_branch_stack) (void); + void *(*setup_aux) (int cpu, void **pages, + int nr_pages, bool overwrite); + /* optional */ + + /* + * Free pmu-private AUX data structures + */ + void (*free_aux) (void *aux); /* optional */ }; /** @@ -300,6 +339,7 @@ struct swevent_hlist { #define PERF_ATTACH_CONTEXT 0x01 #define PERF_ATTACH_GROUP 0x02 #define PERF_ATTACH_TASK 0x04 +#define PERF_ATTACH_TASK_DATA 0x08 struct perf_cgroup; struct ring_buffer; @@ -438,6 +478,7 @@ struct perf_event { struct pid_namespace *ns; u64 id; + u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; @@ -504,7 +545,7 @@ struct perf_event_context { u64 generation; int pin_count; int nr_cgroups; /* cgroup evts */ - int nr_branch_stack; /* branch_stack evt */ + void *task_ctx_data; /* pmu specific data */ struct rcu_head rcu_head; struct delayed_work orphans_remove; @@ -536,12 +577,52 @@ struct perf_output_handle { struct ring_buffer *rb; unsigned long wakeup; unsigned long size; - void *addr; + union { + void *addr; + unsigned long head; + }; int page; }; +#ifdef CONFIG_CGROUP_PERF + +/* + * perf_cgroup_info keeps track of time_enabled for a cgroup. + * This is a per-cpu dynamically allocated data structure. + */ +struct perf_cgroup_info { + u64 time; + u64 timestamp; +}; + +struct perf_cgroup { + struct cgroup_subsys_state css; + struct perf_cgroup_info __percpu *info; +}; + +/* + * Must ensure cgroup is pinned (css_get) before calling + * this function. In other words, we cannot call this function + * if there is no cgroup event for the current CPU context. + */ +static inline struct perf_cgroup * +perf_cgroup_from_task(struct task_struct *task) +{ + return container_of(task_css(task, perf_event_cgrp_id), + struct perf_cgroup, css); +} +#endif /* CONFIG_CGROUP_PERF */ + #ifdef CONFIG_PERF_EVENTS +extern void *perf_aux_output_begin(struct perf_output_handle *handle, + struct perf_event *event); +extern void perf_aux_output_end(struct perf_output_handle *handle, + unsigned long size, bool truncated); +extern int perf_aux_output_skip(struct perf_output_handle *handle, + unsigned long size); +extern void *perf_get_aux(struct perf_output_handle *handle); + extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); extern void perf_pmu_unregister(struct pmu *pmu); @@ -558,6 +639,8 @@ extern void perf_event_delayed_put(struct task_struct *task); extern void perf_event_print_debug(void); extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_enable(struct pmu *pmu); +extern void perf_sched_cb_dec(struct pmu *pmu); +extern void perf_sched_cb_inc(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); extern int perf_event_refresh(struct perf_event *event, int refresh); @@ -731,6 +814,11 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, __perf_event_task_sched_out(prev, next); } +static inline u64 __perf_event_count(struct perf_event *event) +{ + return local64_read(&event->count) + atomic64_read(&event->child_count); +} + extern void perf_event_mmap(struct vm_area_struct *vma); extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); @@ -800,6 +888,16 @@ static inline bool has_branch_stack(struct perf_event *event) return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; } +static inline bool needs_branch_stack(struct perf_event *event) +{ + return event->attr.branch_sample_type != 0; +} + +static inline bool has_aux(struct perf_event *event) +{ + return event->pmu->setup_aux; +} + extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); @@ -815,6 +913,17 @@ extern void perf_event_disable(struct perf_event *event); extern int __perf_event_disable(void *info); extern void perf_event_task_tick(void); #else /* !CONFIG_PERF_EVENTS: */ +static inline void * +perf_aux_output_begin(struct perf_output_handle *handle, + struct perf_event *event) { return NULL; } +static inline void +perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, + bool truncated) { } +static inline int +perf_aux_output_skip(struct perf_output_handle *handle, + unsigned long size) { return -EINVAL; } +static inline void * +perf_get_aux(struct perf_output_handle *handle) { return NULL; } static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { } diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 78097491cd99..573a5afd5ed8 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -48,6 +48,26 @@ extern int rcu_expedited; /* for sysctl */ +#ifdef CONFIG_TINY_RCU +/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ +static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */ +{ + return false; +} + +static inline void rcu_expedite_gp(void) +{ +} + +static inline void rcu_unexpedite_gp(void) +{ +} +#else /* #ifdef CONFIG_TINY_RCU */ +bool rcu_gp_is_expedited(void); /* Internal RCU use. */ +void rcu_expedite_gp(void); +void rcu_unexpedite_gp(void); +#endif /* #else #ifdef CONFIG_TINY_RCU */ + enum rcutorture_type { RCU_FLAVOR, RCU_BH_FLAVOR, @@ -195,6 +215,15 @@ void call_rcu_sched(struct rcu_head *head, void synchronize_sched(void); +/* + * Structure allowing asynchronous waiting on RCU. + */ +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; +void wakeme_after_rcu(struct rcu_head *head); + /** * call_rcu_tasks() - Queue an RCU for invocation task-based grace period * @head: structure to be used for queueing the RCU updates. @@ -258,6 +287,7 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); +void rcu_end_inkernel_boot(void); void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); @@ -266,6 +296,8 @@ void rcu_idle_enter(void); void rcu_idle_exit(void); void rcu_irq_enter(void); void rcu_irq_exit(void); +int rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu); #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); @@ -720,7 +752,7 @@ static inline void rcu_preempt_sleep_check(void) * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) + __rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu) /** * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking @@ -730,7 +762,7 @@ static inline void rcu_preempt_sleep_check(void) * This is the RCU-bh counterpart to rcu_dereference_check(). */ #define rcu_dereference_bh_check(p, c) \ - __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) + __rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu) /** * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking @@ -740,7 +772,7 @@ static inline void rcu_preempt_sleep_check(void) * This is the RCU-sched counterpart to rcu_dereference_check(). */ #define rcu_dereference_sched_check(p, c) \ - __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ + __rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \ __rcu) #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ @@ -933,9 +965,9 @@ static inline void rcu_read_unlock(void) { rcu_lockdep_assert(rcu_is_watching(), "rcu_read_unlock() used illegally while idle"); - rcu_lock_release(&rcu_lock_map); __release(RCU); __rcu_read_unlock(); + rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */ } /** diff --git a/include/linux/security.h b/include/linux/security.h index a1b7dbd127ff..4e14e3d6309f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1556,7 +1556,7 @@ struct security_operations { int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); int (*inode_permission) (struct inode *inode, int mask); int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); - int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); + int (*inode_getattr) (const struct path *path); int (*inode_setxattr) (struct dentry *dentry, const char *name, const void *value, size_t size, int flags); void (*inode_post_setxattr) (struct dentry *dentry, const char *name, @@ -1843,7 +1843,7 @@ int security_inode_readlink(struct dentry *dentry); int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); int security_inode_permission(struct inode *inode, int mask); int security_inode_setattr(struct dentry *dentry, struct iattr *attr); -int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); +int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); void security_inode_post_setxattr(struct dentry *dentry, const char *name, @@ -2259,8 +2259,7 @@ static inline int security_inode_setattr(struct dentry *dentry, return 0; } -static inline int security_inode_getattr(struct vfsmount *mnt, - struct dentry *dentry) +static inline int security_inode_getattr(const struct path *path) { return 0; } diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index 13e929679550..d600afb21926 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -47,6 +47,5 @@ struct smp_hotplug_thread { int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread); void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); -int smpboot_thread_schedule(void); #endif diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 9cfd9623fb03..bdeb4567b71e 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -182,7 +182,7 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) * lockdep_is_held() calls. */ #define srcu_dereference_check(p, sp, c) \ - __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) + __rcu_dereference_check((p), (c) || srcu_read_lock_held(sp), __rcu) /** * srcu_dereference - fetch SRCU-protected pointer for later dereferencing diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 669045ab73f3..0a34489a46b6 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -7,8 +7,6 @@ struct task_struct; struct pt_regs; #ifdef CONFIG_STACKTRACE -struct task_struct; - struct stack_trace { unsigned int nr_entries, max_entries; unsigned long *entries; diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h new file mode 100644 index 000000000000..5b727a17beee --- /dev/null +++ b/include/linux/tracefs.h @@ -0,0 +1,45 @@ +/* + * tracefs.h - a pseudo file system for activating tracing + * + * Based on debugfs by: 2004 Greg Kroah-Hartman <greg@kroah.com> + * + * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * tracefs is the file system that is used by the tracing infrastructure. + * + */ + +#ifndef _TRACEFS_H_ +#define _TRACEFS_H_ + +#include <linux/fs.h> +#include <linux/seq_file.h> + +#include <linux/types.h> + +struct file_operations; + +#ifdef CONFIG_TRACING + +struct dentry *tracefs_create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); + +struct dentry *tracefs_create_dir(const char *name, struct dentry *parent); + +void tracefs_remove(struct dentry *dentry); +void tracefs_remove_recursive(struct dentry *dentry); + +struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent, + int (*mkdir)(const char *name), + int (*rmdir)(const char *name)); + +bool tracefs_initialized(void); + +#endif /* CONFIG_TRACING */ + +#endif diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c72851328ca9..a5f7f3ecafa3 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -36,6 +36,12 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; +struct trace_enum_map { + const char *system; + const char *enum_string; + unsigned long enum_value; +}; + extern int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); extern int @@ -87,6 +93,8 @@ extern void syscall_unregfunc(void); #define PARAMS(args...) args +#define TRACE_DEFINE_ENUM(x) + #endif /* _LINUX_TRACEPOINT_H */ /* diff --git a/include/linux/uio.h b/include/linux/uio.h index 71880299ed48..15f11fb9fff6 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -76,6 +76,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); +int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); @@ -139,4 +140,18 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); +int import_iovec(int type, const struct iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i); + +#ifdef CONFIG_COMPAT +struct compat_iovec; +int compat_import_iovec(int type, const struct compat_iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i); +#endif + +int import_single_range(int type, void __user *buf, size_t len, + struct iovec *iov, struct iov_iter *i); + #endif diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 395b70e0eccf..a746bf5216f8 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -137,4 +137,12 @@ extern int watchdog_init_timeout(struct watchdog_device *wdd, extern int watchdog_register_device(struct watchdog_device *); extern void watchdog_unregister_device(struct watchdog_device *); +#ifdef CONFIG_HARDLOCKUP_DETECTOR +void watchdog_nmi_disable_all(void); +void watchdog_nmi_enable_all(void); +#else +static inline void watchdog_nmi_disable_all(void) {} +static inline void watchdog_nmi_enable_all(void) {} +#endif + #endif /* ifndef _LINUX_WATCHDOG_H */ diff --git a/include/net/sock.h b/include/net/sock.h index e4079c28e6b8..81c81ead9a35 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -57,7 +57,6 @@ #include <linux/page_counter.h> #include <linux/memcontrol.h> #include <linux/static_key.h> -#include <linux/aio.h> #include <linux/sched.h> #include <linux/filter.h> diff --git a/include/trace/events/9p.h b/include/trace/events/9p.h index a0666362c111..633ee9ee9778 100644 --- a/include/trace/events/9p.h +++ b/include/trace/events/9p.h @@ -6,76 +6,95 @@ #include <linux/tracepoint.h> +#define P9_MSG_T \ + EM( P9_TLERROR, "P9_TLERROR" ) \ + EM( P9_RLERROR, "P9_RLERROR" ) \ + EM( P9_TSTATFS, "P9_TSTATFS" ) \ + EM( P9_RSTATFS, "P9_RSTATFS" ) \ + EM( P9_TLOPEN, "P9_TLOPEN" ) \ + EM( P9_RLOPEN, "P9_RLOPEN" ) \ + EM( P9_TLCREATE, "P9_TLCREATE" ) \ + EM( P9_RLCREATE, "P9_RLCREATE" ) \ + EM( P9_TSYMLINK, "P9_TSYMLINK" ) \ + EM( P9_RSYMLINK, "P9_RSYMLINK" ) \ + EM( P9_TMKNOD, "P9_TMKNOD" ) \ + EM( P9_RMKNOD, "P9_RMKNOD" ) \ + EM( P9_TRENAME, "P9_TRENAME" ) \ + EM( P9_RRENAME, "P9_RRENAME" ) \ + EM( P9_TREADLINK, "P9_TREADLINK" ) \ + EM( P9_RREADLINK, "P9_RREADLINK" ) \ + EM( P9_TGETATTR, "P9_TGETATTR" ) \ + EM( P9_RGETATTR, "P9_RGETATTR" ) \ + EM( P9_TSETATTR, "P9_TSETATTR" ) \ + EM( P9_RSETATTR, "P9_RSETATTR" ) \ + EM( P9_TXATTRWALK, "P9_TXATTRWALK" ) \ + EM( P9_RXATTRWALK, "P9_RXATTRWALK" ) \ + EM( P9_TXATTRCREATE, "P9_TXATTRCREATE" ) \ + EM( P9_RXATTRCREATE, "P9_RXATTRCREATE" ) \ + EM( P9_TREADDIR, "P9_TREADDIR" ) \ + EM( P9_RREADDIR, "P9_RREADDIR" ) \ + EM( P9_TFSYNC, "P9_TFSYNC" ) \ + EM( P9_RFSYNC, "P9_RFSYNC" ) \ + EM( P9_TLOCK, "P9_TLOCK" ) \ + EM( P9_RLOCK, "P9_RLOCK" ) \ + EM( P9_TGETLOCK, "P9_TGETLOCK" ) \ + EM( P9_RGETLOCK, "P9_RGETLOCK" ) \ + EM( P9_TLINK, "P9_TLINK" ) \ + EM( P9_RLINK, "P9_RLINK" ) \ + EM( P9_TMKDIR, "P9_TMKDIR" ) \ + EM( P9_RMKDIR, "P9_RMKDIR" ) \ + EM( P9_TRENAMEAT, "P9_TRENAMEAT" ) \ + EM( P9_RRENAMEAT, "P9_RRENAMEAT" ) \ + EM( P9_TUNLINKAT, "P9_TUNLINKAT" ) \ + EM( P9_RUNLINKAT, "P9_RUNLINKAT" ) \ + EM( P9_TVERSION, "P9_TVERSION" ) \ + EM( P9_RVERSION, "P9_RVERSION" ) \ + EM( P9_TAUTH, "P9_TAUTH" ) \ + EM( P9_RAUTH, "P9_RAUTH" ) \ + EM( P9_TATTACH, "P9_TATTACH" ) \ + EM( P9_RATTACH, "P9_RATTACH" ) \ + EM( P9_TERROR, "P9_TERROR" ) \ + EM( P9_RERROR, "P9_RERROR" ) \ + EM( P9_TFLUSH, "P9_TFLUSH" ) \ + EM( P9_RFLUSH, "P9_RFLUSH" ) \ + EM( P9_TWALK, "P9_TWALK" ) \ + EM( P9_RWALK, "P9_RWALK" ) \ + EM( P9_TOPEN, "P9_TOPEN" ) \ + EM( P9_ROPEN, "P9_ROPEN" ) \ + EM( P9_TCREATE, "P9_TCREATE" ) \ + EM( P9_RCREATE, "P9_RCREATE" ) \ + EM( P9_TREAD, "P9_TREAD" ) \ + EM( P9_RREAD, "P9_RREAD" ) \ + EM( P9_TWRITE, "P9_TWRITE" ) \ + EM( P9_RWRITE, "P9_RWRITE" ) \ + EM( P9_TCLUNK, "P9_TCLUNK" ) \ + EM( P9_RCLUNK, "P9_RCLUNK" ) \ + EM( P9_TREMOVE, "P9_TREMOVE" ) \ + EM( P9_RREMOVE, "P9_RREMOVE" ) \ + EM( P9_TSTAT, "P9_TSTAT" ) \ + EM( P9_RSTAT, "P9_RSTAT" ) \ + EM( P9_TWSTAT, "P9_TWSTAT" ) \ + EMe(P9_RWSTAT, "P9_RWSTAT" ) + +/* Define EM() to export the enums to userspace via TRACE_DEFINE_ENUM() */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +P9_MSG_T + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a, b) { a, b }, +#define EMe(a, b) { a, b } + #define show_9p_op(type) \ - __print_symbolic(type, \ - { P9_TLERROR, "P9_TLERROR" }, \ - { P9_RLERROR, "P9_RLERROR" }, \ - { P9_TSTATFS, "P9_TSTATFS" }, \ - { P9_RSTATFS, "P9_RSTATFS" }, \ - { P9_TLOPEN, "P9_TLOPEN" }, \ - { P9_RLOPEN, "P9_RLOPEN" }, \ - { P9_TLCREATE, "P9_TLCREATE" }, \ - { P9_RLCREATE, "P9_RLCREATE" }, \ - { P9_TSYMLINK, "P9_TSYMLINK" }, \ - { P9_RSYMLINK, "P9_RSYMLINK" }, \ - { P9_TMKNOD, "P9_TMKNOD" }, \ - { P9_RMKNOD, "P9_RMKNOD" }, \ - { P9_TRENAME, "P9_TRENAME" }, \ - { P9_RRENAME, "P9_RRENAME" }, \ - { P9_TREADLINK, "P9_TREADLINK" }, \ - { P9_RREADLINK, "P9_RREADLINK" }, \ - { P9_TGETATTR, "P9_TGETATTR" }, \ - { P9_RGETATTR, "P9_RGETATTR" }, \ - { P9_TSETATTR, "P9_TSETATTR" }, \ - { P9_RSETATTR, "P9_RSETATTR" }, \ - { P9_TXATTRWALK, "P9_TXATTRWALK" }, \ - { P9_RXATTRWALK, "P9_RXATTRWALK" }, \ - { P9_TXATTRCREATE, "P9_TXATTRCREATE" }, \ - { P9_RXATTRCREATE, "P9_RXATTRCREATE" }, \ - { P9_TREADDIR, "P9_TREADDIR" }, \ - { P9_RREADDIR, "P9_RREADDIR" }, \ - { P9_TFSYNC, "P9_TFSYNC" }, \ - { P9_RFSYNC, "P9_RFSYNC" }, \ - { P9_TLOCK, "P9_TLOCK" }, \ - { P9_RLOCK, "P9_RLOCK" }, \ - { P9_TGETLOCK, "P9_TGETLOCK" }, \ - { P9_RGETLOCK, "P9_RGETLOCK" }, \ - { P9_TLINK, "P9_TLINK" }, \ - { P9_RLINK, "P9_RLINK" }, \ - { P9_TMKDIR, "P9_TMKDIR" }, \ - { P9_RMKDIR, "P9_RMKDIR" }, \ - { P9_TRENAMEAT, "P9_TRENAMEAT" }, \ - { P9_RRENAMEAT, "P9_RRENAMEAT" }, \ - { P9_TUNLINKAT, "P9_TUNLINKAT" }, \ - { P9_RUNLINKAT, "P9_RUNLINKAT" }, \ - { P9_TVERSION, "P9_TVERSION" }, \ - { P9_RVERSION, "P9_RVERSION" }, \ - { P9_TAUTH, "P9_TAUTH" }, \ - { P9_RAUTH, "P9_RAUTH" }, \ - { P9_TATTACH, "P9_TATTACH" }, \ - { P9_RATTACH, "P9_RATTACH" }, \ - { P9_TERROR, "P9_TERROR" }, \ - { P9_RERROR, "P9_RERROR" }, \ - { P9_TFLUSH, "P9_TFLUSH" }, \ - { P9_RFLUSH, "P9_RFLUSH" }, \ - { P9_TWALK, "P9_TWALK" }, \ - { P9_RWALK, "P9_RWALK" }, \ - { P9_TOPEN, "P9_TOPEN" }, \ - { P9_ROPEN, "P9_ROPEN" }, \ - { P9_TCREATE, "P9_TCREATE" }, \ - { P9_RCREATE, "P9_RCREATE" }, \ - { P9_TREAD, "P9_TREAD" }, \ - { P9_RREAD, "P9_RREAD" }, \ - { P9_TWRITE, "P9_TWRITE" }, \ - { P9_RWRITE, "P9_RWRITE" }, \ - { P9_TCLUNK, "P9_TCLUNK" }, \ - { P9_RCLUNK, "P9_RCLUNK" }, \ - { P9_TREMOVE, "P9_TREMOVE" }, \ - { P9_RREMOVE, "P9_RREMOVE" }, \ - { P9_TSTAT, "P9_TSTAT" }, \ - { P9_RSTAT, "P9_RSTAT" }, \ - { P9_TWSTAT, "P9_TWSTAT" }, \ - { P9_RWSTAT, "P9_RWSTAT" }) + __print_symbolic(type, P9_MSG_T) TRACE_EVENT(9p_client_req, TP_PROTO(struct p9_client *clnt, int8_t type, int tag), diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 1faecea101f3..572e6503394a 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -962,7 +962,7 @@ TRACE_EVENT(alloc_extent_state, __entry->ip = IP ), - TP_printk("state=%p; mask = %s; caller = %pF", __entry->state, + TP_printk("state=%p; mask = %s; caller = %pS", __entry->state, show_gfp_flags(__entry->mask), (void *)__entry->ip) ); @@ -982,7 +982,7 @@ TRACE_EVENT(free_extent_state, __entry->ip = IP ), - TP_printk(" state=%p; caller = %pF", __entry->state, + TP_printk(" state=%p; caller = %pS", __entry->state, (void *)__entry->ip) ); diff --git a/include/trace/events/ext3.h b/include/trace/events/ext3.h index 6797b9de90ed..7f20707849bb 100644 --- a/include/trace/events/ext3.h +++ b/include/trace/events/ext3.h @@ -144,7 +144,7 @@ TRACE_EVENT(ext3_mark_inode_dirty, __entry->ip = IP; ), - TP_printk("dev %d,%d ino %lu caller %pF", + TP_printk("dev %d,%d ino %lu caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (void *)__entry->ip) ); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6e5abd6d38a2..47fca36ee426 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -240,7 +240,7 @@ TRACE_EVENT(ext4_mark_inode_dirty, __entry->ip = IP; ), - TP_printk("dev %d,%d ino %lu caller %pF", + TP_printk("dev %d,%d ino %lu caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (void *)__entry->ip) ); @@ -1762,7 +1762,7 @@ TRACE_EVENT(ext4_journal_start, __entry->rsv_blocks = rsv_blocks; ), - TP_printk("dev %d,%d blocks, %d rsv_blocks, %d caller %pF", + TP_printk("dev %d,%d blocks, %d rsv_blocks, %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blocks, __entry->rsv_blocks, (void *)__entry->ip) ); @@ -1784,7 +1784,7 @@ TRACE_EVENT(ext4_journal_start_reserved, __entry->blocks = blocks; ), - TP_printk("dev %d,%d blocks, %d caller %pF", + TP_printk("dev %d,%d blocks, %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blocks, (void *)__entry->ip) ); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 5422dbfaf97d..36f4536b6149 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -9,6 +9,36 @@ #define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev) #define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino +TRACE_DEFINE_ENUM(NODE); +TRACE_DEFINE_ENUM(DATA); +TRACE_DEFINE_ENUM(META); +TRACE_DEFINE_ENUM(META_FLUSH); +TRACE_DEFINE_ENUM(CURSEG_HOT_DATA); +TRACE_DEFINE_ENUM(CURSEG_WARM_DATA); +TRACE_DEFINE_ENUM(CURSEG_COLD_DATA); +TRACE_DEFINE_ENUM(CURSEG_HOT_NODE); +TRACE_DEFINE_ENUM(CURSEG_WARM_NODE); +TRACE_DEFINE_ENUM(CURSEG_COLD_NODE); +TRACE_DEFINE_ENUM(NO_CHECK_TYPE); +TRACE_DEFINE_ENUM(GC_GREEDY); +TRACE_DEFINE_ENUM(GC_CB); +TRACE_DEFINE_ENUM(FG_GC); +TRACE_DEFINE_ENUM(BG_GC); +TRACE_DEFINE_ENUM(LFS); +TRACE_DEFINE_ENUM(SSR); +TRACE_DEFINE_ENUM(__REQ_RAHEAD); +TRACE_DEFINE_ENUM(__REQ_WRITE); +TRACE_DEFINE_ENUM(__REQ_SYNC); +TRACE_DEFINE_ENUM(__REQ_NOIDLE); +TRACE_DEFINE_ENUM(__REQ_FLUSH); +TRACE_DEFINE_ENUM(__REQ_FUA); +TRACE_DEFINE_ENUM(__REQ_PRIO); +TRACE_DEFINE_ENUM(__REQ_META); +TRACE_DEFINE_ENUM(CP_UMOUNT); +TRACE_DEFINE_ENUM(CP_FASTBOOT); +TRACE_DEFINE_ENUM(CP_SYNC); +TRACE_DEFINE_ENUM(CP_DISCARD); + #define show_block_type(type) \ __print_symbolic(type, \ { NODE, "NODE" }, \ diff --git a/include/trace/events/intel-sst.h b/include/trace/events/intel-sst.h index 76c72d3f1902..edc24e6dea1b 100644 --- a/include/trace/events/intel-sst.h +++ b/include/trace/events/intel-sst.h @@ -1,6 +1,13 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM intel-sst +/* + * The TRACE_SYSTEM_VAR defaults to TRACE_SYSTEM, but must be a + * legitimate C variable. It is not exported to user space. + */ +#undef TRACE_SYSTEM_VAR +#define TRACE_SYSTEM_VAR intel_sst + #if !defined(_TRACE_INTEL_SST_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_INTEL_SST_H diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 3608bebd3d9c..ff8f6c091a15 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -9,19 +9,34 @@ struct irqaction; struct softirq_action; -#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } +#define SOFTIRQ_NAME_LIST \ + softirq_name(HI) \ + softirq_name(TIMER) \ + softirq_name(NET_TX) \ + softirq_name(NET_RX) \ + softirq_name(BLOCK) \ + softirq_name(BLOCK_IOPOLL) \ + softirq_name(TASKLET) \ + softirq_name(SCHED) \ + softirq_name(HRTIMER) \ + softirq_name_end(RCU) + +#undef softirq_name +#undef softirq_name_end + +#define softirq_name(sirq) TRACE_DEFINE_ENUM(sirq##_SOFTIRQ); +#define softirq_name_end(sirq) TRACE_DEFINE_ENUM(sirq##_SOFTIRQ); + +SOFTIRQ_NAME_LIST + +#undef softirq_name +#undef softirq_name_end + +#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }, +#define softirq_name_end(sirq) { sirq##_SOFTIRQ, #sirq } + #define show_softirq_name(val) \ - __print_symbolic(val, \ - softirq_name(HI), \ - softirq_name(TIMER), \ - softirq_name(NET_TX), \ - softirq_name(NET_RX), \ - softirq_name(BLOCK), \ - softirq_name(BLOCK_IOPOLL), \ - softirq_name(TASKLET), \ - softirq_name(SCHED), \ - softirq_name(HRTIMER), \ - softirq_name(RCU)) + __print_symbolic(val, SOFTIRQ_NAME_LIST) /** * irq_handler_entry - called immediately before the irq action handler diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index dd2b5467d905..539b25a76111 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -7,18 +7,40 @@ #include <linux/tracepoint.h> #define MIGRATE_MODE \ - {MIGRATE_ASYNC, "MIGRATE_ASYNC"}, \ - {MIGRATE_SYNC_LIGHT, "MIGRATE_SYNC_LIGHT"}, \ - {MIGRATE_SYNC, "MIGRATE_SYNC"} + EM( MIGRATE_ASYNC, "MIGRATE_ASYNC") \ + EM( MIGRATE_SYNC_LIGHT, "MIGRATE_SYNC_LIGHT") \ + EMe(MIGRATE_SYNC, "MIGRATE_SYNC") + #define MIGRATE_REASON \ - {MR_COMPACTION, "compaction"}, \ - {MR_MEMORY_FAILURE, "memory_failure"}, \ - {MR_MEMORY_HOTPLUG, "memory_hotplug"}, \ - {MR_SYSCALL, "syscall_or_cpuset"}, \ - {MR_MEMPOLICY_MBIND, "mempolicy_mbind"}, \ - {MR_NUMA_MISPLACED, "numa_misplaced"}, \ - {MR_CMA, "cma"} + EM( MR_COMPACTION, "compaction") \ + EM( MR_MEMORY_FAILURE, "memory_failure") \ + EM( MR_MEMORY_HOTPLUG, "memory_hotplug") \ + EM( MR_SYSCALL, "syscall_or_cpuset") \ + EM( MR_MEMPOLICY_MBIND, "mempolicy_mbind") \ + EM( MR_NUMA_MISPLACED, "numa_misplaced") \ + EMe(MR_CMA, "cma") + +/* + * First define the enums in the above macros to be exported to userspace + * via TRACE_DEFINE_ENUM(). + */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +MIGRATE_MODE +MIGRATE_REASON + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a, b) {a, b}, +#define EMe(a, b) {a, b} TRACE_EVENT(mm_migrate_pages, diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 81c4c183d348..28c45997e451 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -84,7 +84,7 @@ DECLARE_EVENT_CLASS(module_refcnt, __assign_str(name, mod->name); ), - TP_printk("%s call_site=%pf refcnt=%d", + TP_printk("%s call_site=%ps refcnt=%d", __get_str(name), (void *)__entry->ip, __entry->refcnt) ); @@ -121,7 +121,7 @@ TRACE_EVENT(module_request, __assign_str(name, name); ), - TP_printk("%s wait=%d call_site=%pf", + TP_printk("%s wait=%d call_site=%ps", __get_str(name), (int)__entry->wait, (void *)__entry->ip) ); diff --git a/include/trace/events/random.h b/include/trace/events/random.h index 805af6db41cc..4684de344c5d 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -22,7 +22,7 @@ TRACE_EVENT(add_device_randomness, __entry->IP = IP; ), - TP_printk("bytes %d caller %pF", + TP_printk("bytes %d caller %pS", __entry->bytes, (void *)__entry->IP) ); @@ -43,7 +43,7 @@ DECLARE_EVENT_CLASS(random__mix_pool_bytes, __entry->IP = IP; ), - TP_printk("%s pool: bytes %d caller %pF", + TP_printk("%s pool: bytes %d caller %pS", __entry->pool_name, __entry->bytes, (void *)__entry->IP) ); @@ -82,7 +82,7 @@ TRACE_EVENT(credit_entropy_bits, ), TP_printk("%s pool: bits %d entropy_count %d entropy_total %d " - "caller %pF", __entry->pool_name, __entry->bits, + "caller %pS", __entry->pool_name, __entry->bits, __entry->entropy_count, __entry->entropy_total, (void *)__entry->IP) ); @@ -207,7 +207,7 @@ DECLARE_EVENT_CLASS(random__get_random_bytes, __entry->IP = IP; ), - TP_printk("nbytes %d caller %pF", __entry->nbytes, (void *)__entry->IP) + TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP) ); DEFINE_EVENT(random__get_random_bytes, get_random_bytes, @@ -242,7 +242,7 @@ DECLARE_EVENT_CLASS(random__extract_entropy, __entry->IP = IP; ), - TP_printk("%s pool: nbytes %d entropy_count %d caller %pF", + TP_printk("%s pool: nbytes %d entropy_count %d caller %pS", __entry->pool_name, __entry->nbytes, __entry->entropy_count, (void *)__entry->IP) ); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index b9c1dc6c825a..fd1a02cb3c82 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -179,27 +179,53 @@ DEFINE_EVENT(rpc_task_queued, rpc_task_wakeup, ); +/* + * First define the enums in the below macros to be exported to userspace + * via TRACE_DEFINE_ENUM(). + */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +#define RPC_SHOW_SOCKET \ + EM( SS_FREE, "FREE" ) \ + EM( SS_UNCONNECTED, "UNCONNECTED" ) \ + EM( SS_CONNECTING, "CONNECTING," ) \ + EM( SS_CONNECTED, "CONNECTED," ) \ + EMe(SS_DISCONNECTING, "DISCONNECTING" ) + #define rpc_show_socket_state(state) \ - __print_symbolic(state, \ - { SS_FREE, "FREE" }, \ - { SS_UNCONNECTED, "UNCONNECTED" }, \ - { SS_CONNECTING, "CONNECTING," }, \ - { SS_CONNECTED, "CONNECTED," }, \ - { SS_DISCONNECTING, "DISCONNECTING" }) + __print_symbolic(state, RPC_SHOW_SOCKET) + +RPC_SHOW_SOCKET + +#define RPC_SHOW_SOCK \ + EM( TCP_ESTABLISHED, "ESTABLISHED" ) \ + EM( TCP_SYN_SENT, "SYN_SENT" ) \ + EM( TCP_SYN_RECV, "SYN_RECV" ) \ + EM( TCP_FIN_WAIT1, "FIN_WAIT1" ) \ + EM( TCP_FIN_WAIT2, "FIN_WAIT2" ) \ + EM( TCP_TIME_WAIT, "TIME_WAIT" ) \ + EM( TCP_CLOSE, "CLOSE" ) \ + EM( TCP_CLOSE_WAIT, "CLOSE_WAIT" ) \ + EM( TCP_LAST_ACK, "LAST_ACK" ) \ + EM( TCP_LISTEN, "LISTEN" ) \ + EMe( TCP_CLOSING, "CLOSING" ) #define rpc_show_sock_state(state) \ - __print_symbolic(state, \ - { TCP_ESTABLISHED, "ESTABLISHED" }, \ - { TCP_SYN_SENT, "SYN_SENT" }, \ - { TCP_SYN_RECV, "SYN_RECV" }, \ - { TCP_FIN_WAIT1, "FIN_WAIT1" }, \ - { TCP_FIN_WAIT2, "FIN_WAIT2" }, \ - { TCP_TIME_WAIT, "TIME_WAIT" }, \ - { TCP_CLOSE, "CLOSE" }, \ - { TCP_CLOSE_WAIT, "CLOSE_WAIT" }, \ - { TCP_LAST_ACK, "LAST_ACK" }, \ - { TCP_LISTEN, "LISTEN" }, \ - { TCP_CLOSING, "CLOSING" }) + __print_symbolic(state, RPC_SHOW_SOCK) + +RPC_SHOW_SOCK + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a, b) {a, b}, +#define EMe(a, b) {a, b} DECLARE_EVENT_CLASS(xs_socket_event, diff --git a/include/trace/events/tlb.h b/include/trace/events/tlb.h index 0e7635765153..4250f364a6ca 100644 --- a/include/trace/events/tlb.h +++ b/include/trace/events/tlb.h @@ -7,11 +7,31 @@ #include <linux/mm_types.h> #include <linux/tracepoint.h> -#define TLB_FLUSH_REASON \ - { TLB_FLUSH_ON_TASK_SWITCH, "flush on task switch" }, \ - { TLB_REMOTE_SHOOTDOWN, "remote shootdown" }, \ - { TLB_LOCAL_SHOOTDOWN, "local shootdown" }, \ - { TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" } +#define TLB_FLUSH_REASON \ + EM( TLB_FLUSH_ON_TASK_SWITCH, "flush on task switch" ) \ + EM( TLB_REMOTE_SHOOTDOWN, "remote shootdown" ) \ + EM( TLB_LOCAL_SHOOTDOWN, "local shootdown" ) \ + EMe( TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" ) + +/* + * First define the enums in TLB_FLUSH_REASON to be exported to userspace + * via TRACE_DEFINE_ENUM(). + */ +#undef EM +#undef EMe +#define EM(a,b) TRACE_DEFINE_ENUM(a); +#define EMe(a,b) TRACE_DEFINE_ENUM(a); + +TLB_FLUSH_REASON + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a,b) { a, b }, +#define EMe(a,b) { a, b } TRACE_EVENT_CONDITION(tlb_flush, diff --git a/include/trace/events/v4l2.h b/include/trace/events/v4l2.h index b9bb1f204693..20112170ff11 100644 --- a/include/trace/events/v4l2.h +++ b/include/trace/events/v4l2.h @@ -6,33 +6,58 @@ #include <linux/tracepoint.h> -#define show_type(type) \ - __print_symbolic(type, \ - { V4L2_BUF_TYPE_VIDEO_CAPTURE, "VIDEO_CAPTURE" }, \ - { V4L2_BUF_TYPE_VIDEO_OUTPUT, "VIDEO_OUTPUT" }, \ - { V4L2_BUF_TYPE_VIDEO_OVERLAY, "VIDEO_OVERLAY" }, \ - { V4L2_BUF_TYPE_VBI_CAPTURE, "VBI_CAPTURE" }, \ - { V4L2_BUF_TYPE_VBI_OUTPUT, "VBI_OUTPUT" }, \ - { V4L2_BUF_TYPE_SLICED_VBI_CAPTURE, "SLICED_VBI_CAPTURE" }, \ - { V4L2_BUF_TYPE_SLICED_VBI_OUTPUT, "SLICED_VBI_OUTPUT" }, \ - { V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY, "VIDEO_OUTPUT_OVERLAY" },\ - { V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE, "VIDEO_CAPTURE_MPLANE" },\ - { V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, "VIDEO_OUTPUT_MPLANE" }, \ - { V4L2_BUF_TYPE_SDR_CAPTURE, "SDR_CAPTURE" }, \ - { V4L2_BUF_TYPE_PRIVATE, "PRIVATE" }) +/* Enums require being exported to userspace, for user tool parsing */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +#define show_type(type) \ + __print_symbolic(type, SHOW_TYPE) + +#define SHOW_TYPE \ + EM( V4L2_BUF_TYPE_VIDEO_CAPTURE, "VIDEO_CAPTURE" ) \ + EM( V4L2_BUF_TYPE_VIDEO_OUTPUT, "VIDEO_OUTPUT" ) \ + EM( V4L2_BUF_TYPE_VIDEO_OVERLAY, "VIDEO_OVERLAY" ) \ + EM( V4L2_BUF_TYPE_VBI_CAPTURE, "VBI_CAPTURE" ) \ + EM( V4L2_BUF_TYPE_VBI_OUTPUT, "VBI_OUTPUT" ) \ + EM( V4L2_BUF_TYPE_SLICED_VBI_CAPTURE, "SLICED_VBI_CAPTURE" ) \ + EM( V4L2_BUF_TYPE_SLICED_VBI_OUTPUT, "SLICED_VBI_OUTPUT" ) \ + EM( V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY, "VIDEO_OUTPUT_OVERLAY" ) \ + EM( V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE, "VIDEO_CAPTURE_MPLANE" ) \ + EM( V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, "VIDEO_OUTPUT_MPLANE" ) \ + EM( V4L2_BUF_TYPE_SDR_CAPTURE, "SDR_CAPTURE" ) \ + EMe(V4L2_BUF_TYPE_PRIVATE, "PRIVATE" ) + +SHOW_TYPE #define show_field(field) \ - __print_symbolic(field, \ - { V4L2_FIELD_ANY, "ANY" }, \ - { V4L2_FIELD_NONE, "NONE" }, \ - { V4L2_FIELD_TOP, "TOP" }, \ - { V4L2_FIELD_BOTTOM, "BOTTOM" }, \ - { V4L2_FIELD_INTERLACED, "INTERLACED" }, \ - { V4L2_FIELD_SEQ_TB, "SEQ_TB" }, \ - { V4L2_FIELD_SEQ_BT, "SEQ_BT" }, \ - { V4L2_FIELD_ALTERNATE, "ALTERNATE" }, \ - { V4L2_FIELD_INTERLACED_TB, "INTERLACED_TB" }, \ - { V4L2_FIELD_INTERLACED_BT, "INTERLACED_BT" }) + __print_symbolic(field, SHOW_FIELD) + +#define SHOW_FIELD \ + EM( V4L2_FIELD_ANY, "ANY" ) \ + EM( V4L2_FIELD_NONE, "NONE" ) \ + EM( V4L2_FIELD_TOP, "TOP" ) \ + EM( V4L2_FIELD_BOTTOM, "BOTTOM" ) \ + EM( V4L2_FIELD_INTERLACED, "INTERLACED" ) \ + EM( V4L2_FIELD_SEQ_TB, "SEQ_TB" ) \ + EM( V4L2_FIELD_SEQ_BT, "SEQ_BT" ) \ + EM( V4L2_FIELD_ALTERNATE, "ALTERNATE" ) \ + EM( V4L2_FIELD_INTERLACED_TB, "INTERLACED_TB" ) \ + EMe( V4L2_FIELD_INTERLACED_BT, "INTERLACED_BT" ) + +SHOW_FIELD + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a, b) {a, b}, +#define EMe(a, b) {a, b} + +/* V4L2_TC_TYPE_* are macros, not defines, they do not need processing */ #define show_timecode_type(type) \ __print_symbolic(type, \ diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 5a14ead59696..880dd7437172 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -23,15 +23,32 @@ {I_REFERENCED, "I_REFERENCED"} \ ) +/* enums need to be exported to user space */ +#undef EM +#undef EMe +#define EM(a,b) TRACE_DEFINE_ENUM(a); +#define EMe(a,b) TRACE_DEFINE_ENUM(a); + #define WB_WORK_REASON \ - {WB_REASON_BACKGROUND, "background"}, \ - {WB_REASON_TRY_TO_FREE_PAGES, "try_to_free_pages"}, \ - {WB_REASON_SYNC, "sync"}, \ - {WB_REASON_PERIODIC, "periodic"}, \ - {WB_REASON_LAPTOP_TIMER, "laptop_timer"}, \ - {WB_REASON_FREE_MORE_MEM, "free_more_memory"}, \ - {WB_REASON_FS_FREE_SPACE, "fs_free_space"}, \ - {WB_REASON_FORKER_THREAD, "forker_thread"} + EM( WB_REASON_BACKGROUND, "background") \ + EM( WB_REASON_TRY_TO_FREE_PAGES, "try_to_free_pages") \ + EM( WB_REASON_SYNC, "sync") \ + EM( WB_REASON_PERIODIC, "periodic") \ + EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \ + EM( WB_REASON_FREE_MORE_MEM, "free_more_memory") \ + EM( WB_REASON_FS_FREE_SPACE, "fs_free_space") \ + EMe(WB_REASON_FORKER_THREAD, "forker_thread") + +WB_WORK_REASON + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a,b) { a, b }, +#define EMe(a,b) { a, b } struct wb_writeback_work; diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 41bf65f04dd9..37d4b10b111d 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -18,6 +18,34 @@ #include <linux/ftrace_event.h> +#ifndef TRACE_SYSTEM_VAR +#define TRACE_SYSTEM_VAR TRACE_SYSTEM +#endif + +#define __app__(x, y) str__##x##y +#define __app(x, y) __app__(x, y) + +#define TRACE_SYSTEM_STRING __app(TRACE_SYSTEM_VAR,__trace_system_name) + +#define TRACE_MAKE_SYSTEM_STR() \ + static const char TRACE_SYSTEM_STRING[] = \ + __stringify(TRACE_SYSTEM) + +TRACE_MAKE_SYSTEM_STR(); + +#undef TRACE_DEFINE_ENUM +#define TRACE_DEFINE_ENUM(a) \ + static struct trace_enum_map __used __initdata \ + __##TRACE_SYSTEM##_##a = \ + { \ + .system = TRACE_SYSTEM_STRING, \ + .enum_string = #a, \ + .enum_value = a \ + }; \ + static struct trace_enum_map __used \ + __attribute__((section("_ftrace_enum_map"))) \ + *TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a + /* * DECLARE_EVENT_CLASS can be used to add a generic function * handlers for events. That is, if all events have the same @@ -105,7 +133,6 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) - /* * Stage 2 of the trace events. * @@ -122,6 +149,9 @@ * The size of an array is also encoded, in the higher 16 bits of <item>. */ +#undef TRACE_DEFINE_ENUM +#define TRACE_DEFINE_ENUM(a) + #undef __field #define __field(type, item) @@ -539,7 +569,7 @@ static inline notrace int ftrace_get_offsets_##call( \ * .trace = ftrace_raw_output_<call>, <-- stage 2 * }; * - * static const char print_fmt_<call>[] = <TP_printk>; + * static char print_fmt_<call>[] = <TP_printk>; * * static struct ftrace_event_class __used event_class_<template> = { * .system = "<system>", @@ -690,9 +720,9 @@ static inline void ftrace_test_probe_##call(void) \ #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ _TRACE_PERF_PROTO(call, PARAMS(proto)); \ -static const char print_fmt_##call[] = print; \ +static char print_fmt_##call[] = print; \ static struct ftrace_event_class __used __refdata event_class_##call = { \ - .system = __stringify(TRACE_SYSTEM), \ + .system = TRACE_SYSTEM_STRING, \ .define_fields = ftrace_define_fields_##call, \ .fields = LIST_HEAD_INIT(event_class_##call.fields),\ .raw_init = trace_event_raw_init, \ @@ -719,7 +749,7 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ \ -static const char print_fmt_##call[] = print; \ +static char print_fmt_##call[] = print; \ \ static struct ftrace_event_call __used event_##call = { \ .class = &event_class_##template, \ @@ -735,6 +765,7 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#undef TRACE_SYSTEM_VAR #ifdef CONFIG_PERF_EVENTS diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 45da7ec7d274..cc47ef41076a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -118,6 +118,7 @@ enum bpf_map_type { enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_KPROBE, }; /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -151,6 +152,7 @@ union bpf_attr { __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ + __u32 kern_version; /* checked when prog_type=kprobe */ }; } __attribute__((aligned(8))); @@ -162,6 +164,9 @@ enum bpf_func_id { BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ + BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ + BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ + BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */ __BPF_FUNC_MAX_ID, }; diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index 2f62ab2d7bf9..8038e9aa3b95 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -702,6 +702,10 @@ struct input_keymap_entry { #define KEY_NUMERIC_9 0x209 #define KEY_NUMERIC_STAR 0x20a #define KEY_NUMERIC_POUND 0x20b +#define KEY_NUMERIC_A 0x20c /* Phone key A - HUT Telephony 0xb9 */ +#define KEY_NUMERIC_B 0x20d +#define KEY_NUMERIC_C 0x20e +#define KEY_NUMERIC_D 0x20f #define KEY_CAMERA_FOCUS 0x210 #define KEY_WPS_BUTTON 0x211 /* WiFi Protected Setup key */ diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 7d664ea85ebd..7b1425a6b370 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -58,6 +58,8 @@ #define STACK_END_MAGIC 0x57AC6E9D +#define TRACEFS_MAGIC 0x74726163 + #define V9FS_MAGIC 0x01021997 #define BDEVFS_MAGIC 0x62646576 diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 9b79abbd1ab8..309211b3eb67 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -152,21 +152,42 @@ enum perf_event_sample_format { * The branch types can be combined, however BRANCH_ANY covers all types * of branches and therefore it supersedes all the other types. */ +enum perf_branch_sample_type_shift { + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ + + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ +}; + enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */ - PERF_SAMPLE_BRANCH_COND = 1U << 10, /* conditional branches */ - - PERF_SAMPLE_BRANCH_MAX = 1U << 11, /* non-ABI */ + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; #define PERF_SAMPLE_BRANCH_PLM_ALL \ @@ -240,6 +261,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ /* add: sample_stack_user */ #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -305,7 +327,8 @@ struct perf_event_attr { exclude_callchain_user : 1, /* exclude user callchains */ mmap2 : 1, /* include mmap with inode data */ comm_exec : 1, /* flag comm events that are due to an exec */ - __reserved_1 : 39; + use_clockid : 1, /* use @clockid for time fields */ + __reserved_1 : 38; union { __u32 wakeup_events; /* wakeup every n events */ @@ -334,8 +357,7 @@ struct perf_event_attr { */ __u32 sample_stack_user; - /* Align to u64. */ - __u32 __reserved_2; + __s32 clockid; /* * Defines set of regs to dump for each sample * state captured on: @@ -345,6 +367,12 @@ struct perf_event_attr { * See asm/perf_regs.h for details. */ __u64 sample_regs_intr; + + /* + * Wakeup watermark for AUX area + */ + __u32 aux_watermark; + __u32 __reserved_2; /* align to __u64 */ }; #define perf_flags(attr) (*(&(attr)->read_format + 1)) @@ -360,6 +388,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, @@ -500,9 +529,30 @@ struct perf_event_mmap_page { * In this case the kernel will not over-write unread data. * * See perf_output_put_handle() for the data ordering. + * + * data_{offset,size} indicate the location and size of the perf record + * buffer within the mmapped area. */ __u64 data_head; /* head in the data section */ __u64 data_tail; /* user-space written tail */ + __u64 data_offset; /* where the buffer starts */ + __u64 data_size; /* data buffer size */ + + /* + * AUX area is defined by aux_{offset,size} fields that should be set + * by the userspace, so that + * + * aux_offset >= data_offset + data_size + * + * prior to mmap()ing it. Size of the mmap()ed area should be aux_size. + * + * Ring buffer pointers aux_{head,tail} have the same semantics as + * data_{head,tail} and same ordering rules apply. + */ + __u64 aux_head; + __u64 aux_tail; + __u64 aux_offset; + __u64 aux_size; }; #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) @@ -725,6 +775,31 @@ enum perf_event_type { */ PERF_RECORD_MMAP2 = 10, + /* + * Records that new data landed in the AUX buffer part. + * + * struct { + * struct perf_event_header header; + * + * u64 aux_offset; + * u64 aux_size; + * u64 flags; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_AUX = 11, + + /* + * Indicates that instruction trace has started + * + * struct { + * struct perf_event_header header; + * u32 pid; + * u32 tid; + * }; + */ + PERF_RECORD_ITRACE_START = 12, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -742,6 +817,12 @@ enum perf_callchain_context { PERF_CONTEXT_MAX = (__u64)-4095, }; +/** + * PERF_RECORD_AUX::flags bits + */ +#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ + #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) #define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ diff --git a/init/Kconfig b/init/Kconfig index f5dbc6d4261b..a905b7301e10 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -791,6 +791,19 @@ config RCU_NOCB_CPU_ALL endchoice +config RCU_EXPEDITE_BOOT + bool + default n + help + This option enables expedited grace periods at boot time, + as if rcu_expedite_gp() had been invoked early in boot. + The corresponding rcu_unexpedite_gp() is invoked from + rcu_end_inkernel_boot(), which is intended to be invoked + at the end of the kernel-only boot sequence, just before + init is exec'ed. + + Accept the default if unsure. + endmenu # "RCU Subsystem" config BUILD_BIN2C @@ -1513,7 +1526,7 @@ config EVENTFD # syscall, maps, verifier config BPF_SYSCALL - bool "Enable bpf() system call" if EXPERT + bool "Enable bpf() system call" select ANON_INODES select BPF default n diff --git a/init/main.c b/init/main.c index f6dd8fe1f22c..a7e969d12f51 100644 --- a/init/main.c +++ b/init/main.c @@ -144,7 +144,7 @@ EXPORT_SYMBOL_GPL(static_key_initialized); * rely on the BIOS and skip the reset operation. * * This is useful if kernel is booting in an unreliable environment. - * For ex. kdump situaiton where previous kernel has crashed, BIOS has been + * For ex. kdump situation where previous kernel has crashed, BIOS has been * skipped and devices will be in unknown state. */ unsigned int reset_devices; @@ -385,6 +385,7 @@ static noinline void __init_refok rest_init(void) int pid; rcu_scheduler_starting(); + smpboot_thread_init(); /* * We need to spawn init first so that it obtains pid 1, however * the init task will end up wanting to create kthreads, which, if diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 536edc2be307..504c10b990ef 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -16,6 +16,7 @@ #include <linux/file.h> #include <linux/license.h> #include <linux/filter.h> +#include <linux/version.h> static LIST_HEAD(bpf_map_types); @@ -467,7 +468,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD log_buf +#define BPF_PROG_LOAD_LAST_FIELD kern_version static int bpf_prog_load(union bpf_attr *attr) { @@ -492,6 +493,10 @@ static int bpf_prog_load(union bpf_attr *attr) if (attr->insn_cnt >= BPF_MAXINSNS) return -EINVAL; + if (type == BPF_PROG_TYPE_KPROBE && + attr->kern_version != LINUX_VERSION_CODE) + return -EINVAL; + /* plain bpf_prog allocation */ prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); if (!prog) diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 937ecdfdf258..72d59a1a6eb6 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -39,15 +39,15 @@ void context_tracking_cpu_set(int cpu) } /** - * context_tracking_user_enter - Inform the context tracking that the CPU is going to - * enter userspace mode. + * context_tracking_enter - Inform the context tracking that the CPU is going + * enter user or guest space mode. * * This function must be called right before we switch from the kernel - * to userspace, when it's guaranteed the remaining kernel instructions - * to execute won't use any RCU read side critical section because this - * function sets RCU in extended quiescent state. + * to user or guest space, when it's guaranteed the remaining kernel + * instructions to execute won't use any RCU read side critical section + * because this function sets RCU in extended quiescent state. */ -void context_tracking_user_enter(void) +void context_tracking_enter(enum ctx_state state) { unsigned long flags; @@ -75,9 +75,8 @@ void context_tracking_user_enter(void) WARN_ON_ONCE(!current->mm); local_irq_save(flags); - if ( __this_cpu_read(context_tracking.state) != IN_USER) { + if ( __this_cpu_read(context_tracking.state) != state) { if (__this_cpu_read(context_tracking.active)) { - trace_user_enter(0); /* * At this stage, only low level arch entry code remains and * then we'll run in userspace. We can assume there won't be @@ -85,7 +84,10 @@ void context_tracking_user_enter(void) * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency * on the tick. */ - vtime_user_enter(current); + if (state == CONTEXT_USER) { + trace_user_enter(0); + vtime_user_enter(current); + } rcu_user_enter(); } /* @@ -101,24 +103,32 @@ void context_tracking_user_enter(void) * OTOH we can spare the calls to vtime and RCU when context_tracking.active * is false because we know that CPU is not tickless. */ - __this_cpu_write(context_tracking.state, IN_USER); + __this_cpu_write(context_tracking.state, state); } local_irq_restore(flags); } +NOKPROBE_SYMBOL(context_tracking_enter); +EXPORT_SYMBOL_GPL(context_tracking_enter); + +void context_tracking_user_enter(void) +{ + context_tracking_enter(CONTEXT_USER); +} NOKPROBE_SYMBOL(context_tracking_user_enter); /** - * context_tracking_user_exit - Inform the context tracking that the CPU is - * exiting userspace mode and entering the kernel. + * context_tracking_exit - Inform the context tracking that the CPU is + * exiting user or guest mode and entering the kernel. * - * This function must be called after we entered the kernel from userspace - * before any use of RCU read side critical section. This potentially include - * any high level kernel code like syscalls, exceptions, signal handling, etc... + * This function must be called after we entered the kernel from user or + * guest space before any use of RCU read side critical section. This + * potentially include any high level kernel code like syscalls, exceptions, + * signal handling, etc... * * This call supports re-entrancy. This way it can be called from any exception * handler without needing to know if we came from userspace or not. */ -void context_tracking_user_exit(void) +void context_tracking_exit(enum ctx_state state) { unsigned long flags; @@ -129,20 +139,29 @@ void context_tracking_user_exit(void) return; local_irq_save(flags); - if (__this_cpu_read(context_tracking.state) == IN_USER) { + if (__this_cpu_read(context_tracking.state) == state) { if (__this_cpu_read(context_tracking.active)) { /* * We are going to run code that may use RCU. Inform * RCU core about that (ie: we may need the tick again). */ rcu_user_exit(); - vtime_user_exit(current); - trace_user_exit(0); + if (state == CONTEXT_USER) { + vtime_user_exit(current); + trace_user_exit(0); + } } - __this_cpu_write(context_tracking.state, IN_KERNEL); + __this_cpu_write(context_tracking.state, CONTEXT_KERNEL); } local_irq_restore(flags); } +NOKPROBE_SYMBOL(context_tracking_exit); +EXPORT_SYMBOL_GPL(context_tracking_exit); + +void context_tracking_user_exit(void) +{ + context_tracking_exit(CONTEXT_USER); +} NOKPROBE_SYMBOL(context_tracking_user_exit); /** diff --git a/kernel/cpu.c b/kernel/cpu.c index 82eea9c5af61..94bbe4695232 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -411,8 +411,10 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) * * Wait for the stop thread to go away. */ - while (!idle_cpu(cpu)) + while (!per_cpu(cpu_dead_idle, cpu)) cpu_relax(); + smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */ + per_cpu(cpu_dead_idle, cpu) = false; hotplug_cpu__broadcast_tick_pull(cpu); /* This actually kills the CPU. */ @@ -451,6 +453,37 @@ out: EXPORT_SYMBOL(cpu_down); #endif /*CONFIG_HOTPLUG_CPU*/ +/* + * Unpark per-CPU smpboot kthreads at CPU-online time. + */ +static int smpboot_thread_call(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + + case CPU_ONLINE: + smpboot_unpark_threads(cpu); + break; + + default: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block smpboot_thread_notifier = { + .notifier_call = smpboot_thread_call, + .priority = CPU_PRI_SMPBOOT, +}; + +void __cpuinit smpboot_thread_init(void) +{ + register_cpu_notifier(&smpboot_thread_notifier); +} + /* Requires cpu_add_remove_lock to be held */ static int _cpu_up(unsigned int cpu, int tasks_frozen) { @@ -490,9 +523,6 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen) goto out_notify; BUG_ON(!cpu_online(cpu)); - /* Wake the per cpu threads */ - smpboot_unpark_threads(cpu); - /* Now call notifier in preparation. */ cpu_notify(CPU_ONLINE | mod, hcpu); diff --git a/kernel/events/core.c b/kernel/events/core.c index 2fabc0627165..06917d537302 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -34,14 +34,16 @@ #include <linux/syscalls.h> #include <linux/anon_inodes.h> #include <linux/kernel_stat.h> +#include <linux/cgroup.h> #include <linux/perf_event.h> #include <linux/ftrace_event.h> #include <linux/hw_breakpoint.h> #include <linux/mm_types.h> -#include <linux/cgroup.h> #include <linux/module.h> #include <linux/mman.h> #include <linux/compat.h> +#include <linux/bpf.h> +#include <linux/filter.h> #include "internal.h" @@ -153,7 +155,7 @@ enum event_type_t { */ struct static_key_deferred perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); -static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events); +static DEFINE_PER_CPU(int, perf_sched_cb_usages); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; @@ -327,6 +329,11 @@ static inline u64 perf_clock(void) return local_clock(); } +static inline u64 perf_event_clock(struct perf_event *event) +{ + return event->clock(); +} + static inline struct perf_cpu_context * __get_cpu_context(struct perf_event_context *ctx) { @@ -351,32 +358,6 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, #ifdef CONFIG_CGROUP_PERF -/* - * perf_cgroup_info keeps track of time_enabled for a cgroup. - * This is a per-cpu dynamically allocated data structure. - */ -struct perf_cgroup_info { - u64 time; - u64 timestamp; -}; - -struct perf_cgroup { - struct cgroup_subsys_state css; - struct perf_cgroup_info __percpu *info; -}; - -/* - * Must ensure cgroup is pinned (css_get) before calling - * this function. In other words, we cannot call this function - * if there is no cgroup event for the current CPU context. - */ -static inline struct perf_cgroup * -perf_cgroup_from_task(struct task_struct *task) -{ - return container_of(task_css(task, perf_event_cgrp_id), - struct perf_cgroup, css); -} - static inline bool perf_cgroup_match(struct perf_event *event) { @@ -905,6 +886,15 @@ static void get_ctx(struct perf_event_context *ctx) WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); } +static void free_ctx(struct rcu_head *head) +{ + struct perf_event_context *ctx; + + ctx = container_of(head, struct perf_event_context, rcu_head); + kfree(ctx->task_ctx_data); + kfree(ctx); +} + static void put_ctx(struct perf_event_context *ctx) { if (atomic_dec_and_test(&ctx->refcount)) { @@ -912,7 +902,7 @@ static void put_ctx(struct perf_event_context *ctx) put_ctx(ctx->parent_ctx); if (ctx->task) put_task_struct(ctx->task); - kfree_rcu(ctx, rcu_head); + call_rcu(&ctx->rcu_head, free_ctx); } } @@ -1239,9 +1229,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) if (is_cgroup_event(event)) ctx->nr_cgroups++; - if (has_branch_stack(event)) - ctx->nr_branch_stack++; - list_add_rcu(&event->event_entry, &ctx->event_list); ctx->nr_events++; if (event->attr.inherit_stat) @@ -1408,9 +1395,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) cpuctx->cgrp = NULL; } - if (has_branch_stack(event)) - ctx->nr_branch_stack--; - ctx->nr_events--; if (event->attr.inherit_stat) ctx->nr_stat--; @@ -1847,6 +1831,7 @@ static void perf_set_shadow_time(struct perf_event *event, #define MAX_INTERRUPTS (~0ULL) static void perf_log_throttle(struct perf_event *event, int enable); +static void perf_log_itrace_start(struct perf_event *event); static int event_sched_in(struct perf_event *event, @@ -1881,6 +1866,12 @@ event_sched_in(struct perf_event *event, perf_pmu_disable(event->pmu); + event->tstamp_running += tstamp - event->tstamp_stopped; + + perf_set_shadow_time(event, ctx, tstamp); + + perf_log_itrace_start(event); + if (event->pmu->add(event, PERF_EF_START)) { event->state = PERF_EVENT_STATE_INACTIVE; event->oncpu = -1; @@ -1888,10 +1879,6 @@ event_sched_in(struct perf_event *event, goto out; } - event->tstamp_running += tstamp - event->tstamp_stopped; - - perf_set_shadow_time(event, ctx, tstamp); - if (!is_software_event(event)) cpuctx->active_oncpu++; if (!ctx->nr_active++) @@ -2559,6 +2546,9 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, next->perf_event_ctxp[ctxn] = ctx; ctx->task = next; next_ctx->task = task; + + swap(ctx->task_ctx_data, next_ctx->task_ctx_data); + do_switch = 0; perf_event_sync_stat(ctx, next_ctx); @@ -2577,6 +2567,56 @@ unlock: } } +void perf_sched_cb_dec(struct pmu *pmu) +{ + this_cpu_dec(perf_sched_cb_usages); +} + +void perf_sched_cb_inc(struct pmu *pmu) +{ + this_cpu_inc(perf_sched_cb_usages); +} + +/* + * This function provides the context switch callback to the lower code + * layer. It is invoked ONLY when the context switch callback is enabled. + */ +static void perf_pmu_sched_task(struct task_struct *prev, + struct task_struct *next, + bool sched_in) +{ + struct perf_cpu_context *cpuctx; + struct pmu *pmu; + unsigned long flags; + + if (prev == next) + return; + + local_irq_save(flags); + + rcu_read_lock(); + + list_for_each_entry_rcu(pmu, &pmus, entry) { + if (pmu->sched_task) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + + perf_ctx_lock(cpuctx, cpuctx->task_ctx); + + perf_pmu_disable(pmu); + + pmu->sched_task(cpuctx->task_ctx, sched_in); + + perf_pmu_enable(pmu); + + perf_ctx_unlock(cpuctx, cpuctx->task_ctx); + } + } + + rcu_read_unlock(); + + local_irq_restore(flags); +} + #define for_each_task_context_nr(ctxn) \ for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) @@ -2596,6 +2636,9 @@ void __perf_event_task_sched_out(struct task_struct *task, { int ctxn; + if (__this_cpu_read(perf_sched_cb_usages)) + perf_pmu_sched_task(task, next, false); + for_each_task_context_nr(ctxn) perf_event_context_sched_out(task, ctxn, next); @@ -2755,64 +2798,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, } /* - * When sampling the branck stack in system-wide, it may be necessary - * to flush the stack on context switch. This happens when the branch - * stack does not tag its entries with the pid of the current task. - * Otherwise it becomes impossible to associate a branch entry with a - * task. This ambiguity is more likely to appear when the branch stack - * supports priv level filtering and the user sets it to monitor only - * at the user level (which could be a useful measurement in system-wide - * mode). In that case, the risk is high of having a branch stack with - * branch from multiple tasks. Flushing may mean dropping the existing - * entries or stashing them somewhere in the PMU specific code layer. - * - * This function provides the context switch callback to the lower code - * layer. It is invoked ONLY when there is at least one system-wide context - * with at least one active event using taken branch sampling. - */ -static void perf_branch_stack_sched_in(struct task_struct *prev, - struct task_struct *task) -{ - struct perf_cpu_context *cpuctx; - struct pmu *pmu; - unsigned long flags; - - /* no need to flush branch stack if not changing task */ - if (prev == task) - return; - - local_irq_save(flags); - - rcu_read_lock(); - - list_for_each_entry_rcu(pmu, &pmus, entry) { - cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - - /* - * check if the context has at least one - * event using PERF_SAMPLE_BRANCH_STACK - */ - if (cpuctx->ctx.nr_branch_stack > 0 - && pmu->flush_branch_stack) { - - perf_ctx_lock(cpuctx, cpuctx->task_ctx); - - perf_pmu_disable(pmu); - - pmu->flush_branch_stack(); - - perf_pmu_enable(pmu); - - perf_ctx_unlock(cpuctx, cpuctx->task_ctx); - } - } - - rcu_read_unlock(); - - local_irq_restore(flags); -} - -/* * Called from scheduler to add the events of the current task * with interrupts disabled. * @@ -2844,9 +2829,8 @@ void __perf_event_task_sched_in(struct task_struct *prev, if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) perf_cgroup_sched_in(prev, task); - /* check for system-wide branch_stack events */ - if (atomic_read(this_cpu_ptr(&perf_branch_stack_events))) - perf_branch_stack_sched_in(prev, task); + if (__this_cpu_read(perf_sched_cb_usages)) + perf_pmu_sched_task(prev, task, true); } static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) @@ -3220,7 +3204,10 @@ static void __perf_event_read(void *info) static inline u64 perf_event_count(struct perf_event *event) { - return local64_read(&event->count) + atomic64_read(&event->child_count); + if (event->pmu->count) + return event->pmu->count(event); + + return __perf_event_count(event); } static u64 perf_event_read(struct perf_event *event) @@ -3321,12 +3308,15 @@ errout: * Returns a matching context with refcount and pincount. */ static struct perf_event_context * -find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) +find_get_context(struct pmu *pmu, struct task_struct *task, + struct perf_event *event) { struct perf_event_context *ctx, *clone_ctx = NULL; struct perf_cpu_context *cpuctx; + void *task_ctx_data = NULL; unsigned long flags; int ctxn, err; + int cpu = event->cpu; if (!task) { /* Must be root to operate on a CPU event: */ @@ -3354,11 +3344,24 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) if (ctxn < 0) goto errout; + if (event->attach_state & PERF_ATTACH_TASK_DATA) { + task_ctx_data = kzalloc(pmu->task_ctx_size, GFP_KERNEL); + if (!task_ctx_data) { + err = -ENOMEM; + goto errout; + } + } + retry: ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { clone_ctx = unclone_ctx(ctx); ++ctx->pin_count; + + if (task_ctx_data && !ctx->task_ctx_data) { + ctx->task_ctx_data = task_ctx_data; + task_ctx_data = NULL; + } raw_spin_unlock_irqrestore(&ctx->lock, flags); if (clone_ctx) @@ -3369,6 +3372,11 @@ retry: if (!ctx) goto errout; + if (task_ctx_data) { + ctx->task_ctx_data = task_ctx_data; + task_ctx_data = NULL; + } + err = 0; mutex_lock(&task->perf_event_mutex); /* @@ -3395,13 +3403,16 @@ retry: } } + kfree(task_ctx_data); return ctx; errout: + kfree(task_ctx_data); return ERR_PTR(err); } static void perf_event_free_filter(struct perf_event *event); +static void perf_event_free_bpf_prog(struct perf_event *event); static void free_event_rcu(struct rcu_head *head) { @@ -3411,10 +3422,10 @@ static void free_event_rcu(struct rcu_head *head) if (event->ns) put_pid_ns(event->ns); perf_event_free_filter(event); + perf_event_free_bpf_prog(event); kfree(event); } -static void ring_buffer_put(struct ring_buffer *rb); static void ring_buffer_attach(struct perf_event *event, struct ring_buffer *rb); @@ -3423,10 +3434,6 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu) if (event->parent) return; - if (has_branch_stack(event)) { - if (!(event->attach_state & PERF_ATTACH_TASK)) - atomic_dec(&per_cpu(perf_branch_stack_events, cpu)); - } if (is_cgroup_event(event)) atomic_dec(&per_cpu(perf_cgroup_events, cpu)); } @@ -3454,6 +3461,91 @@ static void unaccount_event(struct perf_event *event) unaccount_event_cpu(event, event->cpu); } +/* + * The following implement mutual exclusion of events on "exclusive" pmus + * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled + * at a time, so we disallow creating events that might conflict, namely: + * + * 1) cpu-wide events in the presence of per-task events, + * 2) per-task events in the presence of cpu-wide events, + * 3) two matching events on the same context. + * + * The former two cases are handled in the allocation path (perf_event_alloc(), + * __free_event()), the latter -- before the first perf_install_in_context(). + */ +static int exclusive_event_init(struct perf_event *event) +{ + struct pmu *pmu = event->pmu; + + if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE)) + return 0; + + /* + * Prevent co-existence of per-task and cpu-wide events on the + * same exclusive pmu. + * + * Negative pmu::exclusive_cnt means there are cpu-wide + * events on this "exclusive" pmu, positive means there are + * per-task events. + * + * Since this is called in perf_event_alloc() path, event::ctx + * doesn't exist yet; it is, however, safe to use PERF_ATTACH_TASK + * to mean "per-task event", because unlike other attach states it + * never gets cleared. + */ + if (event->attach_state & PERF_ATTACH_TASK) { + if (!atomic_inc_unless_negative(&pmu->exclusive_cnt)) + return -EBUSY; + } else { + if (!atomic_dec_unless_positive(&pmu->exclusive_cnt)) + return -EBUSY; + } + + return 0; +} + +static void exclusive_event_destroy(struct perf_event *event) +{ + struct pmu *pmu = event->pmu; + + if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE)) + return; + + /* see comment in exclusive_event_init() */ + if (event->attach_state & PERF_ATTACH_TASK) + atomic_dec(&pmu->exclusive_cnt); + else + atomic_inc(&pmu->exclusive_cnt); +} + +static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2) +{ + if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && + (e1->cpu == e2->cpu || + e1->cpu == -1 || + e2->cpu == -1)) + return true; + return false; +} + +/* Called under the same ctx::mutex as perf_install_in_context() */ +static bool exclusive_event_installable(struct perf_event *event, + struct perf_event_context *ctx) +{ + struct perf_event *iter_event; + struct pmu *pmu = event->pmu; + + if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE)) + return true; + + list_for_each_entry(iter_event, &ctx->event_list, event_entry) { + if (exclusive_event_match(iter_event, event)) + return false; + } + + return true; +} + static void __free_event(struct perf_event *event) { if (!event->parent) { @@ -3467,8 +3559,10 @@ static void __free_event(struct perf_event *event) if (event->ctx) put_ctx(event->ctx); - if (event->pmu) + if (event->pmu) { + exclusive_event_destroy(event); module_put(event->pmu->module); + } call_rcu(&event->rcu_head, free_event_rcu); } @@ -3927,6 +4021,7 @@ static inline int perf_fget_light(int fd, struct fd *p) static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event); static int perf_event_set_filter(struct perf_event *event, void __user *arg); +static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd); static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) { @@ -3980,6 +4075,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon case PERF_EVENT_IOC_SET_FILTER: return perf_event_set_filter(event, (void __user *)arg); + case PERF_EVENT_IOC_SET_BPF: + return perf_event_set_bpf_prog(event, arg); + default: return -ENOTTY; } @@ -4096,6 +4194,8 @@ static void perf_event_init_userpage(struct perf_event *event) /* Allow new userspace to detect that bit 0 is deprecated */ userpg->cap_bit0_is_deprecated = 1; userpg->size = offsetof(struct perf_event_mmap_page, __reserved); + userpg->data_offset = PAGE_SIZE; + userpg->data_size = perf_data_size(rb); unlock: rcu_read_unlock(); @@ -4263,7 +4363,7 @@ static void rb_free_rcu(struct rcu_head *rcu_head) rb_free(rb); } -static struct ring_buffer *ring_buffer_get(struct perf_event *event) +struct ring_buffer *ring_buffer_get(struct perf_event *event) { struct ring_buffer *rb; @@ -4278,7 +4378,7 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event) return rb; } -static void ring_buffer_put(struct ring_buffer *rb) +void ring_buffer_put(struct ring_buffer *rb) { if (!atomic_dec_and_test(&rb->refcount)) return; @@ -4295,6 +4395,9 @@ static void perf_mmap_open(struct vm_area_struct *vma) atomic_inc(&event->mmap_count); atomic_inc(&event->rb->mmap_count); + if (vma->vm_pgoff) + atomic_inc(&event->rb->aux_mmap_count); + if (event->pmu->event_mapped) event->pmu->event_mapped(event); } @@ -4319,6 +4422,20 @@ static void perf_mmap_close(struct vm_area_struct *vma) if (event->pmu->event_unmapped) event->pmu->event_unmapped(event); + /* + * rb->aux_mmap_count will always drop before rb->mmap_count and + * event->mmap_count, so it is ok to use event->mmap_mutex to + * serialize with perf_mmap here. + */ + if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && + atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) { + atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm); + vma->vm_mm->pinned_vm -= rb->aux_mmap_locked; + + rb_free_aux(rb); + mutex_unlock(&event->mmap_mutex); + } + atomic_dec(&rb->mmap_count); if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) @@ -4392,7 +4509,7 @@ out_put: static const struct vm_operations_struct perf_mmap_vmops = { .open = perf_mmap_open, - .close = perf_mmap_close, + .close = perf_mmap_close, /* non mergable */ .fault = perf_mmap_fault, .page_mkwrite = perf_mmap_fault, }; @@ -4403,10 +4520,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) unsigned long user_locked, user_lock_limit; struct user_struct *user = current_user(); unsigned long locked, lock_limit; - struct ring_buffer *rb; + struct ring_buffer *rb = NULL; unsigned long vma_size; unsigned long nr_pages; - long user_extra, extra; + long user_extra = 0, extra = 0; int ret = 0, flags = 0; /* @@ -4421,7 +4538,66 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) return -EINVAL; vma_size = vma->vm_end - vma->vm_start; - nr_pages = (vma_size / PAGE_SIZE) - 1; + + if (vma->vm_pgoff == 0) { + nr_pages = (vma_size / PAGE_SIZE) - 1; + } else { + /* + * AUX area mapping: if rb->aux_nr_pages != 0, it's already + * mapped, all subsequent mappings should have the same size + * and offset. Must be above the normal perf buffer. + */ + u64 aux_offset, aux_size; + + if (!event->rb) + return -EINVAL; + + nr_pages = vma_size / PAGE_SIZE; + + mutex_lock(&event->mmap_mutex); + ret = -EINVAL; + + rb = event->rb; + if (!rb) + goto aux_unlock; + + aux_offset = ACCESS_ONCE(rb->user_page->aux_offset); + aux_size = ACCESS_ONCE(rb->user_page->aux_size); + + if (aux_offset < perf_data_size(rb) + PAGE_SIZE) + goto aux_unlock; + + if (aux_offset != vma->vm_pgoff << PAGE_SHIFT) + goto aux_unlock; + + /* already mapped with a different offset */ + if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff) + goto aux_unlock; + + if (aux_size != vma_size || aux_size != nr_pages * PAGE_SIZE) + goto aux_unlock; + + /* already mapped with a different size */ + if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages) + goto aux_unlock; + + if (!is_power_of_2(nr_pages)) + goto aux_unlock; + + if (!atomic_inc_not_zero(&rb->mmap_count)) + goto aux_unlock; + + if (rb_has_aux(rb)) { + atomic_inc(&rb->aux_mmap_count); + ret = 0; + goto unlock; + } + + atomic_set(&rb->aux_mmap_count, 1); + user_extra = nr_pages; + + goto accounting; + } /* * If we have rb pages ensure they're a power-of-two number, so we @@ -4433,9 +4609,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (vma_size != PAGE_SIZE * (1 + nr_pages)) return -EINVAL; - if (vma->vm_pgoff != 0) - return -EINVAL; - WARN_ON_ONCE(event->ctx->parent_ctx); again: mutex_lock(&event->mmap_mutex); @@ -4459,6 +4632,8 @@ again: } user_extra = nr_pages + 1; + +accounting: user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); /* @@ -4468,7 +4643,6 @@ again: user_locked = atomic_long_read(&user->locked_vm) + user_extra; - extra = 0; if (user_locked > user_lock_limit) extra = user_locked - user_lock_limit; @@ -4482,35 +4656,46 @@ again: goto unlock; } - WARN_ON(event->rb); + WARN_ON(!rb && event->rb); if (vma->vm_flags & VM_WRITE) flags |= RING_BUFFER_WRITABLE; - rb = rb_alloc(nr_pages, - event->attr.watermark ? event->attr.wakeup_watermark : 0, - event->cpu, flags); - if (!rb) { - ret = -ENOMEM; - goto unlock; - } + rb = rb_alloc(nr_pages, + event->attr.watermark ? event->attr.wakeup_watermark : 0, + event->cpu, flags); - atomic_set(&rb->mmap_count, 1); - rb->mmap_locked = extra; - rb->mmap_user = get_current_user(); + if (!rb) { + ret = -ENOMEM; + goto unlock; + } - atomic_long_add(user_extra, &user->locked_vm); - vma->vm_mm->pinned_vm += extra; + atomic_set(&rb->mmap_count, 1); + rb->mmap_user = get_current_user(); + rb->mmap_locked = extra; - ring_buffer_attach(event, rb); + ring_buffer_attach(event, rb); - perf_event_init_userpage(event); - perf_event_update_userpage(event); + perf_event_init_userpage(event); + perf_event_update_userpage(event); + } else { + ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, + event->attr.aux_watermark, flags); + if (!ret) + rb->aux_mmap_locked = extra; + } unlock: - if (!ret) + if (!ret) { + atomic_long_add(user_extra, &user->locked_vm); + vma->vm_mm->pinned_vm += extra; + atomic_inc(&event->mmap_count); + } else if (rb) { + atomic_dec(&rb->mmap_count); + } +aux_unlock: mutex_unlock(&event->mmap_mutex); /* @@ -4766,7 +4951,7 @@ static void __perf_event_header__init_id(struct perf_event_header *header, } if (sample_type & PERF_SAMPLE_TIME) - data->time = perf_clock(); + data->time = perf_event_clock(event); if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) data->id = primary_event_id(event); @@ -5344,6 +5529,8 @@ static void perf_event_task_output(struct perf_event *event, task_event->event_id.tid = perf_event_tid(event, task); task_event->event_id.ptid = perf_event_tid(event, current); + task_event->event_id.time = perf_event_clock(event); + perf_output_put(&handle, task_event->event_id); perf_event__output_id_sample(event, &handle, &sample); @@ -5377,7 +5564,7 @@ static void perf_event_task(struct task_struct *task, /* .ppid */ /* .tid */ /* .ptid */ - .time = perf_clock(), + /* .time */ }, }; @@ -5732,6 +5919,40 @@ void perf_event_mmap(struct vm_area_struct *vma) perf_event_mmap_event(&mmap_event); } +void perf_event_aux_event(struct perf_event *event, unsigned long head, + unsigned long size, u64 flags) +{ + struct perf_output_handle handle; + struct perf_sample_data sample; + struct perf_aux_event { + struct perf_event_header header; + u64 offset; + u64 size; + u64 flags; + } rec = { + .header = { + .type = PERF_RECORD_AUX, + .misc = 0, + .size = sizeof(rec), + }, + .offset = head, + .size = size, + .flags = flags, + }; + int ret; + + perf_event_header__init_id(&rec.header, &sample, event); + ret = perf_output_begin(&handle, event, rec.header.size); + + if (ret) + return; + + perf_output_put(&handle, rec); + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + /* * IRQ throttle logging */ @@ -5753,7 +5974,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) .misc = 0, .size = sizeof(throttle_event), }, - .time = perf_clock(), + .time = perf_event_clock(event), .id = primary_event_id(event), .stream_id = event->id, }; @@ -5773,6 +5994,44 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_output_end(&handle); } +static void perf_log_itrace_start(struct perf_event *event) +{ + struct perf_output_handle handle; + struct perf_sample_data sample; + struct perf_aux_event { + struct perf_event_header header; + u32 pid; + u32 tid; + } rec; + int ret; + + if (event->parent) + event = event->parent; + + if (!(event->pmu->capabilities & PERF_PMU_CAP_ITRACE) || + event->hw.itrace_started) + return; + + event->hw.itrace_started = 1; + + rec.header.type = PERF_RECORD_ITRACE_START; + rec.header.misc = 0; + rec.header.size = sizeof(rec); + rec.pid = perf_event_pid(event, current); + rec.tid = perf_event_tid(event, current); + + perf_event_header__init_id(&rec.header, &sample, event); + ret = perf_output_begin(&handle, event, rec.header.size); + + if (ret) + return; + + perf_output_put(&handle, rec); + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + /* * Generic event overflow handling, sampling. */ @@ -6133,6 +6392,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) } hlist_add_head_rcu(&event->hlist_entry, head); + perf_event_update_userpage(event); return 0; } @@ -6296,6 +6556,8 @@ static int perf_swevent_init(struct perf_event *event) static struct pmu perf_swevent = { .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_NMI, + .event_init = perf_swevent_init, .add = perf_swevent_add, .del = perf_swevent_del, @@ -6449,6 +6711,49 @@ static void perf_event_free_filter(struct perf_event *event) ftrace_profile_free_filter(event); } +static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) +{ + struct bpf_prog *prog; + + if (event->attr.type != PERF_TYPE_TRACEPOINT) + return -EINVAL; + + if (event->tp_event->prog) + return -EEXIST; + + if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) + /* bpf programs can only be attached to kprobes */ + return -EINVAL; + + prog = bpf_prog_get(prog_fd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + if (prog->aux->prog_type != BPF_PROG_TYPE_KPROBE) { + /* valid fd, but invalid bpf program type */ + bpf_prog_put(prog); + return -EINVAL; + } + + event->tp_event->prog = prog; + + return 0; +} + +static void perf_event_free_bpf_prog(struct perf_event *event) +{ + struct bpf_prog *prog; + + if (!event->tp_event) + return; + + prog = event->tp_event->prog; + if (prog) { + event->tp_event->prog = NULL; + bpf_prog_put(prog); + } +} + #else static inline void perf_tp_register(void) @@ -6464,6 +6769,14 @@ static void perf_event_free_filter(struct perf_event *event) { } +static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) +{ + return -ENOENT; +} + +static void perf_event_free_bpf_prog(struct perf_event *event) +{ +} #endif /* CONFIG_EVENT_TRACING */ #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -6602,6 +6915,7 @@ static int cpu_clock_event_add(struct perf_event *event, int flags) { if (flags & PERF_EF_START) cpu_clock_event_start(event, flags); + perf_event_update_userpage(event); return 0; } @@ -6638,6 +6952,8 @@ static int cpu_clock_event_init(struct perf_event *event) static struct pmu perf_cpu_clock = { .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_NMI, + .event_init = cpu_clock_event_init, .add = cpu_clock_event_add, .del = cpu_clock_event_del, @@ -6676,6 +6992,7 @@ static int task_clock_event_add(struct perf_event *event, int flags) { if (flags & PERF_EF_START) task_clock_event_start(event, flags); + perf_event_update_userpage(event); return 0; } @@ -6716,6 +7033,8 @@ static int task_clock_event_init(struct perf_event *event) static struct pmu perf_task_clock = { .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_NMI, + .event_init = task_clock_event_init, .add = task_clock_event_add, .del = task_clock_event_del, @@ -6993,6 +7312,7 @@ got_cpu_context: pmu->event_idx = perf_event_idx_default; list_add_rcu(&pmu->entry, &pmus); + atomic_set(&pmu->exclusive_cnt, 0); ret = 0; unlock: mutex_unlock(&pmus_lock); @@ -7037,12 +7357,23 @@ EXPORT_SYMBOL_GPL(perf_pmu_unregister); static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) { + struct perf_event_context *ctx = NULL; int ret; if (!try_module_get(pmu->module)) return -ENODEV; + + if (event->group_leader != event) { + ctx = perf_event_ctx_lock(event->group_leader); + BUG_ON(!ctx); + } + event->pmu = pmu; ret = pmu->event_init(event); + + if (ctx) + perf_event_ctx_unlock(event->group_leader, ctx); + if (ret) module_put(pmu->module); @@ -7089,10 +7420,6 @@ static void account_event_cpu(struct perf_event *event, int cpu) if (event->parent) return; - if (has_branch_stack(event)) { - if (!(event->attach_state & PERF_ATTACH_TASK)) - atomic_inc(&per_cpu(perf_branch_stack_events, cpu)); - } if (is_cgroup_event(event)) atomic_inc(&per_cpu(perf_cgroup_events, cpu)); } @@ -7131,7 +7458,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, struct perf_event *group_leader, struct perf_event *parent_event, perf_overflow_handler_t overflow_handler, - void *context) + void *context, int cgroup_fd) { struct pmu *pmu; struct perf_event *event; @@ -7186,18 +7513,18 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (task) { event->attach_state = PERF_ATTACH_TASK; - - if (attr->type == PERF_TYPE_TRACEPOINT) - event->hw.tp_target = task; -#ifdef CONFIG_HAVE_HW_BREAKPOINT /* - * hw_breakpoint is a bit difficult here.. + * XXX pmu::event_init needs to know what task to account to + * and we cannot use the ctx information because we need the + * pmu before we get a ctx. */ - else if (attr->type == PERF_TYPE_BREAKPOINT) - event->hw.bp_target = task; -#endif + event->hw.target = task; } + event->clock = &local_clock; + if (parent_event) + event->clock = parent_event->clock; + if (!overflow_handler && parent_event) { overflow_handler = parent_event->overflow_handler; context = parent_event->overflow_handler_context; @@ -7224,6 +7551,15 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) goto err_ns; + if (!has_branch_stack(event)) + event->attr.branch_sample_type = 0; + + if (cgroup_fd != -1) { + err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader); + if (err) + goto err_ns; + } + pmu = perf_init_event(event); if (!pmu) goto err_ns; @@ -7232,21 +7568,30 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, goto err_ns; } + err = exclusive_event_init(event); + if (err) + goto err_pmu; + if (!event->parent) { if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { err = get_callchain_buffers(); if (err) - goto err_pmu; + goto err_per_task; } } return event; +err_per_task: + exclusive_event_destroy(event); + err_pmu: if (event->destroy) event->destroy(event); module_put(pmu->module); err_ns: + if (is_cgroup_event(event)) + perf_detach_cgroup(event); if (event->ns) put_pid_ns(event->ns); kfree(event); @@ -7409,6 +7754,19 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) if (output_event->cpu == -1 && output_event->ctx != event->ctx) goto out; + /* + * Mixing clocks in the same buffer is trouble you don't need. + */ + if (output_event->clock != event->clock) + goto out; + + /* + * If both events generate aux data, they must be on the same PMU + */ + if (has_aux(event) && has_aux(output_event) && + event->pmu != output_event->pmu) + goto out; + set: mutex_lock(&event->mmap_mutex); /* Can't redirect output if we've got an active mmap() */ @@ -7441,6 +7799,43 @@ static void mutex_lock_double(struct mutex *a, struct mutex *b) mutex_lock_nested(b, SINGLE_DEPTH_NESTING); } +static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) +{ + bool nmi_safe = false; + + switch (clk_id) { + case CLOCK_MONOTONIC: + event->clock = &ktime_get_mono_fast_ns; + nmi_safe = true; + break; + + case CLOCK_MONOTONIC_RAW: + event->clock = &ktime_get_raw_fast_ns; + nmi_safe = true; + break; + + case CLOCK_REALTIME: + event->clock = &ktime_get_real_ns; + break; + + case CLOCK_BOOTTIME: + event->clock = &ktime_get_boot_ns; + break; + + case CLOCK_TAI: + event->clock = &ktime_get_tai_ns; + break; + + default: + return -EINVAL; + } + + if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI)) + return -EINVAL; + + return 0; +} + /** * sys_perf_event_open - open a performance event, associate it to a task/cpu * @@ -7465,6 +7860,7 @@ SYSCALL_DEFINE5(perf_event_open, int move_group = 0; int err; int f_flags = O_RDWR; + int cgroup_fd = -1; /* for future expandability... */ if (flags & ~PERF_FLAG_ALL) @@ -7530,21 +7926,16 @@ SYSCALL_DEFINE5(perf_event_open, get_online_cpus(); + if (flags & PERF_FLAG_PID_CGROUP) + cgroup_fd = pid; + event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, - NULL, NULL); + NULL, NULL, cgroup_fd); if (IS_ERR(event)) { err = PTR_ERR(event); goto err_cpus; } - if (flags & PERF_FLAG_PID_CGROUP) { - err = perf_cgroup_connect(pid, event, &attr, group_leader); - if (err) { - __free_event(event); - goto err_cpus; - } - } - if (is_sampling_event(event)) { if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { err = -ENOTSUPP; @@ -7560,6 +7951,12 @@ SYSCALL_DEFINE5(perf_event_open, */ pmu = event->pmu; + if (attr.use_clockid) { + err = perf_event_set_clock(event, attr.clockid); + if (err) + goto err_alloc; + } + if (group_leader && (is_software_event(event) != is_software_event(group_leader))) { if (is_software_event(event)) { @@ -7586,12 +7983,17 @@ SYSCALL_DEFINE5(perf_event_open, /* * Get the target context (task or percpu): */ - ctx = find_get_context(pmu, task, event->cpu); + ctx = find_get_context(pmu, task, event); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_alloc; } + if ((pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && group_leader) { + err = -EBUSY; + goto err_context; + } + if (task) { put_task_struct(task); task = NULL; @@ -7609,6 +8011,11 @@ SYSCALL_DEFINE5(perf_event_open, */ if (group_leader->group_leader != group_leader) goto err_context; + + /* All events in a group should have the same clock */ + if (group_leader->clock != event->clock) + goto err_context; + /* * Do not allow to attach to a group in a different * task or CPU context: @@ -7709,6 +8116,13 @@ SYSCALL_DEFINE5(perf_event_open, get_ctx(ctx); } + if (!exclusive_event_installable(event, ctx)) { + err = -EBUSY; + mutex_unlock(&ctx->mutex); + fput(event_file); + goto err_context; + } + perf_install_in_context(ctx, event, event->cpu); perf_unpin_context(ctx); @@ -7781,7 +8195,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, */ event = perf_event_alloc(attr, cpu, task, NULL, NULL, - overflow_handler, context); + overflow_handler, context, -1); if (IS_ERR(event)) { err = PTR_ERR(event); goto err; @@ -7792,7 +8206,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, account_event(event); - ctx = find_get_context(event->pmu, task, cpu); + ctx = find_get_context(event->pmu, task, event); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_free; @@ -7800,6 +8214,14 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); + if (!exclusive_event_installable(event, ctx)) { + mutex_unlock(&ctx->mutex); + perf_unpin_context(ctx); + put_ctx(ctx); + err = -EBUSY; + goto err_free; + } + perf_install_in_context(ctx, event, cpu); perf_unpin_context(ctx); mutex_unlock(&ctx->mutex); @@ -8142,7 +8564,7 @@ inherit_event(struct perf_event *parent_event, parent_event->cpu, child, group_leader, parent_event, - NULL, NULL); + NULL, NULL, -1); if (IS_ERR(child_event)) return child_event; diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 9803a6600d49..92ce5f4ccc26 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -116,12 +116,12 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) */ static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type) { - struct task_struct *tsk = bp->hw.bp_target; + struct task_struct *tsk = bp->hw.target; struct perf_event *iter; int count = 0; list_for_each_entry(iter, &bp_task_head, hw.bp_list) { - if (iter->hw.bp_target == tsk && + if (iter->hw.target == tsk && find_slot_idx(iter) == type && (iter->cpu < 0 || cpu == iter->cpu)) count += hw_breakpoint_weight(iter); @@ -153,7 +153,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, int nr; nr = info->cpu_pinned; - if (!bp->hw.bp_target) + if (!bp->hw.target) nr += max_task_bp_pinned(cpu, type); else nr += task_bp_pinned(cpu, bp, type); @@ -210,7 +210,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, weight = -weight; /* Pinned counter cpu profiling */ - if (!bp->hw.bp_target) { + if (!bp->hw.target) { get_bp_info(bp->cpu, type)->cpu_pinned += weight; return; } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 569b218782ad..9f6ce9ba4a04 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -27,6 +27,7 @@ struct ring_buffer { local_t lost; /* nr records lost */ long watermark; /* wakeup watermark */ + long aux_watermark; /* poll crap */ spinlock_t event_lock; struct list_head event_list; @@ -35,6 +36,20 @@ struct ring_buffer { unsigned long mmap_locked; struct user_struct *mmap_user; + /* AUX area */ + local_t aux_head; + local_t aux_nest; + local_t aux_wakeup; + unsigned long aux_pgoff; + int aux_nr_pages; + int aux_overwrite; + atomic_t aux_mmap_count; + unsigned long aux_mmap_locked; + void (*free_aux)(void *); + atomic_t aux_refcount; + void **aux_pages; + void *aux_priv; + struct perf_event_mmap_page *user_page; void *data_pages[0]; }; @@ -43,6 +58,19 @@ extern void rb_free(struct ring_buffer *rb); extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); +extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, + pgoff_t pgoff, int nr_pages, long watermark, int flags); +extern void rb_free_aux(struct ring_buffer *rb); +extern struct ring_buffer *ring_buffer_get(struct perf_event *event); +extern void ring_buffer_put(struct ring_buffer *rb); + +static inline bool rb_has_aux(struct ring_buffer *rb) +{ + return !!rb->aux_nr_pages; +} + +void perf_event_aux_event(struct perf_event *event, unsigned long head, + unsigned long size, u64 flags); extern void perf_event_header__init_id(struct perf_event_header *header, @@ -81,6 +109,11 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb) return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); } +static inline unsigned long perf_aux_size(struct ring_buffer *rb) +{ + return rb->aux_nr_pages << PAGE_SHIFT; +} + #define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ static inline unsigned long \ func_name(struct perf_output_handle *handle, \ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index eadb95ce7aac..232f00f273cb 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -243,14 +243,317 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) spin_lock_init(&rb->event_lock); } +/* + * This is called before hardware starts writing to the AUX area to + * obtain an output handle and make sure there's room in the buffer. + * When the capture completes, call perf_aux_output_end() to commit + * the recorded data to the buffer. + * + * The ordering is similar to that of perf_output_{begin,end}, with + * the exception of (B), which should be taken care of by the pmu + * driver, since ordering rules will differ depending on hardware. + */ +void *perf_aux_output_begin(struct perf_output_handle *handle, + struct perf_event *event) +{ + struct perf_event *output_event = event; + unsigned long aux_head, aux_tail; + struct ring_buffer *rb; + + if (output_event->parent) + output_event = output_event->parent; + + /* + * Since this will typically be open across pmu::add/pmu::del, we + * grab ring_buffer's refcount instead of holding rcu read lock + * to make sure it doesn't disappear under us. + */ + rb = ring_buffer_get(output_event); + if (!rb) + return NULL; + + if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount)) + goto err; + + /* + * Nesting is not supported for AUX area, make sure nested + * writers are caught early + */ + if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1))) + goto err_put; + + aux_head = local_read(&rb->aux_head); + + handle->rb = rb; + handle->event = event; + handle->head = aux_head; + handle->size = 0; + + /* + * In overwrite mode, AUX data stores do not depend on aux_tail, + * therefore (A) control dependency barrier does not exist. The + * (B) <-> (C) ordering is still observed by the pmu driver. + */ + if (!rb->aux_overwrite) { + aux_tail = ACCESS_ONCE(rb->user_page->aux_tail); + handle->wakeup = local_read(&rb->aux_wakeup) + rb->aux_watermark; + if (aux_head - aux_tail < perf_aux_size(rb)) + handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb)); + + /* + * handle->size computation depends on aux_tail load; this forms a + * control dependency barrier separating aux_tail load from aux data + * store that will be enabled on successful return + */ + if (!handle->size) { /* A, matches D */ + event->pending_disable = 1; + perf_output_wakeup(handle); + local_set(&rb->aux_nest, 0); + goto err_put; + } + } + + return handle->rb->aux_priv; + +err_put: + rb_free_aux(rb); + +err: + ring_buffer_put(rb); + handle->event = NULL; + + return NULL; +} + +/* + * Commit the data written by hardware into the ring buffer by adjusting + * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the + * pmu driver's responsibility to observe ordering rules of the hardware, + * so that all the data is externally visible before this is called. + */ +void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, + bool truncated) +{ + struct ring_buffer *rb = handle->rb; + unsigned long aux_head; + u64 flags = 0; + + if (truncated) + flags |= PERF_AUX_FLAG_TRUNCATED; + + /* in overwrite mode, driver provides aux_head via handle */ + if (rb->aux_overwrite) { + flags |= PERF_AUX_FLAG_OVERWRITE; + + aux_head = handle->head; + local_set(&rb->aux_head, aux_head); + } else { + aux_head = local_read(&rb->aux_head); + local_add(size, &rb->aux_head); + } + + if (size || flags) { + /* + * Only send RECORD_AUX if we have something useful to communicate + */ + + perf_event_aux_event(handle->event, aux_head, size, flags); + } + + aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); + + if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { + perf_output_wakeup(handle); + local_add(rb->aux_watermark, &rb->aux_wakeup); + } + handle->event = NULL; + + local_set(&rb->aux_nest, 0); + rb_free_aux(rb); + ring_buffer_put(rb); +} + +/* + * Skip over a given number of bytes in the AUX buffer, due to, for example, + * hardware's alignment constraints. + */ +int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) +{ + struct ring_buffer *rb = handle->rb; + unsigned long aux_head; + + if (size > handle->size) + return -ENOSPC; + + local_add(size, &rb->aux_head); + + aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); + if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { + perf_output_wakeup(handle); + local_add(rb->aux_watermark, &rb->aux_wakeup); + handle->wakeup = local_read(&rb->aux_wakeup) + + rb->aux_watermark; + } + + handle->head = aux_head; + handle->size -= size; + + return 0; +} + +void *perf_get_aux(struct perf_output_handle *handle) +{ + /* this is only valid between perf_aux_output_begin and *_end */ + if (!handle->event) + return NULL; + + return handle->rb->aux_priv; +} + +#define PERF_AUX_GFP (GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY) + +static struct page *rb_alloc_aux_page(int node, int order) +{ + struct page *page; + + if (order > MAX_ORDER) + order = MAX_ORDER; + + do { + page = alloc_pages_node(node, PERF_AUX_GFP, order); + } while (!page && order--); + + if (page && order) { + /* + * Communicate the allocation size to the driver + */ + split_page(page, order); + SetPagePrivate(page); + set_page_private(page, order); + } + + return page; +} + +static void rb_free_aux_page(struct ring_buffer *rb, int idx) +{ + struct page *page = virt_to_page(rb->aux_pages[idx]); + + ClearPagePrivate(page); + page->mapping = NULL; + __free_page(page); +} + +int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, + pgoff_t pgoff, int nr_pages, long watermark, int flags) +{ + bool overwrite = !(flags & RING_BUFFER_WRITABLE); + int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu); + int ret = -ENOMEM, max_order = 0; + + if (!has_aux(event)) + return -ENOTSUPP; + + if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) { + /* + * We need to start with the max_order that fits in nr_pages, + * not the other way around, hence ilog2() and not get_order. + */ + max_order = ilog2(nr_pages); + + /* + * PMU requests more than one contiguous chunks of memory + * for SW double buffering + */ + if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) && + !overwrite) { + if (!max_order) + return -EINVAL; + + max_order--; + } + } + + rb->aux_pages = kzalloc_node(nr_pages * sizeof(void *), GFP_KERNEL, node); + if (!rb->aux_pages) + return -ENOMEM; + + rb->free_aux = event->pmu->free_aux; + for (rb->aux_nr_pages = 0; rb->aux_nr_pages < nr_pages;) { + struct page *page; + int last, order; + + order = min(max_order, ilog2(nr_pages - rb->aux_nr_pages)); + page = rb_alloc_aux_page(node, order); + if (!page) + goto out; + + for (last = rb->aux_nr_pages + (1 << page_private(page)); + last > rb->aux_nr_pages; rb->aux_nr_pages++) + rb->aux_pages[rb->aux_nr_pages] = page_address(page++); + } + + rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, + overwrite); + if (!rb->aux_priv) + goto out; + + ret = 0; + + /* + * aux_pages (and pmu driver's private data, aux_priv) will be + * referenced in both producer's and consumer's contexts, thus + * we keep a refcount here to make sure either of the two can + * reference them safely. + */ + atomic_set(&rb->aux_refcount, 1); + + rb->aux_overwrite = overwrite; + rb->aux_watermark = watermark; + + if (!rb->aux_watermark && !rb->aux_overwrite) + rb->aux_watermark = nr_pages << (PAGE_SHIFT - 1); + +out: + if (!ret) + rb->aux_pgoff = pgoff; + else + rb_free_aux(rb); + + return ret; +} + +static void __rb_free_aux(struct ring_buffer *rb) +{ + int pg; + + if (rb->aux_priv) { + rb->free_aux(rb->aux_priv); + rb->free_aux = NULL; + rb->aux_priv = NULL; + } + + for (pg = 0; pg < rb->aux_nr_pages; pg++) + rb_free_aux_page(rb, pg); + + kfree(rb->aux_pages); + rb->aux_nr_pages = 0; +} + +void rb_free_aux(struct ring_buffer *rb) +{ + if (atomic_dec_and_test(&rb->aux_refcount)) + __rb_free_aux(rb); +} + #ifndef CONFIG_PERF_USE_VMALLOC /* * Back perf_mmap() with regular GFP_KERNEL-0 pages. */ -struct page * -perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) +static struct page * +__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) { if (pgoff > rb->nr_pages) return NULL; @@ -340,8 +643,8 @@ static int data_page_nr(struct ring_buffer *rb) return rb->nr_pages << page_order(rb); } -struct page * -perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) +static struct page * +__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) { /* The '>' counts in the user page. */ if (pgoff > data_page_nr(rb)) @@ -416,3 +719,19 @@ fail: } #endif + +struct page * +perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) +{ + if (rb->aux_nr_pages) { + /* above AUX space */ + if (pgoff > rb->aux_pgoff + rb->aux_nr_pages) + return NULL; + + /* AUX space */ + if (pgoff >= rb->aux_pgoff) + return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]); + } + + return __perf_mmap_to_page(rb, pgoff); +} diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 3f9f1d6b4c2e..284e2691e380 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -335,32 +335,20 @@ unlock: rcu_read_unlock(); } -static int klp_disable_func(struct klp_func *func) +static void klp_disable_func(struct klp_func *func) { struct klp_ops *ops; - int ret; - - if (WARN_ON(func->state != KLP_ENABLED)) - return -EINVAL; - if (WARN_ON(!func->old_addr)) - return -EINVAL; + WARN_ON(func->state != KLP_ENABLED); + WARN_ON(!func->old_addr); ops = klp_find_ops(func->old_addr); if (WARN_ON(!ops)) - return -EINVAL; + return; if (list_is_singular(&ops->func_stack)) { - ret = unregister_ftrace_function(&ops->fops); - if (ret) { - pr_err("failed to unregister ftrace handler for function '%s' (%d)\n", - func->old_name, ret); - return ret; - } - - ret = ftrace_set_filter_ip(&ops->fops, func->old_addr, 1, 0); - if (ret) - pr_warn("function unregister succeeded but failed to clear the filter\n"); + WARN_ON(unregister_ftrace_function(&ops->fops)); + WARN_ON(ftrace_set_filter_ip(&ops->fops, func->old_addr, 1, 0)); list_del_rcu(&func->stack_node); list_del(&ops->node); @@ -370,8 +358,6 @@ static int klp_disable_func(struct klp_func *func) } func->state = KLP_DISABLED; - - return 0; } static int klp_enable_func(struct klp_func *func) @@ -432,23 +418,15 @@ err: return ret; } -static int klp_disable_object(struct klp_object *obj) +static void klp_disable_object(struct klp_object *obj) { struct klp_func *func; - int ret; - for (func = obj->funcs; func->old_name; func++) { - if (func->state != KLP_ENABLED) - continue; - - ret = klp_disable_func(func); - if (ret) - return ret; - } + for (func = obj->funcs; func->old_name; func++) + if (func->state == KLP_ENABLED) + klp_disable_func(func); obj->state = KLP_DISABLED; - - return 0; } static int klp_enable_object(struct klp_object *obj) @@ -464,22 +442,19 @@ static int klp_enable_object(struct klp_object *obj) for (func = obj->funcs; func->old_name; func++) { ret = klp_enable_func(func); - if (ret) - goto unregister; + if (ret) { + klp_disable_object(obj); + return ret; + } } obj->state = KLP_ENABLED; return 0; - -unregister: - WARN_ON(klp_disable_object(obj)); - return ret; } static int __klp_disable_patch(struct klp_patch *patch) { struct klp_object *obj; - int ret; /* enforce stacking: only the last enabled patch can be disabled */ if (!list_is_last(&patch->list, &klp_patches) && @@ -489,12 +464,8 @@ static int __klp_disable_patch(struct klp_patch *patch) pr_notice("disabling patch '%s'\n", patch->mod->name); for (obj = patch->objs; obj->funcs; obj++) { - if (obj->state != KLP_ENABLED) - continue; - - ret = klp_disable_object(obj); - if (ret) - return ret; + if (obj->state == KLP_ENABLED) + klp_disable_object(obj); } patch->state = KLP_DISABLED; @@ -553,8 +524,6 @@ static int __klp_enable_patch(struct klp_patch *patch) pr_notice("enabling patch '%s'\n", patch->mod->name); for (obj = patch->objs; obj->funcs; obj++) { - klp_find_object_module(obj); - if (!klp_is_object_loaded(obj)) continue; @@ -945,7 +914,6 @@ static void klp_module_notify_going(struct klp_patch *patch, { struct module *pmod = patch->mod; struct module *mod = obj->mod; - int ret; if (patch->state == KLP_DISABLED) goto disabled; @@ -953,10 +921,7 @@ static void klp_module_notify_going(struct klp_patch *patch, pr_notice("reverting patch '%s' on unloading module '%s'\n", pmod->name, mod->name); - ret = klp_disable_object(obj); - if (ret) - pr_warn("failed to revert patch '%s' on module '%s' (%d)\n", - pmod->name, mod->name, ret); + klp_disable_object(obj); disabled: klp_free_object_loaded(obj); diff --git a/kernel/module.c b/kernel/module.c index ec53f594e9c9..650b038ae520 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2770,6 +2770,9 @@ static int find_module_sections(struct module *mod, struct load_info *info) mod->trace_events = section_objs(info, "_ftrace_events", sizeof(*mod->trace_events), &mod->num_trace_events); + mod->trace_enums = section_objs(info, "_ftrace_enum_map", + sizeof(*mod->trace_enums), + &mod->num_trace_enums); #endif #ifdef CONFIG_TRACING mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index bb0635bd74f2..879edfc5ee52 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -32,7 +32,6 @@ #include <linux/security.h> #include <linux/bootmem.h> #include <linux/memblock.h> -#include <linux/aio.h> #include <linux/syscalls.h> #include <linux/kexec.h> #include <linux/kdb.h> @@ -46,6 +45,7 @@ #include <linux/irq_work.h> #include <linux/utsname.h> #include <linux/ctype.h> +#include <linux/uio.h> #include <asm/uaccess.h> @@ -521,7 +521,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) int i; int level = default_message_loglevel; int facility = 1; /* LOG_USER */ - size_t len = iocb->ki_nbytes; + size_t len = iov_iter_count(from); ssize_t ret = len; if (len > LOG_LINE_MAX) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 30d42aa55d83..8dbe27611ec3 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -853,6 +853,8 @@ rcu_torture_fqs(void *arg) static int rcu_torture_writer(void *arg) { + bool can_expedite = !rcu_gp_is_expedited(); + int expediting = 0; unsigned long gp_snap; bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal; bool gp_sync1 = gp_sync; @@ -865,9 +867,15 @@ rcu_torture_writer(void *arg) int nsynctypes = 0; VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); + pr_alert("%s" TORTURE_FLAG + " Grace periods expedited from boot/sysfs for %s,\n", + torture_type, cur_ops->name); + pr_alert("%s" TORTURE_FLAG + " Testing of dynamic grace-period expediting diabled.\n", + torture_type); /* Initialize synctype[] array. If none set, take default. */ - if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync) + if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1) gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true; if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) synctype[nsynctypes++] = RTWS_COND_GET; @@ -949,9 +957,26 @@ rcu_torture_writer(void *arg) } } rcutorture_record_progress(++rcu_torture_current_version); + /* Cycle through nesting levels of rcu_expedite_gp() calls. */ + if (can_expedite && + !(torture_random(&rand) & 0xff & (!!expediting - 1))) { + WARN_ON_ONCE(expediting == 0 && rcu_gp_is_expedited()); + if (expediting >= 0) + rcu_expedite_gp(); + else + rcu_unexpedite_gp(); + if (++expediting > 3) + expediting = -expediting; + } rcu_torture_writer_state = RTWS_STUTTER; stutter_wait("rcu_torture_writer"); } while (!torture_must_stop()); + /* Reset expediting back to unexpedited. */ + if (expediting > 0) + expediting = -expediting; + while (can_expedite && expediting++ < 0) + rcu_unexpedite_gp(); + WARN_ON_ONCE(can_expedite && rcu_gp_is_expedited()); rcu_torture_writer_state = RTWS_STOPPING; torture_kthread_stopping("rcu_torture_writer"); return 0; diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index 445bf8ffe3fb..cad76e76b4e7 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -402,23 +402,6 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, } EXPORT_SYMBOL_GPL(call_srcu); -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; - -/* - * Awaken the corresponding synchronize_srcu() instance now that a - * grace period has elapsed. - */ -static void wakeme_after_rcu(struct rcu_head *head) -{ - struct rcu_synchronize *rcu; - - rcu = container_of(head, struct rcu_synchronize, head); - complete(&rcu->completion); -} - static void srcu_advance_batches(struct srcu_struct *sp, int trycount); static void srcu_reschedule(struct srcu_struct *sp); @@ -507,7 +490,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) */ void synchronize_srcu(struct srcu_struct *sp) { - __synchronize_srcu(sp, rcu_expedited + __synchronize_srcu(sp, rcu_gp_is_expedited() ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT : SYNCHRONIZE_SRCU_TRYCOUNT); } diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index cc9ceca7bde1..069742d61c68 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -103,8 +103,7 @@ EXPORT_SYMBOL(__rcu_is_watching); static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) { RCU_TRACE(reset_cpu_stall_ticks(rcp)); - if (rcp->rcucblist != NULL && - rcp->donetail != rcp->curtail) { + if (rcp->donetail != rcp->curtail) { rcp->donetail = rcp->curtail; return 1; } @@ -169,17 +168,6 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) unsigned long flags; RCU_TRACE(int cb_count = 0); - /* If no RCU callbacks ready to invoke, just return. */ - if (&rcp->rcucblist == rcp->donetail) { - RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1)); - RCU_TRACE(trace_rcu_batch_end(rcp->name, 0, - !!ACCESS_ONCE(rcp->rcucblist), - need_resched(), - is_idle_task(current), - false)); - return; - } - /* Move the ready-to-invoke callbacks to a local list. */ local_irq_save(flags); RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 48d640ca1a05..233165da782f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -91,8 +91,10 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ DEFINE_RCU_TPS(sname) \ +DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ struct rcu_state sname##_state = { \ .level = { &sname##_state.node[0] }, \ + .rda = &sname##_data, \ .call = cr, \ .fqs_state = RCU_GP_IDLE, \ .gpnum = 0UL - 300UL, \ @@ -101,11 +103,9 @@ struct rcu_state sname##_state = { \ .orphan_nxttail = &sname##_state.orphan_nxtlist, \ .orphan_donetail = &sname##_state.orphan_donelist, \ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ - .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ .name = RCU_STATE_NAME(sname), \ .abbr = sabbr, \ -}; \ -DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data) +} RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); @@ -152,6 +152,8 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); */ static int rcu_scheduler_fully_active __read_mostly; +static void rcu_init_new_rnp(struct rcu_node *rnp_leaf); +static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf); static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); static void invoke_rcu_core(void); static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); @@ -160,6 +162,12 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; module_param(kthread_prio, int, 0644); +/* Delay in jiffies for grace-period initialization delays. */ +static int gp_init_delay = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) + ? CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY + : 0; +module_param(gp_init_delay, int, 0644); + /* * Track the rcutorture test sequence number and the update version * number within a given test. The rcutorture_testseq is incremented @@ -173,6 +181,17 @@ unsigned long rcutorture_testseq; unsigned long rcutorture_vernum; /* + * Compute the mask of online CPUs for the specified rcu_node structure. + * This will not be stable unless the rcu_node structure's ->lock is + * held, but the bit corresponding to the current CPU will be stable + * in most contexts. + */ +unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp) +{ + return ACCESS_ONCE(rnp->qsmaskinitnext); +} + +/* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node * structure's ->lock, but of course results can be subject to change. @@ -292,10 +311,10 @@ void rcu_note_context_switch(void) EXPORT_SYMBOL_GPL(rcu_note_context_switch); /* - * Register a quiesecent state for all RCU flavors. If there is an + * Register a quiescent state for all RCU flavors. If there is an * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight * dyntick-idle quiescent state visible to other CPUs (but only for those - * RCU flavors in desparate need of a quiescent state, which will normally + * RCU flavors in desperate need of a quiescent state, which will normally * be none of them). Either way, do a lightweight quiescent state for * all RCU flavors. */ @@ -410,6 +429,15 @@ void rcu_bh_force_quiescent_state(void) EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); /* + * Force a quiescent state for RCU-sched. + */ +void rcu_sched_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_sched_state); +} +EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); + +/* * Show the state of the grace-period kthreads. */ void show_rcu_gp_kthreads(void) @@ -483,15 +511,6 @@ void rcutorture_record_progress(unsigned long vernum) EXPORT_SYMBOL_GPL(rcutorture_record_progress); /* - * Force a quiescent state for RCU-sched. - */ -void rcu_sched_force_quiescent_state(void) -{ - force_quiescent_state(&rcu_sched_state); -} -EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); - -/* * Does the CPU have callbacks ready to be invoked? */ static int @@ -954,7 +973,7 @@ bool rcu_lockdep_current_cpu_online(void) preempt_disable(); rdp = this_cpu_ptr(&rcu_sched_data); rnp = rdp->mynode; - ret = (rdp->grpmask & rnp->qsmaskinit) || + ret = (rdp->grpmask & rcu_rnp_online_cpus(rnp)) || !rcu_scheduler_fully_active; preempt_enable(); return ret; @@ -1196,9 +1215,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) } else { j = jiffies; gpa = ACCESS_ONCE(rsp->gp_activity); - pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld\n", + pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", rsp->name, j - gpa, j, gpa, - jiffies_till_next_fqs); + jiffies_till_next_fqs, + rcu_get_root(rsp)->qsmask); /* In this case, the current CPU might be at fault. */ sched_show_task(current); } @@ -1328,20 +1348,30 @@ void rcu_cpu_stall_reset(void) } /* - * Initialize the specified rcu_data structure's callback list to empty. + * Initialize the specified rcu_data structure's default callback list + * to empty. The default callback list is the one that is not used by + * no-callbacks CPUs. */ -static void init_callback_list(struct rcu_data *rdp) +static void init_default_callback_list(struct rcu_data *rdp) { int i; - if (init_nocb_callback_list(rdp)) - return; rdp->nxtlist = NULL; for (i = 0; i < RCU_NEXT_SIZE; i++) rdp->nxttail[i] = &rdp->nxtlist; } /* + * Initialize the specified rcu_data structure's callback list to empty. + */ +static void init_callback_list(struct rcu_data *rdp) +{ + if (init_nocb_callback_list(rdp)) + return; + init_default_callback_list(rdp); +} + +/* * Determine the value that ->completed will have at the end of the * next subsequent grace period. This is used to tag callbacks so that * a CPU can invoke callbacks in a timely fashion even if that CPU has @@ -1703,11 +1733,11 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) */ static int rcu_gp_init(struct rcu_state *rsp) { + unsigned long oldmask; struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); ACCESS_ONCE(rsp->gp_activity) = jiffies; - rcu_bind_gp_kthread(); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); if (!ACCESS_ONCE(rsp->gp_flags)) { @@ -1733,9 +1763,54 @@ static int rcu_gp_init(struct rcu_state *rsp) trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); raw_spin_unlock_irq(&rnp->lock); - /* Exclude any concurrent CPU-hotplug operations. */ - mutex_lock(&rsp->onoff_mutex); - smp_mb__after_unlock_lock(); /* ->gpnum increment before GP! */ + /* + * Apply per-leaf buffered online and offline operations to the + * rcu_node tree. Note that this new grace period need not wait + * for subsequent online CPUs, and that quiescent-state forcing + * will handle subsequent offline CPUs. + */ + rcu_for_each_leaf_node(rsp, rnp) { + raw_spin_lock_irq(&rnp->lock); + smp_mb__after_unlock_lock(); + if (rnp->qsmaskinit == rnp->qsmaskinitnext && + !rnp->wait_blkd_tasks) { + /* Nothing to do on this leaf rcu_node structure. */ + raw_spin_unlock_irq(&rnp->lock); + continue; + } + + /* Record old state, apply changes to ->qsmaskinit field. */ + oldmask = rnp->qsmaskinit; + rnp->qsmaskinit = rnp->qsmaskinitnext; + + /* If zero-ness of ->qsmaskinit changed, propagate up tree. */ + if (!oldmask != !rnp->qsmaskinit) { + if (!oldmask) /* First online CPU for this rcu_node. */ + rcu_init_new_rnp(rnp); + else if (rcu_preempt_has_tasks(rnp)) /* blocked tasks */ + rnp->wait_blkd_tasks = true; + else /* Last offline CPU and can propagate. */ + rcu_cleanup_dead_rnp(rnp); + } + + /* + * If all waited-on tasks from prior grace period are + * done, and if all this rcu_node structure's CPUs are + * still offline, propagate up the rcu_node tree and + * clear ->wait_blkd_tasks. Otherwise, if one of this + * rcu_node structure's CPUs has since come back online, + * simply clear ->wait_blkd_tasks (but rcu_cleanup_dead_rnp() + * checks for this, so just call it unconditionally). + */ + if (rnp->wait_blkd_tasks && + (!rcu_preempt_has_tasks(rnp) || + rnp->qsmaskinit)) { + rnp->wait_blkd_tasks = false; + rcu_cleanup_dead_rnp(rnp); + } + + raw_spin_unlock_irq(&rnp->lock); + } /* * Set the quiescent-state-needed bits in all the rcu_node @@ -1757,8 +1832,8 @@ static int rcu_gp_init(struct rcu_state *rsp) rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; ACCESS_ONCE(rnp->gpnum) = rsp->gpnum; - WARN_ON_ONCE(rnp->completed != rsp->completed); - ACCESS_ONCE(rnp->completed) = rsp->completed; + if (WARN_ON_ONCE(rnp->completed != rsp->completed)) + ACCESS_ONCE(rnp->completed) = rsp->completed; if (rnp == rdp->mynode) (void)__note_gp_changes(rsp, rnp, rdp); rcu_preempt_boost_start_gp(rnp); @@ -1768,9 +1843,12 @@ static int rcu_gp_init(struct rcu_state *rsp) raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); ACCESS_ONCE(rsp->gp_activity) = jiffies; + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) && + gp_init_delay > 0 && + !(rsp->gpnum % (rcu_num_nodes * 10))) + schedule_timeout_uninterruptible(gp_init_delay); } - mutex_unlock(&rsp->onoff_mutex); return 1; } @@ -1798,7 +1876,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) fqs_state = RCU_FORCE_QS; } else { /* Handle dyntick-idle and offline CPUs. */ - isidle = false; + isidle = true; force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); } /* Clear flag to prevent immediate re-entry. */ @@ -1852,6 +1930,8 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) rcu_for_each_node_breadth_first(rsp, rnp) { raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); + WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); + WARN_ON_ONCE(rnp->qsmask); ACCESS_ONCE(rnp->completed) = rsp->gpnum; rdp = this_cpu_ptr(rsp->rda); if (rnp == rdp->mynode) @@ -1895,6 +1975,7 @@ static int __noreturn rcu_gp_kthread(void *arg) struct rcu_state *rsp = arg; struct rcu_node *rnp = rcu_get_root(rsp); + rcu_bind_gp_kthread(); for (;;) { /* Handle grace-period start. */ @@ -2062,25 +2143,32 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) * Similar to rcu_report_qs_rdp(), for which it is a helper function. * Allows quiescent states for a group of CPUs to be reported at one go * to the specified rcu_node structure, though all the CPUs in the group - * must be represented by the same rcu_node structure (which need not be - * a leaf rcu_node structure, though it often will be). That structure's - * lock must be held upon entry, and it is released before return. + * must be represented by the same rcu_node structure (which need not be a + * leaf rcu_node structure, though it often will be). The gps parameter + * is the grace-period snapshot, which means that the quiescent states + * are valid only if rnp->gpnum is equal to gps. That structure's lock + * must be held upon entry, and it is released before return. */ static void rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, - struct rcu_node *rnp, unsigned long flags) + struct rcu_node *rnp, unsigned long gps, unsigned long flags) __releases(rnp->lock) { + unsigned long oldmask = 0; struct rcu_node *rnp_c; /* Walk up the rcu_node hierarchy. */ for (;;) { - if (!(rnp->qsmask & mask)) { + if (!(rnp->qsmask & mask) || rnp->gpnum != gps) { - /* Our bit has already been cleared, so done. */ + /* + * Our bit has already been cleared, or the + * relevant grace period is already over, so done. + */ raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } + WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ rnp->qsmask &= ~mask; trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, mask, rnp->qsmask, rnp->level, @@ -2104,7 +2192,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, rnp = rnp->parent; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - WARN_ON_ONCE(rnp_c->qsmask); + oldmask = rnp_c->qsmask; } /* @@ -2116,6 +2204,46 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, } /* + * Record a quiescent state for all tasks that were previously queued + * on the specified rcu_node structure and that were blocking the current + * RCU grace period. The caller must hold the specified rnp->lock with + * irqs disabled, and this lock is released upon return, but irqs remain + * disabled. + */ +static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp, + struct rcu_node *rnp, unsigned long flags) + __releases(rnp->lock) +{ + unsigned long gps; + unsigned long mask; + struct rcu_node *rnp_p; + + if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p || + rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return; /* Still need more quiescent states! */ + } + + rnp_p = rnp->parent; + if (rnp_p == NULL) { + /* + * Only one rcu_node structure in the tree, so don't + * try to report up to its nonexistent parent! + */ + rcu_report_qs_rsp(rsp, flags); + return; + } + + /* Report up the rest of the hierarchy, tracking current ->gpnum. */ + gps = rnp->gpnum; + mask = rnp->grpmask; + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ + smp_mb__after_unlock_lock(); + rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags); +} + +/* * Record a quiescent state for the specified CPU to that CPU's rcu_data * structure. This must be either called from the specified CPU, or * called when the specified CPU is known to be offline (and when it is @@ -2163,7 +2291,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) */ needwake = rcu_accelerate_cbs(rsp, rnp, rdp); - rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ + rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags); + /* ^^^ Released rnp->lock */ if (needwake) rcu_gp_kthread_wake(rsp); } @@ -2256,8 +2385,12 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL]; } - /* Finally, initialize the rcu_data structure's list to empty. */ + /* + * Finally, initialize the rcu_data structure's list to empty and + * disallow further callbacks on this CPU. + */ init_callback_list(rdp); + rdp->nxttail[RCU_NEXT_TAIL] = NULL; } /* @@ -2355,6 +2488,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) raw_spin_lock(&rnp->lock); /* irqs already disabled. */ smp_mb__after_unlock_lock(); /* GP memory ordering. */ rnp->qsmaskinit &= ~mask; + rnp->qsmask &= ~mask; if (rnp->qsmaskinit) { raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ return; @@ -2364,6 +2498,26 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) } /* + * The CPU is exiting the idle loop into the arch_cpu_idle_dead() + * function. We now remove it from the rcu_node tree's ->qsmaskinit + * bit masks. + */ +static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) +{ + unsigned long flags; + unsigned long mask; + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); + struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ + + /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ + mask = rdp->grpmask; + raw_spin_lock_irqsave(&rnp->lock, flags); + smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */ + rnp->qsmaskinitnext &= ~mask; + raw_spin_unlock_irqrestore(&rnp->lock, flags); +} + +/* * The CPU has been completely removed, and some other CPU is reporting * this fact from process context. Do the remainder of the cleanup, * including orphaning the outgoing CPU's RCU callbacks, and also @@ -2379,29 +2533,15 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) /* Adjust any no-longer-needed kthreads. */ rcu_boost_kthread_setaffinity(rnp, -1); - /* Exclude any attempts to start a new grace period. */ - mutex_lock(&rsp->onoff_mutex); - raw_spin_lock_irqsave(&rsp->orphan_lock, flags); - /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ + raw_spin_lock_irqsave(&rsp->orphan_lock, flags); rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); rcu_adopt_orphan_cbs(rsp, flags); raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags); - /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */ - rnp->qsmaskinit &= ~rdp->grpmask; - if (rnp->qsmaskinit == 0 && !rcu_preempt_has_tasks(rnp)) - rcu_cleanup_dead_rnp(rnp); - rcu_report_qs_rnp(rdp->grpmask, rsp, rnp, flags); /* Rlses rnp->lock. */ WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", cpu, rdp->qlen, rdp->nxtlist); - init_callback_list(rdp); - /* Disallow further callbacks on this CPU. */ - rdp->nxttail[RCU_NEXT_TAIL] = NULL; - mutex_unlock(&rsp->onoff_mutex); } #else /* #ifdef CONFIG_HOTPLUG_CPU */ @@ -2414,6 +2554,10 @@ static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) { } +static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) +{ +} + static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) { } @@ -2589,26 +2733,47 @@ static void force_qs_rnp(struct rcu_state *rsp, return; } if (rnp->qsmask == 0) { - rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ - continue; + if (rcu_state_p == &rcu_sched_state || + rsp != rcu_state_p || + rcu_preempt_blocked_readers_cgp(rnp)) { + /* + * No point in scanning bits because they + * are all zero. But we might need to + * priority-boost blocked readers. + */ + rcu_initiate_boost(rnp, flags); + /* rcu_initiate_boost() releases rnp->lock */ + continue; + } + if (rnp->parent && + (rnp->parent->qsmask & rnp->grpmask)) { + /* + * Race between grace-period + * initialization and task exiting RCU + * read-side critical section: Report. + */ + rcu_report_unblock_qs_rnp(rsp, rnp, flags); + /* rcu_report_unblock_qs_rnp() rlses ->lock */ + continue; + } } cpu = rnp->grplo; bit = 1; for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { if ((rnp->qsmask & bit) != 0) { - if ((rnp->qsmaskinit & bit) != 0) - *isidle = false; + if ((rnp->qsmaskinit & bit) == 0) + *isidle = false; /* Pending hotplug. */ if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) mask |= bit; } } if (mask != 0) { - - /* rcu_report_qs_rnp() releases rnp->lock. */ - rcu_report_qs_rnp(mask, rsp, rnp, flags); - continue; + /* Idle/offline CPUs, report (releases rnp->lock. */ + rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags); + } else { + /* Nothing to do here, so just drop the lock. */ + raw_spin_unlock_irqrestore(&rnp->lock, flags); } - raw_spin_unlock_irqrestore(&rnp->lock, flags); } } @@ -2741,7 +2906,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, * If called from an extended quiescent state, invoke the RCU * core in order to force a re-evaluation of RCU's idleness. */ - if (!rcu_is_watching() && cpu_online(smp_processor_id())) + if (!rcu_is_watching()) invoke_rcu_core(); /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ @@ -2827,11 +2992,22 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), if (cpu != -1) rdp = per_cpu_ptr(rsp->rda, cpu); - offline = !__call_rcu_nocb(rdp, head, lazy, flags); - WARN_ON_ONCE(offline); - /* _call_rcu() is illegal on offline CPU; leak the callback. */ - local_irq_restore(flags); - return; + if (likely(rdp->mynode)) { + /* Post-boot, so this should be for a no-CBs CPU. */ + offline = !__call_rcu_nocb(rdp, head, lazy, flags); + WARN_ON_ONCE(offline); + /* Offline CPU, _call_rcu() illegal, leak callback. */ + local_irq_restore(flags); + return; + } + /* + * Very early boot, before rcu_init(). Initialize if needed + * and then drop through to queue the callback. + */ + BUG_ON(cpu != -1); + WARN_ON_ONCE(!rcu_is_watching()); + if (!likely(rdp->nxtlist)) + init_default_callback_list(rdp); } ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1; if (lazy) @@ -2954,7 +3130,7 @@ void synchronize_sched(void) "Illegal synchronize_sched() in RCU-sched read-side critical section"); if (rcu_blocking_is_gp()) return; - if (rcu_expedited) + if (rcu_gp_is_expedited()) synchronize_sched_expedited(); else wait_rcu_gp(call_rcu_sched); @@ -2981,7 +3157,7 @@ void synchronize_rcu_bh(void) "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); if (rcu_blocking_is_gp()) return; - if (rcu_expedited) + if (rcu_gp_is_expedited()) synchronize_rcu_bh_expedited(); else wait_rcu_gp(call_rcu_bh); @@ -3518,6 +3694,28 @@ void rcu_barrier_sched(void) EXPORT_SYMBOL_GPL(rcu_barrier_sched); /* + * Propagate ->qsinitmask bits up the rcu_node tree to account for the + * first CPU in a given leaf rcu_node structure coming online. The caller + * must hold the corresponding leaf rcu_node ->lock with interrrupts + * disabled. + */ +static void rcu_init_new_rnp(struct rcu_node *rnp_leaf) +{ + long mask; + struct rcu_node *rnp = rnp_leaf; + + for (;;) { + mask = rnp->grpmask; + rnp = rnp->parent; + if (rnp == NULL) + return; + raw_spin_lock(&rnp->lock); /* Interrupts already disabled. */ + rnp->qsmaskinit |= mask; + raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */ + } +} + +/* * Do boot-time initialization of a CPU's per-CPU RCU data. */ static void __init @@ -3553,49 +3751,37 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rcu_get_root(rsp); - /* Exclude new grace periods. */ - mutex_lock(&rsp->onoff_mutex); - /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave(&rnp->lock, flags); rdp->beenonline = 1; /* We have now been online. */ rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; - init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ + if (!rdp->nxtlist) + init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; rcu_sysidle_init_percpu_data(rdp->dynticks); atomic_set(&rdp->dynticks->dynticks, (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ - /* Add CPU to rcu_node bitmasks. */ + /* + * Add CPU to leaf rcu_node pending-online bitmask. Any needed + * propagation up the rcu_node tree will happen at the beginning + * of the next grace period. + */ rnp = rdp->mynode; mask = rdp->grpmask; - do { - /* Exclude any attempts to start a new GP on small systems. */ - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ - rnp->qsmaskinit |= mask; - mask = rnp->grpmask; - if (rnp == rdp->mynode) { - /* - * If there is a grace period in progress, we will - * set up to wait for it next time we run the - * RCU core code. - */ - rdp->gpnum = rnp->completed; - rdp->completed = rnp->completed; - rdp->passed_quiesce = 0; - rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); - rdp->qs_pending = 0; - trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); - } - raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ - rnp = rnp->parent; - } while (rnp != NULL && !(rnp->qsmaskinit & mask)); - local_irq_restore(flags); - - mutex_unlock(&rsp->onoff_mutex); + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + smp_mb__after_unlock_lock(); + rnp->qsmaskinitnext |= mask; + rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ + rdp->completed = rnp->completed; + rdp->passed_quiesce = false; + rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); + rdp->qs_pending = false; + trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } static void rcu_prepare_cpu(int cpu) @@ -3609,15 +3795,14 @@ static void rcu_prepare_cpu(int cpu) /* * Handle CPU online/offline notification events. */ -static int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +int rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) { long cpu = (long)hcpu; struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); struct rcu_node *rnp = rdp->mynode; struct rcu_state *rsp; - trace_rcu_utilization(TPS("Start CPU hotplug")); switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: @@ -3637,6 +3822,11 @@ static int rcu_cpu_notify(struct notifier_block *self, for_each_rcu_flavor(rsp) rcu_cleanup_dying_cpu(rsp); break; + case CPU_DYING_IDLE: + for_each_rcu_flavor(rsp) { + rcu_cleanup_dying_idle_cpu(cpu, rsp); + } + break; case CPU_DEAD: case CPU_DEAD_FROZEN: case CPU_UP_CANCELED: @@ -3649,7 +3839,6 @@ static int rcu_cpu_notify(struct notifier_block *self, default: break; } - trace_rcu_utilization(TPS("End CPU hotplug")); return NOTIFY_OK; } @@ -3660,11 +3849,12 @@ static int rcu_pm_notify(struct notifier_block *self, case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ - rcu_expedited = 1; + rcu_expedite_gp(); break; case PM_POST_HIBERNATION: case PM_POST_SUSPEND: - rcu_expedited = 0; + if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ + rcu_unexpedite_gp(); break; default: break; @@ -3734,30 +3924,26 @@ void rcu_scheduler_starting(void) * Compute the per-level fanout, either using the exact fanout specified * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. */ -#ifdef CONFIG_RCU_FANOUT_EXACT -static void __init rcu_init_levelspread(struct rcu_state *rsp) -{ - int i; - - rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; - for (i = rcu_num_lvls - 2; i >= 0; i--) - rsp->levelspread[i] = CONFIG_RCU_FANOUT; -} -#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ static void __init rcu_init_levelspread(struct rcu_state *rsp) { - int ccur; - int cprv; int i; - cprv = nr_cpu_ids; - for (i = rcu_num_lvls - 1; i >= 0; i--) { - ccur = rsp->levelcnt[i]; - rsp->levelspread[i] = (cprv + ccur - 1) / ccur; - cprv = ccur; + if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) { + rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; + for (i = rcu_num_lvls - 2; i >= 0; i--) + rsp->levelspread[i] = CONFIG_RCU_FANOUT; + } else { + int ccur; + int cprv; + + cprv = nr_cpu_ids; + for (i = rcu_num_lvls - 1; i >= 0; i--) { + ccur = rsp->levelcnt[i]; + rsp->levelspread[i] = (cprv + ccur - 1) / ccur; + cprv = ccur; + } } } -#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */ /* * Helper function for rcu_init() that initializes one rcu_state structure. @@ -3833,7 +4019,6 @@ static void __init rcu_init_one(struct rcu_state *rsp, } } - rsp->rda = rda; init_waitqueue_head(&rsp->gp_wq); rnp = rsp->level[rcu_num_lvls - 1]; for_each_possible_cpu(i) { @@ -3926,6 +4111,8 @@ void __init rcu_init(void) { int cpu; + rcu_early_boot_tests(); + rcu_bootup_announce(); rcu_init_geometry(); rcu_init_one(&rcu_bh_state, &rcu_bh_data); @@ -3942,8 +4129,6 @@ void __init rcu_init(void) pm_notifier(rcu_pm_notify, 0); for_each_online_cpu(cpu) rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); - - rcu_early_boot_tests(); } #include "tree_plugin.h" diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 119de399eb2f..a69d3dab2ec4 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -141,12 +141,20 @@ struct rcu_node { /* complete (only for PREEMPT_RCU). */ unsigned long qsmaskinit; /* Per-GP initial value for qsmask & expmask. */ + /* Initialized from ->qsmaskinitnext at the */ + /* beginning of each grace period. */ + unsigned long qsmaskinitnext; + /* Online CPUs for next grace period. */ unsigned long grpmask; /* Mask to apply to parent qsmask. */ /* Only one bit will be set in this mask. */ int grplo; /* lowest-numbered CPU or group here. */ int grphi; /* highest-numbered CPU or group here. */ u8 grpnum; /* CPU/group number for next level up. */ u8 level; /* root is at level 0. */ + bool wait_blkd_tasks;/* Necessary to wait for blocked tasks to */ + /* exit RCU read-side critical sections */ + /* before propagating offline up the */ + /* rcu_node tree? */ struct rcu_node *parent; struct list_head blkd_tasks; /* Tasks blocked in RCU read-side critical */ @@ -448,8 +456,6 @@ struct rcu_state { long qlen; /* Total number of callbacks. */ /* End of fields guarded by orphan_lock. */ - struct mutex onoff_mutex; /* Coordinate hotplug & GPs. */ - struct mutex barrier_mutex; /* Guards barrier fields. */ atomic_t barrier_cpu_count; /* # CPUs waiting on. */ struct completion barrier_completion; /* Wake at barrier end. */ @@ -559,6 +565,7 @@ static void rcu_prepare_kthreads(int cpu); static void rcu_cleanup_after_idle(void); static void rcu_prepare_for_idle(void); static void rcu_idle_count_callbacks_posted(void); +static bool rcu_preempt_has_tasks(struct rcu_node *rnp); static void print_cpu_stall_info_begin(void); static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); static void print_cpu_stall_info_end(void); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0a571e9a0f1d..8c0ec0f5a027 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -58,38 +58,33 @@ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ */ static void __init rcu_bootup_announce_oddness(void) { -#ifdef CONFIG_RCU_TRACE - pr_info("\tRCU debugfs-based tracing is enabled.\n"); -#endif -#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) - pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", - CONFIG_RCU_FANOUT); -#endif -#ifdef CONFIG_RCU_FANOUT_EXACT - pr_info("\tHierarchical RCU autobalancing is disabled.\n"); -#endif -#ifdef CONFIG_RCU_FAST_NO_HZ - pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); -#endif -#ifdef CONFIG_PROVE_RCU - pr_info("\tRCU lockdep checking is enabled.\n"); -#endif -#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE - pr_info("\tRCU torture testing starts during boot.\n"); -#endif -#if defined(CONFIG_RCU_CPU_STALL_INFO) - pr_info("\tAdditional per-CPU info printed with stalls.\n"); -#endif -#if NUM_RCU_LVL_4 != 0 - pr_info("\tFour-level hierarchy is enabled.\n"); -#endif + if (IS_ENABLED(CONFIG_RCU_TRACE)) + pr_info("\tRCU debugfs-based tracing is enabled.\n"); + if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || + (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)) + pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", + CONFIG_RCU_FANOUT); + if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) + pr_info("\tHierarchical RCU autobalancing is disabled.\n"); + if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) + pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); + if (IS_ENABLED(CONFIG_PROVE_RCU)) + pr_info("\tRCU lockdep checking is enabled.\n"); + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_RUNNABLE)) + pr_info("\tRCU torture testing starts during boot.\n"); + if (IS_ENABLED(CONFIG_RCU_CPU_STALL_INFO)) + pr_info("\tAdditional per-CPU info printed with stalls.\n"); + if (NUM_RCU_LVL_4 != 0) + pr_info("\tFour-level hierarchy is enabled.\n"); + if (CONFIG_RCU_FANOUT_LEAF != 16) + pr_info("\tBuild-time adjustment of leaf fanout to %d.\n", + CONFIG_RCU_FANOUT_LEAF); if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); if (nr_cpu_ids != NR_CPUS) pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); -#ifdef CONFIG_RCU_BOOST - pr_info("\tRCU kthread priority: %d.\n", kthread_prio); -#endif + if (IS_ENABLED(CONFIG_RCU_BOOST)) + pr_info("\tRCU kthread priority: %d.\n", kthread_prio); } #ifdef CONFIG_PREEMPT_RCU @@ -180,7 +175,7 @@ static void rcu_preempt_note_context_switch(void) * But first, note that the current CPU must still be * on line! */ - WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); + WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0); WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); @@ -233,43 +228,6 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) } /* - * Record a quiescent state for all tasks that were previously queued - * on the specified rcu_node structure and that were blocking the current - * RCU grace period. The caller must hold the specified rnp->lock with - * irqs disabled, and this lock is released upon return, but irqs remain - * disabled. - */ -static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) - __releases(rnp->lock) -{ - unsigned long mask; - struct rcu_node *rnp_p; - - if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; /* Still need more quiescent states! */ - } - - rnp_p = rnp->parent; - if (rnp_p == NULL) { - /* - * Either there is only one rcu_node in the tree, - * or tasks were kicked up to root rcu_node due to - * CPUs going offline. - */ - rcu_report_qs_rsp(&rcu_preempt_state, flags); - return; - } - - /* Report up the rest of the hierarchy. */ - mask = rnp->grpmask; - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ - raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ - smp_mb__after_unlock_lock(); - rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); -} - -/* * Advance a ->blkd_tasks-list pointer to the next entry, instead * returning NULL if at the end of the list. */ @@ -300,7 +258,6 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp) */ void rcu_read_unlock_special(struct task_struct *t) { - bool empty; bool empty_exp; bool empty_norm; bool empty_exp_now; @@ -334,7 +291,13 @@ void rcu_read_unlock_special(struct task_struct *t) } /* Hardware IRQ handlers cannot block, complain if they get here. */ - if (WARN_ON_ONCE(in_irq() || in_serving_softirq())) { + if (in_irq() || in_serving_softirq()) { + lockdep_rcu_suspicious(__FILE__, __LINE__, + "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); + pr_alert("->rcu_read_unlock_special: %#x (b: %d, nq: %d)\n", + t->rcu_read_unlock_special.s, + t->rcu_read_unlock_special.b.blocked, + t->rcu_read_unlock_special.b.need_qs); local_irq_restore(flags); return; } @@ -356,7 +319,6 @@ void rcu_read_unlock_special(struct task_struct *t) break; raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } - empty = !rcu_preempt_has_tasks(rnp); empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); empty_exp = !rcu_preempted_readers_exp(rnp); smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ @@ -377,14 +339,6 @@ void rcu_read_unlock_special(struct task_struct *t) #endif /* #ifdef CONFIG_RCU_BOOST */ /* - * If this was the last task on the list, go see if we - * need to propagate ->qsmaskinit bit clearing up the - * rcu_node tree. - */ - if (!empty && !rcu_preempt_has_tasks(rnp)) - rcu_cleanup_dead_rnp(rnp); - - /* * If this was the last task on the current list, and if * we aren't waiting on any CPUs, report the quiescent state. * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, @@ -399,7 +353,8 @@ void rcu_read_unlock_special(struct task_struct *t) rnp->grplo, rnp->grphi, !!rnp->gp_tasks); - rcu_report_unblock_qs_rnp(rnp, flags); + rcu_report_unblock_qs_rnp(&rcu_preempt_state, + rnp, flags); } else { raw_spin_unlock_irqrestore(&rnp->lock, flags); } @@ -520,10 +475,6 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) WARN_ON_ONCE(rnp->qsmask); } -#ifdef CONFIG_HOTPLUG_CPU - -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - /* * Check for a quiescent state from the current CPU. When a task blocks, * the task is recorded in the corresponding CPU's rcu_node structure, @@ -585,7 +536,7 @@ void synchronize_rcu(void) "Illegal synchronize_rcu() in RCU read-side critical section"); if (!rcu_scheduler_active) return; - if (rcu_expedited) + if (rcu_gp_is_expedited()) synchronize_rcu_expedited(); else wait_rcu_gp(call_rcu); @@ -630,9 +581,6 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) * recursively up the tree. (Calm down, calm down, we do the recursion * iteratively!) * - * Most callers will set the "wake" flag, but the task initiating the - * expedited grace period need not wake itself. - * * Caller must hold sync_rcu_preempt_exp_mutex. */ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, @@ -667,29 +615,85 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, /* * Snapshot the tasks blocking the newly started preemptible-RCU expedited - * grace period for the specified rcu_node structure. If there are no such - * tasks, report it up the rcu_node hierarchy. + * grace period for the specified rcu_node structure, phase 1. If there + * are such tasks, set the ->expmask bits up the rcu_node tree and also + * set the ->expmask bits on the leaf rcu_node structures to tell phase 2 + * that work is needed here. * - * Caller must hold sync_rcu_preempt_exp_mutex and must exclude - * CPU hotplug operations. + * Caller must hold sync_rcu_preempt_exp_mutex. */ static void -sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) +sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp) { unsigned long flags; - int must_wait = 0; + unsigned long mask; + struct rcu_node *rnp_up; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); + WARN_ON_ONCE(rnp->expmask); + WARN_ON_ONCE(rnp->exp_tasks); if (!rcu_preempt_has_tasks(rnp)) { + /* No blocked tasks, nothing to do. */ raw_spin_unlock_irqrestore(&rnp->lock, flags); - } else { + return; + } + /* Call for Phase 2 and propagate ->expmask bits up the tree. */ + rnp->expmask = 1; + rnp_up = rnp; + while (rnp_up->parent) { + mask = rnp_up->grpmask; + rnp_up = rnp_up->parent; + if (rnp_up->expmask & mask) + break; + raw_spin_lock(&rnp_up->lock); /* irqs already off */ + smp_mb__after_unlock_lock(); + rnp_up->expmask |= mask; + raw_spin_unlock(&rnp_up->lock); /* irqs still off */ + } + raw_spin_unlock_irqrestore(&rnp->lock, flags); +} + +/* + * Snapshot the tasks blocking the newly started preemptible-RCU expedited + * grace period for the specified rcu_node structure, phase 2. If the + * leaf rcu_node structure has its ->expmask field set, check for tasks. + * If there are some, clear ->expmask and set ->exp_tasks accordingly, + * then initiate RCU priority boosting. Otherwise, clear ->expmask and + * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits, + * enabling rcu_read_unlock_special() to do the bit-clearing. + * + * Caller must hold sync_rcu_preempt_exp_mutex. + */ +static void +sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&rnp->lock, flags); + smp_mb__after_unlock_lock(); + if (!rnp->expmask) { + /* Phase 1 didn't do anything, so Phase 2 doesn't either. */ + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + + /* Phase 1 is over. */ + rnp->expmask = 0; + + /* + * If there are still blocked tasks, set up ->exp_tasks so that + * rcu_read_unlock_special() will wake us and then boost them. + */ + if (rcu_preempt_has_tasks(rnp)) { rnp->exp_tasks = rnp->blkd_tasks.next; rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ - must_wait = 1; + return; } - if (!must_wait) - rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ + + /* No longer any blocked tasks, so undo bit setting. */ + raw_spin_unlock_irqrestore(&rnp->lock, flags); + rcu_report_exp_rnp(rsp, rnp, false); } /** @@ -706,7 +710,6 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) */ void synchronize_rcu_expedited(void) { - unsigned long flags; struct rcu_node *rnp; struct rcu_state *rsp = &rcu_preempt_state; unsigned long snap; @@ -757,19 +760,16 @@ void synchronize_rcu_expedited(void) /* force all RCU readers onto ->blkd_tasks lists. */ synchronize_sched_expedited(); - /* Initialize ->expmask for all non-leaf rcu_node structures. */ - rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); - rnp->expmask = rnp->qsmaskinit; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - } - - /* Snapshot current state of ->blkd_tasks lists. */ + /* + * Snapshot current state of ->blkd_tasks lists into ->expmask. + * Phase 1 sets bits and phase 2 permits rcu_read_unlock_special() + * to start clearing them. Doing this in one phase leads to + * strange races between setting and clearing bits, so just say "no"! + */ + rcu_for_each_leaf_node(rsp, rnp) + sync_rcu_preempt_exp_init1(rsp, rnp); rcu_for_each_leaf_node(rsp, rnp) - sync_rcu_preempt_exp_init(rsp, rnp); - if (NUM_RCU_NODES > 1) - sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); + sync_rcu_preempt_exp_init2(rsp, rnp); put_online_cpus(); @@ -859,8 +859,6 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) return 0; } -#ifdef CONFIG_HOTPLUG_CPU - /* * Because there is no preemptible RCU, there can be no readers blocked. */ @@ -869,8 +867,6 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp) return false; } -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - /* * Because preemptible RCU does not exist, we never have to check for * tasks blocked within RCU read-side critical sections. @@ -1170,7 +1166,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) * Returns zero if all is well, a negated errno otherwise. */ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, - struct rcu_node *rnp) + struct rcu_node *rnp) { int rnp_index = rnp - &rsp->node[0]; unsigned long flags; @@ -1180,7 +1176,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, if (&rcu_preempt_state != rsp) return 0; - if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0) + if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0) return 0; rsp->boost = 1; @@ -1273,7 +1269,7 @@ static void rcu_cpu_kthread(unsigned int cpu) static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) { struct task_struct *t = rnp->boost_kthread_task; - unsigned long mask = rnp->qsmaskinit; + unsigned long mask = rcu_rnp_online_cpus(rnp); cpumask_var_t cm; int cpu; @@ -1945,7 +1941,8 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) rhp = ACCESS_ONCE(rdp->nocb_follower_head); /* Having no rcuo kthread but CBs after scheduler starts is bad! */ - if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp) { + if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp && + rcu_scheduler_fully_active) { /* RCU callback enqueued before CPU first came online??? */ pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n", cpu, rhp->func); @@ -2392,18 +2389,8 @@ void __init rcu_init_nohz(void) pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); for_each_rcu_flavor(rsp) { - for_each_cpu(cpu, rcu_nocb_mask) { - struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); - - /* - * If there are early callbacks, they will need - * to be moved to the nocb lists. - */ - WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] != - &rdp->nxtlist && - rdp->nxttail[RCU_NEXT_TAIL] != NULL); - init_nocb_callback_list(rdp); - } + for_each_cpu(cpu, rcu_nocb_mask) + init_nocb_callback_list(per_cpu_ptr(rsp->rda, cpu)); rcu_organize_nocb_kthreads(rsp); } } @@ -2540,6 +2527,16 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) if (!rcu_is_nocb_cpu(rdp->cpu)) return false; + /* If there are early-boot callbacks, move them to nocb lists. */ + if (rdp->nxtlist) { + rdp->nocb_head = rdp->nxtlist; + rdp->nocb_tail = rdp->nxttail[RCU_NEXT_TAIL]; + atomic_long_set(&rdp->nocb_q_count, rdp->qlen); + atomic_long_set(&rdp->nocb_q_count_lazy, rdp->qlen_lazy); + rdp->nxtlist = NULL; + rdp->qlen = 0; + rdp->qlen_lazy = 0; + } rdp->nxttail[RCU_NEXT_TAIL] = NULL; return true; } @@ -2763,7 +2760,8 @@ static void rcu_sysidle_exit(int irq) /* * Check to see if the current CPU is idle. Note that usermode execution - * does not count as idle. The caller must have disabled interrupts. + * does not count as idle. The caller must have disabled interrupts, + * and must be running on tick_do_timer_cpu. */ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, unsigned long *maxj) @@ -2784,8 +2782,8 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, if (!*isidle || rdp->rsp != rcu_state_p || cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu) return; - if (rcu_gp_in_progress(rdp->rsp)) - WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu); + /* Verify affinity of current kthread. */ + WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu); /* Pick up current idle and NMI-nesting counter and check. */ cur = atomic_read(&rdtp->dynticks_idle); @@ -3068,11 +3066,10 @@ static void rcu_bind_gp_kthread(void) return; #ifdef CONFIG_NO_HZ_FULL_SYSIDLE cpu = tick_do_timer_cpu; - if (cpu >= 0 && cpu < nr_cpu_ids && raw_smp_processor_id() != cpu) + if (cpu >= 0 && cpu < nr_cpu_ids) set_cpus_allowed_ptr(current, cpumask_of(cpu)); #else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ - if (!is_housekeeping_cpu(raw_smp_processor_id())) - housekeeping_affine(current); + housekeeping_affine(current); #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ } diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index fbb6240509ea..f92361efd0f5 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -283,8 +283,8 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) seq_puts(m, "\n"); level = rnp->level; } - seq_printf(m, "%lx/%lx %c%c>%c %d:%d ^%d ", - rnp->qsmask, rnp->qsmaskinit, + seq_printf(m, "%lx/%lx->%lx %c%c>%c %d:%d ^%d ", + rnp->qsmask, rnp->qsmaskinit, rnp->qsmaskinitnext, ".G"[rnp->gp_tasks != NULL], ".E"[rnp->exp_tasks != NULL], ".T"[!list_empty(&rnp->blkd_tasks)], diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index e0d31a345ee6..1f133350da01 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -62,6 +62,63 @@ MODULE_ALIAS("rcupdate"); module_param(rcu_expedited, int, 0); +#ifndef CONFIG_TINY_RCU + +static atomic_t rcu_expedited_nesting = + ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0); + +/* + * Should normal grace-period primitives be expedited? Intended for + * use within RCU. Note that this function takes the rcu_expedited + * sysfs/boot variable into account as well as the rcu_expedite_gp() + * nesting. So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited() + * returns false is a -really- bad idea. + */ +bool rcu_gp_is_expedited(void) +{ + return rcu_expedited || atomic_read(&rcu_expedited_nesting); +} +EXPORT_SYMBOL_GPL(rcu_gp_is_expedited); + +/** + * rcu_expedite_gp - Expedite future RCU grace periods + * + * After a call to this function, future calls to synchronize_rcu() and + * friends act as the corresponding synchronize_rcu_expedited() function + * had instead been called. + */ +void rcu_expedite_gp(void) +{ + atomic_inc(&rcu_expedited_nesting); +} +EXPORT_SYMBOL_GPL(rcu_expedite_gp); + +/** + * rcu_unexpedite_gp - Cancel prior rcu_expedite_gp() invocation + * + * Undo a prior call to rcu_expedite_gp(). If all prior calls to + * rcu_expedite_gp() are undone by a subsequent call to rcu_unexpedite_gp(), + * and if the rcu_expedited sysfs/boot parameter is not set, then all + * subsequent calls to synchronize_rcu() and friends will return to + * their normal non-expedited behavior. + */ +void rcu_unexpedite_gp(void) +{ + atomic_dec(&rcu_expedited_nesting); +} +EXPORT_SYMBOL_GPL(rcu_unexpedite_gp); + +#endif /* #ifndef CONFIG_TINY_RCU */ + +/* + * Inform RCU of the end of the in-kernel boot sequence. + */ +void rcu_end_inkernel_boot(void) +{ + if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT)) + rcu_unexpedite_gp(); +} + #ifdef CONFIG_PREEMPT_RCU /* @@ -199,16 +256,13 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; - -/* - * Awaken the corresponding synchronize_rcu() instance now that a - * grace period has elapsed. +/** + * wakeme_after_rcu() - Callback function to awaken a task after grace period + * @head: Pointer to rcu_head member within rcu_synchronize structure + * + * Awaken the corresponding task now that a grace period has elapsed. */ -static void wakeme_after_rcu(struct rcu_head *head) +void wakeme_after_rcu(struct rcu_head *head) { struct rcu_synchronize *rcu; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2f7937ee9e3a..f9123a82cbb6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2853,7 +2853,7 @@ asmlinkage __visible void __sched schedule_user(void) * we find a better solution. * * NB: There are buggy callers of this function. Ideally we - * should warn if prev_state != IN_USER, but that will trigger + * should warn if prev_state != CONTEXT_USER, but that will trigger * too frequently to make sense yet. */ enum ctx_state prev_state = exception_enter(); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 4d207d2abcbd..deef1caa94c6 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -209,6 +209,8 @@ use_default: goto exit_idle; } +DEFINE_PER_CPU(bool, cpu_dead_idle); + /* * Generic idle loop implementation * @@ -233,8 +235,13 @@ static void cpu_idle_loop(void) check_pgt_cache(); rmb(); - if (cpu_is_offline(smp_processor_id())) + if (cpu_is_offline(smp_processor_id())) { + rcu_cpu_notify(NULL, CPU_DYING_IDLE, + (void *)(long)smp_processor_id()); + smp_mb(); /* all activity before dead. */ + this_cpu_write(cpu_dead_idle, true); arch_cpu_idle_dead(); + } local_irq_disable(); arch_cpu_idle_enter(); diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 40190f28db35..c697f73d82d6 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -4,6 +4,7 @@ #include <linux/cpu.h> #include <linux/err.h> #include <linux/smp.h> +#include <linux/delay.h> #include <linux/init.h> #include <linux/list.h> #include <linux/slab.h> @@ -314,3 +315,158 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread) put_online_cpus(); } EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); + +static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); + +/* + * Called to poll specified CPU's state, for example, when waiting for + * a CPU to come online. + */ +int cpu_report_state(int cpu) +{ + return atomic_read(&per_cpu(cpu_hotplug_state, cpu)); +} + +/* + * If CPU has died properly, set its state to CPU_UP_PREPARE and + * return success. Otherwise, return -EBUSY if the CPU died after + * cpu_wait_death() timed out. And yet otherwise again, return -EAGAIN + * if cpu_wait_death() timed out and the CPU still hasn't gotten around + * to dying. In the latter two cases, the CPU might not be set up + * properly, but it is up to the arch-specific code to decide. + * Finally, -EIO indicates an unanticipated problem. + * + * Note that it is permissible to omit this call entirely, as is + * done in architectures that do no CPU-hotplug error checking. + */ +int cpu_check_up_prepare(int cpu) +{ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) { + atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE); + return 0; + } + + switch (atomic_read(&per_cpu(cpu_hotplug_state, cpu))) { + + case CPU_POST_DEAD: + + /* The CPU died properly, so just start it up again. */ + atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE); + return 0; + + case CPU_DEAD_FROZEN: + + /* + * Timeout during CPU death, so let caller know. + * The outgoing CPU completed its processing, but after + * cpu_wait_death() timed out and reported the error. The + * caller is free to proceed, in which case the state + * will be reset properly by cpu_set_state_online(). + * Proceeding despite this -EBUSY return makes sense + * for systems where the outgoing CPUs take themselves + * offline, with no post-death manipulation required from + * a surviving CPU. + */ + return -EBUSY; + + case CPU_BROKEN: + + /* + * The most likely reason we got here is that there was + * a timeout during CPU death, and the outgoing CPU never + * did complete its processing. This could happen on + * a virtualized system if the outgoing VCPU gets preempted + * for more than five seconds, and the user attempts to + * immediately online that same CPU. Trying again later + * might return -EBUSY above, hence -EAGAIN. + */ + return -EAGAIN; + + default: + + /* Should not happen. Famous last words. */ + return -EIO; + } +} + +/* + * Mark the specified CPU online. + * + * Note that it is permissible to omit this call entirely, as is + * done in architectures that do no CPU-hotplug error checking. + */ +void cpu_set_state_online(int cpu) +{ + (void)atomic_xchg(&per_cpu(cpu_hotplug_state, cpu), CPU_ONLINE); +} + +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Wait for the specified CPU to exit the idle loop and die. + */ +bool cpu_wait_death(unsigned int cpu, int seconds) +{ + int jf_left = seconds * HZ; + int oldstate; + bool ret = true; + int sleep_jf = 1; + + might_sleep(); + + /* The outgoing CPU will normally get done quite quickly. */ + if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD) + goto update_state; + udelay(5); + + /* But if the outgoing CPU dawdles, wait increasingly long times. */ + while (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) != CPU_DEAD) { + schedule_timeout_uninterruptible(sleep_jf); + jf_left -= sleep_jf; + if (jf_left <= 0) + break; + sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10); + } +update_state: + oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu)); + if (oldstate == CPU_DEAD) { + /* Outgoing CPU died normally, update state. */ + smp_mb(); /* atomic_read() before update. */ + atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD); + } else { + /* Outgoing CPU still hasn't died, set state accordingly. */ + if (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu), + oldstate, CPU_BROKEN) != oldstate) + goto update_state; + ret = false; + } + return ret; +} + +/* + * Called by the outgoing CPU to report its successful death. Return + * false if this report follows the surviving CPU's timing out. + * + * A separate "CPU_DEAD_FROZEN" is used when the surviving CPU + * timed out. This approach allows architectures to omit calls to + * cpu_check_up_prepare() and cpu_set_state_online() without defeating + * the next cpu_wait_death()'s polling loop. + */ +bool cpu_report_death(void) +{ + int oldstate; + int newstate; + int cpu = smp_processor_id(); + + do { + oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu)); + if (oldstate != CPU_BROKEN) + newstate = CPU_DEAD; + else + newstate = CPU_DEAD_FROZEN; + } while (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu), + oldstate, newstate) != oldstate); + return newstate == CPU_DEAD; +} + +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 245e7dcc3741..8c0eabd41886 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -19,6 +19,7 @@ */ #include <linux/module.h> +#include <linux/aio.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/slab.h> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a5da09c899dd..3b9a48ae153a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -432,6 +432,14 @@ config UPROBE_EVENT This option is required if you plan to use perf-probe subcommand of perf tools on user space applications. +config BPF_EVENTS + depends on BPF_SYSCALL + depends on KPROBE_EVENT + bool + default y + help + This allows the user to attach BPF programs to kprobe events. + config PROBE_EVENTS def_bool n @@ -599,6 +607,34 @@ config RING_BUFFER_STARTUP_TEST If unsure, say N +config TRACE_ENUM_MAP_FILE + bool "Show enum mappings for trace events" + depends on TRACING + help + The "print fmt" of the trace events will show the enum names instead + of their values. This can cause problems for user space tools that + use this string to parse the raw data as user space does not know + how to convert the string to its value. + + To fix this, there's a special macro in the kernel that can be used + to convert the enum into its value. If this macro is used, then the + print fmt strings will have the enums converted to their values. + + If something does not get converted properly, this option can be + used to show what enums the kernel tried to convert. + + This option is for debugging the enum conversions. A file is created + in the tracing directory called "enum_map" that will show the enum + names matched with their values and what trace event system they + belong too. + + Normally, the mapping of the strings to values will be freed after + boot up or module load. With this option, they will not be freed, as + they are needed for the "enum_map" file. Enabling this option will + increase the memory footprint of the running kernel. + + If unsure, say N + endif # FTRACE endif # TRACING_SUPPORT diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 98f26588255e..9b1044e936a6 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o +obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o ifeq ($(CONFIG_PM),y) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c new file mode 100644 index 000000000000..2d56ce501632 --- /dev/null +++ b/kernel/trace/bpf_trace.c @@ -0,0 +1,222 @@ +/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/uaccess.h> +#include <linux/ctype.h> +#include "trace.h" + +static DEFINE_PER_CPU(int, bpf_prog_active); + +/** + * trace_call_bpf - invoke BPF program + * @prog: BPF program + * @ctx: opaque context pointer + * + * kprobe handlers execute BPF programs via this helper. + * Can be used from static tracepoints in the future. + * + * Return: BPF programs always return an integer which is interpreted by + * kprobe handler as: + * 0 - return from kprobe (event is filtered out) + * 1 - store kprobe event into ring buffer + * Other values are reserved and currently alias to 1 + */ +unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +{ + unsigned int ret; + + if (in_nmi()) /* not supported yet */ + return 1; + + preempt_disable(); + + if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { + /* + * since some bpf program is already running on this cpu, + * don't call into another bpf program (same or different) + * and don't send kprobe event into ring-buffer, + * so return zero here + */ + ret = 0; + goto out; + } + + rcu_read_lock(); + ret = BPF_PROG_RUN(prog, ctx); + rcu_read_unlock(); + + out: + __this_cpu_dec(bpf_prog_active); + preempt_enable(); + + return ret; +} +EXPORT_SYMBOL_GPL(trace_call_bpf); + +static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + void *dst = (void *) (long) r1; + int size = (int) r2; + void *unsafe_ptr = (void *) (long) r3; + + return probe_kernel_read(dst, unsafe_ptr, size); +} + +static const struct bpf_func_proto bpf_probe_read_proto = { + .func = bpf_probe_read, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE, + .arg3_type = ARG_ANYTHING, +}; + +static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + /* NMI safe access to clock monotonic */ + return ktime_get_mono_fast_ns(); +} + +static const struct bpf_func_proto bpf_ktime_get_ns_proto = { + .func = bpf_ktime_get_ns, + .gpl_only = true, + .ret_type = RET_INTEGER, +}; + +/* + * limited trace_printk() + * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed + */ +static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) +{ + char *fmt = (char *) (long) r1; + int mod[3] = {}; + int fmt_cnt = 0; + int i; + + /* + * bpf_check()->check_func_arg()->check_stack_boundary() + * guarantees that fmt points to bpf program stack, + * fmt_size bytes of it were initialized and fmt_size > 0 + */ + if (fmt[--fmt_size] != 0) + return -EINVAL; + + /* check format string for allowed specifiers */ + for (i = 0; i < fmt_size; i++) { + if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) + return -EINVAL; + + if (fmt[i] != '%') + continue; + + if (fmt_cnt >= 3) + return -EINVAL; + + /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */ + i++; + if (fmt[i] == 'l') { + mod[fmt_cnt]++; + i++; + } else if (fmt[i] == 'p') { + mod[fmt_cnt]++; + i++; + if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) + return -EINVAL; + fmt_cnt++; + continue; + } + + if (fmt[i] == 'l') { + mod[fmt_cnt]++; + i++; + } + + if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x') + return -EINVAL; + fmt_cnt++; + } + + return __trace_printk(1/* fake ip will not be printed */, fmt, + mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3, + mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4, + mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5); +} + +static const struct bpf_func_proto bpf_trace_printk_proto = { + .func = bpf_trace_printk, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE, +}; + +static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + case BPF_FUNC_probe_read: + return &bpf_probe_read_proto; + case BPF_FUNC_ktime_get_ns: + return &bpf_ktime_get_ns_proto; + + case BPF_FUNC_trace_printk: + /* + * this program might be calling bpf_trace_printk, + * so allocate per-cpu printk buffers + */ + trace_printk_init_buffers(); + + return &bpf_trace_printk_proto; + default: + return NULL; + } +} + +/* bpf+kprobe programs can access fields of 'struct pt_regs' */ +static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type) +{ + /* check bounds */ + if (off < 0 || off >= sizeof(struct pt_regs)) + return false; + + /* only read is allowed */ + if (type != BPF_READ) + return false; + + /* disallow misaligned access */ + if (off % size != 0) + return false; + + return true; +} + +static struct bpf_verifier_ops kprobe_prog_ops = { + .get_func_proto = kprobe_prog_func_proto, + .is_valid_access = kprobe_prog_is_valid_access, +}; + +static struct bpf_prog_type_list kprobe_tl = { + .ops = &kprobe_prog_ops, + .type = BPF_PROG_TYPE_KPROBE, +}; + +static int __init register_kprobe_prog_ops(void) +{ + bpf_register_prog_type(&kprobe_tl); + return 0; +} +late_initcall(register_kprobe_prog_ops); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4f228024055b..02bece4a99ea 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -18,7 +18,7 @@ #include <linux/kallsyms.h> #include <linux/seq_file.h> #include <linux/suspend.h> -#include <linux/debugfs.h> +#include <linux/tracefs.h> #include <linux/hardirq.h> #include <linux/kthread.h> #include <linux/uaccess.h> @@ -249,6 +249,19 @@ static void update_function_graph_func(void); static inline void update_function_graph_func(void) { } #endif + +static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) +{ + /* + * If this is a dynamic ops or we force list func, + * then it needs to call the list anyway. + */ + if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC) + return ftrace_ops_list_func; + + return ftrace_ops_get_func(ops); +} + static void update_ftrace_function(void) { ftrace_func_t func; @@ -270,7 +283,7 @@ static void update_ftrace_function(void) * then have the mcount trampoline call the function directly. */ } else if (ftrace_ops_list->next == &ftrace_list_end) { - func = ftrace_ops_get_func(ftrace_ops_list); + func = ftrace_ops_get_list_func(ftrace_ops_list); } else { /* Just use the default ftrace_ops */ @@ -1008,7 +1021,7 @@ static struct tracer_stat function_stats __initdata = { .stat_show = function_stat_show }; -static __init void ftrace_profile_debugfs(struct dentry *d_tracer) +static __init void ftrace_profile_tracefs(struct dentry *d_tracer) { struct ftrace_profile_stat *stat; struct dentry *entry; @@ -1044,15 +1057,15 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer) } } - entry = debugfs_create_file("function_profile_enabled", 0644, + entry = tracefs_create_file("function_profile_enabled", 0644, d_tracer, NULL, &ftrace_profile_fops); if (!entry) - pr_warning("Could not create debugfs " + pr_warning("Could not create tracefs " "'function_profile_enabled' entry\n"); } #else /* CONFIG_FUNCTION_PROFILER */ -static __init void ftrace_profile_debugfs(struct dentry *d_tracer) +static __init void ftrace_profile_tracefs(struct dentry *d_tracer) { } #endif /* CONFIG_FUNCTION_PROFILER */ @@ -4712,7 +4725,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops) mutex_unlock(&ftrace_lock); } -static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) +static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { trace_create_file("available_filter_functions", 0444, @@ -5020,7 +5033,7 @@ static int __init ftrace_nodyn_init(void) } core_initcall(ftrace_nodyn_init); -static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } +static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } static inline void ftrace_startup_all(int command) { } /* Keep as macros so we do not need to define the commands */ @@ -5209,13 +5222,6 @@ static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) { /* - * If this is a dynamic ops or we force list func, - * then it needs to call the list anyway. - */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC) - return ftrace_ops_list_func; - - /* * If the func handles its own recursion, call it directly. * Otherwise call the recursion protected function that * will call the ftrace ops function. @@ -5473,7 +5479,7 @@ static const struct file_operations ftrace_pid_fops = { .release = ftrace_pid_release, }; -static __init int ftrace_init_debugfs(void) +static __init int ftrace_init_tracefs(void) { struct dentry *d_tracer; @@ -5481,16 +5487,16 @@ static __init int ftrace_init_debugfs(void) if (IS_ERR(d_tracer)) return 0; - ftrace_init_dyn_debugfs(d_tracer); + ftrace_init_dyn_tracefs(d_tracer); trace_create_file("set_ftrace_pid", 0644, d_tracer, NULL, &ftrace_pid_fops); - ftrace_profile_debugfs(d_tracer); + ftrace_profile_tracefs(d_tracer); return 0; } -fs_initcall(ftrace_init_debugfs); +fs_initcall(ftrace_init_tracefs); /** * ftrace_kill - kill ftrace diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 5040d44fe5a3..0315d43176d8 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2679,7 +2679,7 @@ static DEFINE_PER_CPU(unsigned int, current_context); static __always_inline int trace_recursive_lock(void) { - unsigned int val = this_cpu_read(current_context); + unsigned int val = __this_cpu_read(current_context); int bit; if (in_interrupt()) { @@ -2696,18 +2696,14 @@ static __always_inline int trace_recursive_lock(void) return 1; val |= (1 << bit); - this_cpu_write(current_context, val); + __this_cpu_write(current_context, val); return 0; } static __always_inline void trace_recursive_unlock(void) { - unsigned int val = this_cpu_read(current_context); - - val--; - val &= this_cpu_read(current_context); - this_cpu_write(current_context, val); + __this_cpu_and(current_context, __this_cpu_read(current_context) - 1); } #else diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 62c6506d663f..91eecaaa43e0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -20,6 +20,7 @@ #include <linux/notifier.h> #include <linux/irqflags.h> #include <linux/debugfs.h> +#include <linux/tracefs.h> #include <linux/pagemap.h> #include <linux/hardirq.h> #include <linux/linkage.h> @@ -31,6 +32,7 @@ #include <linux/splice.h> #include <linux/kdebug.h> #include <linux/string.h> +#include <linux/mount.h> #include <linux/rwsem.h> #include <linux/slab.h> #include <linux/ctype.h> @@ -123,6 +125,42 @@ enum ftrace_dump_mode ftrace_dump_on_oops; /* When set, tracing will stop when a WARN*() is hit */ int __disable_trace_on_warning; +#ifdef CONFIG_TRACE_ENUM_MAP_FILE +/* Map of enums to their values, for "enum_map" file */ +struct trace_enum_map_head { + struct module *mod; + unsigned long length; +}; + +union trace_enum_map_item; + +struct trace_enum_map_tail { + /* + * "end" is first and points to NULL as it must be different + * than "mod" or "enum_string" + */ + union trace_enum_map_item *next; + const char *end; /* points to NULL */ +}; + +static DEFINE_MUTEX(trace_enum_mutex); + +/* + * The trace_enum_maps are saved in an array with two extra elements, + * one at the beginning, and one at the end. The beginning item contains + * the count of the saved maps (head.length), and the module they + * belong to if not built in (head.mod). The ending item contains a + * pointer to the next array of saved enum_map items. + */ +union trace_enum_map_item { + struct trace_enum_map map; + struct trace_enum_map_head head; + struct trace_enum_map_tail tail; +}; + +static union trace_enum_map_item *trace_enum_maps; +#endif /* CONFIG_TRACE_ENUM_MAP_FILE */ + static int tracing_set_tracer(struct trace_array *tr, const char *buf); #define MAX_TRACER_SIZE 100 @@ -3908,6 +3946,182 @@ static const struct file_operations tracing_saved_cmdlines_size_fops = { .write = tracing_saved_cmdlines_size_write, }; +#ifdef CONFIG_TRACE_ENUM_MAP_FILE +static union trace_enum_map_item * +update_enum_map(union trace_enum_map_item *ptr) +{ + if (!ptr->map.enum_string) { + if (ptr->tail.next) { + ptr = ptr->tail.next; + /* Set ptr to the next real item (skip head) */ + ptr++; + } else + return NULL; + } + return ptr; +} + +static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos) +{ + union trace_enum_map_item *ptr = v; + + /* + * Paranoid! If ptr points to end, we don't want to increment past it. + * This really should never happen. + */ + ptr = update_enum_map(ptr); + if (WARN_ON_ONCE(!ptr)) + return NULL; + + ptr++; + + (*pos)++; + + ptr = update_enum_map(ptr); + + return ptr; +} + +static void *enum_map_start(struct seq_file *m, loff_t *pos) +{ + union trace_enum_map_item *v; + loff_t l = 0; + + mutex_lock(&trace_enum_mutex); + + v = trace_enum_maps; + if (v) + v++; + + while (v && l < *pos) { + v = enum_map_next(m, v, &l); + } + + return v; +} + +static void enum_map_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&trace_enum_mutex); +} + +static int enum_map_show(struct seq_file *m, void *v) +{ + union trace_enum_map_item *ptr = v; + + seq_printf(m, "%s %ld (%s)\n", + ptr->map.enum_string, ptr->map.enum_value, + ptr->map.system); + + return 0; +} + +static const struct seq_operations tracing_enum_map_seq_ops = { + .start = enum_map_start, + .next = enum_map_next, + .stop = enum_map_stop, + .show = enum_map_show, +}; + +static int tracing_enum_map_open(struct inode *inode, struct file *filp) +{ + if (tracing_disabled) + return -ENODEV; + + return seq_open(filp, &tracing_enum_map_seq_ops); +} + +static const struct file_operations tracing_enum_map_fops = { + .open = tracing_enum_map_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static inline union trace_enum_map_item * +trace_enum_jmp_to_tail(union trace_enum_map_item *ptr) +{ + /* Return tail of array given the head */ + return ptr + ptr->head.length + 1; +} + +static void +trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start, + int len) +{ + struct trace_enum_map **stop; + struct trace_enum_map **map; + union trace_enum_map_item *map_array; + union trace_enum_map_item *ptr; + + stop = start + len; + + /* + * The trace_enum_maps contains the map plus a head and tail item, + * where the head holds the module and length of array, and the + * tail holds a pointer to the next list. + */ + map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL); + if (!map_array) { + pr_warning("Unable to allocate trace enum mapping\n"); + return; + } + + mutex_lock(&trace_enum_mutex); + + if (!trace_enum_maps) + trace_enum_maps = map_array; + else { + ptr = trace_enum_maps; + for (;;) { + ptr = trace_enum_jmp_to_tail(ptr); + if (!ptr->tail.next) + break; + ptr = ptr->tail.next; + + } + ptr->tail.next = map_array; + } + map_array->head.mod = mod; + map_array->head.length = len; + map_array++; + + for (map = start; (unsigned long)map < (unsigned long)stop; map++) { + map_array->map = **map; + map_array++; + } + memset(map_array, 0, sizeof(*map_array)); + + mutex_unlock(&trace_enum_mutex); +} + +static void trace_create_enum_file(struct dentry *d_tracer) +{ + trace_create_file("enum_map", 0444, d_tracer, + NULL, &tracing_enum_map_fops); +} + +#else /* CONFIG_TRACE_ENUM_MAP_FILE */ +static inline void trace_create_enum_file(struct dentry *d_tracer) { } +static inline void trace_insert_enum_map_file(struct module *mod, + struct trace_enum_map **start, int len) { } +#endif /* !CONFIG_TRACE_ENUM_MAP_FILE */ + +static void trace_insert_enum_map(struct module *mod, + struct trace_enum_map **start, int len) +{ + struct trace_enum_map **map; + + if (len <= 0) + return; + + map = start; + + trace_event_enum_update(map, len); + + trace_insert_enum_map_file(mod, start, len); +} + static ssize_t tracing_set_trace_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -4105,9 +4319,24 @@ static void tracing_set_nop(struct trace_array *tr) tr->current_trace = &nop_trace; } -static int tracing_set_tracer(struct trace_array *tr, const char *buf) +static void update_tracer_options(struct trace_array *tr, struct tracer *t) { static struct trace_option_dentry *topts; + + /* Only enable if the directory has been created already. */ + if (!tr->dir) + return; + + /* Currently, only the top instance has options */ + if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) + return; + + destroy_trace_option_files(topts); + topts = create_trace_option_files(tr, t); +} + +static int tracing_set_tracer(struct trace_array *tr, const char *buf) +{ struct tracer *t; #ifdef CONFIG_TRACER_MAX_TRACE bool had_max_tr; @@ -4172,11 +4401,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) free_snapshot(tr); } #endif - /* Currently, only the top instance has options */ - if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { - destroy_trace_option_files(topts); - topts = create_trace_option_files(tr, t); - } + update_tracer_options(tr, t); #ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { @@ -5817,6 +6042,14 @@ static inline __init int register_snapshot_cmd(void) { return 0; } static struct dentry *tracing_get_dentry(struct trace_array *tr) { + if (WARN_ON(!tr->dir)) + return ERR_PTR(-ENODEV); + + /* Top directory uses NULL as the parent */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) + return NULL; + + /* All sub buffers have a descriptor */ return tr->dir; } @@ -5831,10 +6064,10 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) if (IS_ERR(d_tracer)) return NULL; - tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer); + tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); WARN_ONCE(!tr->percpu_dir, - "Could not create debugfs directory 'per_cpu/%d'\n", cpu); + "Could not create tracefs directory 'per_cpu/%d'\n", cpu); return tr->percpu_dir; } @@ -5851,7 +6084,7 @@ trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, } static void -tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) +tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) { struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); struct dentry *d_cpu; @@ -5861,9 +6094,9 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) return; snprintf(cpu_dir, 30, "cpu%ld", cpu); - d_cpu = debugfs_create_dir(cpu_dir, d_percpu); + d_cpu = tracefs_create_dir(cpu_dir, d_percpu); if (!d_cpu) { - pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); + pr_warning("Could not create tracefs '%s' entry\n", cpu_dir); return; } @@ -6015,9 +6248,9 @@ struct dentry *trace_create_file(const char *name, { struct dentry *ret; - ret = debugfs_create_file(name, mode, parent, data, fops); + ret = tracefs_create_file(name, mode, parent, data, fops); if (!ret) - pr_warning("Could not create debugfs '%s' entry\n", name); + pr_warning("Could not create tracefs '%s' entry\n", name); return ret; } @@ -6034,9 +6267,9 @@ static struct dentry *trace_options_init_dentry(struct trace_array *tr) if (IS_ERR(d_tracer)) return NULL; - tr->options = debugfs_create_dir("options", d_tracer); + tr->options = tracefs_create_dir("options", d_tracer); if (!tr->options) { - pr_warning("Could not create debugfs directory 'options'\n"); + pr_warning("Could not create tracefs directory 'options'\n"); return NULL; } @@ -6105,7 +6338,7 @@ destroy_trace_option_files(struct trace_option_dentry *topts) return; for (cnt = 0; topts[cnt].opt; cnt++) - debugfs_remove(topts[cnt].entry); + tracefs_remove(topts[cnt].entry); kfree(topts); } @@ -6194,7 +6427,7 @@ static const struct file_operations rb_simple_fops = { struct dentry *trace_instance_dir; static void -init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer); +init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); static int allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size) @@ -6271,7 +6504,7 @@ static void free_trace_buffers(struct trace_array *tr) #endif } -static int new_instance_create(const char *name) +static int instance_mkdir(const char *name) { struct trace_array *tr; int ret; @@ -6310,17 +6543,17 @@ static int new_instance_create(const char *name) if (allocate_trace_buffers(tr, trace_buf_size) < 0) goto out_free_tr; - tr->dir = debugfs_create_dir(name, trace_instance_dir); + tr->dir = tracefs_create_dir(name, trace_instance_dir); if (!tr->dir) goto out_free_tr; ret = event_trace_add_tracer(tr->dir, tr); if (ret) { - debugfs_remove_recursive(tr->dir); + tracefs_remove_recursive(tr->dir); goto out_free_tr; } - init_tracer_debugfs(tr, tr->dir); + init_tracer_tracefs(tr, tr->dir); list_add(&tr->list, &ftrace_trace_arrays); @@ -6341,7 +6574,7 @@ static int new_instance_create(const char *name) } -static int instance_delete(const char *name) +static int instance_rmdir(const char *name) { struct trace_array *tr; int found = 0; @@ -6382,82 +6615,17 @@ static int instance_delete(const char *name) return ret; } -static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t mode) -{ - struct dentry *parent; - int ret; - - /* Paranoid: Make sure the parent is the "instances" directory */ - parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); - if (WARN_ON_ONCE(parent != trace_instance_dir)) - return -ENOENT; - - /* - * The inode mutex is locked, but debugfs_create_dir() will also - * take the mutex. As the instances directory can not be destroyed - * or changed in any other way, it is safe to unlock it, and - * let the dentry try. If two users try to make the same dir at - * the same time, then the new_instance_create() will determine the - * winner. - */ - mutex_unlock(&inode->i_mutex); - - ret = new_instance_create(dentry->d_iname); - - mutex_lock(&inode->i_mutex); - - return ret; -} - -static int instance_rmdir(struct inode *inode, struct dentry *dentry) -{ - struct dentry *parent; - int ret; - - /* Paranoid: Make sure the parent is the "instances" directory */ - parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); - if (WARN_ON_ONCE(parent != trace_instance_dir)) - return -ENOENT; - - /* The caller did a dget() on dentry */ - mutex_unlock(&dentry->d_inode->i_mutex); - - /* - * The inode mutex is locked, but debugfs_create_dir() will also - * take the mutex. As the instances directory can not be destroyed - * or changed in any other way, it is safe to unlock it, and - * let the dentry try. If two users try to make the same dir at - * the same time, then the instance_delete() will determine the - * winner. - */ - mutex_unlock(&inode->i_mutex); - - ret = instance_delete(dentry->d_iname); - - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - mutex_lock(&dentry->d_inode->i_mutex); - - return ret; -} - -static const struct inode_operations instance_dir_inode_operations = { - .lookup = simple_lookup, - .mkdir = instance_mkdir, - .rmdir = instance_rmdir, -}; - static __init void create_trace_instances(struct dentry *d_tracer) { - trace_instance_dir = debugfs_create_dir("instances", d_tracer); + trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, + instance_mkdir, + instance_rmdir); if (WARN_ON(!trace_instance_dir)) return; - - /* Hijack the dir inode operations, to allow mkdir */ - trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations; } static void -init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) +init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) { int cpu; @@ -6511,10 +6679,32 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) #endif for_each_tracing_cpu(cpu) - tracing_init_debugfs_percpu(tr, cpu); + tracing_init_tracefs_percpu(tr, cpu); } +static struct vfsmount *trace_automount(void *ingore) +{ + struct vfsmount *mnt; + struct file_system_type *type; + + /* + * To maintain backward compatibility for tools that mount + * debugfs to get to the tracing facility, tracefs is automatically + * mounted to the debugfs/tracing directory. + */ + type = get_fs_type("tracefs"); + if (!type) + return NULL; + mnt = vfs_kern_mount(type, 0, "tracefs", NULL); + put_filesystem(type); + if (IS_ERR(mnt)) + return NULL; + mntget(mnt); + + return mnt; +} + /** * tracing_init_dentry - initialize top level trace array * @@ -6526,23 +6716,112 @@ struct dentry *tracing_init_dentry(void) { struct trace_array *tr = &global_trace; + /* The top level trace array uses NULL as parent */ if (tr->dir) - return tr->dir; + return NULL; if (WARN_ON(!debugfs_initialized())) return ERR_PTR(-ENODEV); - tr->dir = debugfs_create_dir("tracing", NULL); - + /* + * As there may still be users that expect the tracing + * files to exist in debugfs/tracing, we must automount + * the tracefs file system there, so older tools still + * work with the newer kerenl. + */ + tr->dir = debugfs_create_automount("tracing", NULL, + trace_automount, NULL); if (!tr->dir) { pr_warn_once("Could not create debugfs directory 'tracing'\n"); return ERR_PTR(-ENOMEM); } - return tr->dir; + return NULL; +} + +extern struct trace_enum_map *__start_ftrace_enum_maps[]; +extern struct trace_enum_map *__stop_ftrace_enum_maps[]; + +static void __init trace_enum_init(void) +{ + int len; + + len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps; + trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len); +} + +#ifdef CONFIG_MODULES +static void trace_module_add_enums(struct module *mod) +{ + if (!mod->num_trace_enums) + return; + + /* + * Modules with bad taint do not have events created, do + * not bother with enums either. + */ + if (trace_module_has_bad_taint(mod)) + return; + + trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums); } -static __init int tracer_init_debugfs(void) +#ifdef CONFIG_TRACE_ENUM_MAP_FILE +static void trace_module_remove_enums(struct module *mod) +{ + union trace_enum_map_item *map; + union trace_enum_map_item **last = &trace_enum_maps; + + if (!mod->num_trace_enums) + return; + + mutex_lock(&trace_enum_mutex); + + map = trace_enum_maps; + + while (map) { + if (map->head.mod == mod) + break; + map = trace_enum_jmp_to_tail(map); + last = &map->tail.next; + map = map->tail.next; + } + if (!map) + goto out; + + *last = trace_enum_jmp_to_tail(map)->tail.next; + kfree(map); + out: + mutex_unlock(&trace_enum_mutex); +} +#else +static inline void trace_module_remove_enums(struct module *mod) { } +#endif /* CONFIG_TRACE_ENUM_MAP_FILE */ + +static int trace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + switch (val) { + case MODULE_STATE_COMING: + trace_module_add_enums(mod); + break; + case MODULE_STATE_GOING: + trace_module_remove_enums(mod); + break; + } + + return 0; +} + +static struct notifier_block trace_module_nb = { + .notifier_call = trace_module_notify, + .priority = 0, +}; +#endif /* CONFIG_MODULES */ + +static __init int tracer_init_tracefs(void) { struct dentry *d_tracer; @@ -6552,7 +6831,7 @@ static __init int tracer_init_debugfs(void) if (IS_ERR(d_tracer)) return 0; - init_tracer_debugfs(&global_trace, d_tracer); + init_tracer_tracefs(&global_trace, d_tracer); trace_create_file("tracing_thresh", 0644, d_tracer, &global_trace, &tracing_thresh_fops); @@ -6566,6 +6845,14 @@ static __init int tracer_init_debugfs(void) trace_create_file("saved_cmdlines_size", 0644, d_tracer, NULL, &tracing_saved_cmdlines_size_fops); + trace_enum_init(); + + trace_create_enum_file(d_tracer); + +#ifdef CONFIG_MODULES + register_module_notifier(&trace_module_nb); +#endif + #ifdef CONFIG_DYNAMIC_FTRACE trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, &tracing_dyn_info_fops); @@ -6575,6 +6862,10 @@ static __init int tracer_init_debugfs(void) create_trace_options_dir(&global_trace); + /* If the tracer was started via cmdline, create options for it here */ + if (global_trace.current_trace != &nop_trace) + update_tracer_options(&global_trace, global_trace.current_trace); + return 0; } @@ -6888,7 +7179,7 @@ void __init trace_init(void) tracepoint_printk = 0; } tracer_alloc_buffers(); - trace_event_init(); + trace_event_init(); } __init static int clear_boot_tracer(void) @@ -6910,5 +7201,5 @@ __init static int clear_boot_tracer(void) return 0; } -fs_initcall(tracer_init_debugfs); +fs_initcall(tracer_init_tracefs); late_initcall(clear_boot_tracer); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index dd8205a35760..d2612016de94 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -334,7 +334,7 @@ struct tracer_flags { /** - * struct tracer - a specific tracer and its callbacks to interact with debugfs + * struct tracer - a specific tracer and its callbacks to interact with tracefs * @name: the name chosen to select it on the available_tracers file * @init: called when one switches to this tracer (echo name > current_tracer) * @reset: called when one switches to another tracer @@ -1309,8 +1309,10 @@ static inline void init_ftrace_syscalls(void) { } #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); +void trace_event_enum_update(struct trace_enum_map **map, int len); #else static inline void __init trace_event_init(void) { } +static inlin void trace_event_enum_update(struct trace_enum_map **map, int len) { } #endif extern struct trace_iterator *tracepoint_print_iter; diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e2d027ac66a2..ee7b94a4810a 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -223,7 +223,7 @@ FTRACE_ENTRY(bprint, bprint_entry, __dynamic_array( u32, buf ) ), - F_printk("%pf: %s", + F_printk("%ps: %s", (void *)__entry->ip, __entry->fmt), FILTER_OTHER @@ -238,7 +238,7 @@ FTRACE_ENTRY(print, print_entry, __dynamic_array( char, buf ) ), - F_printk("%pf: %s", + F_printk("%ps: %s", (void *)__entry->ip, __entry->buf), FILTER_OTHER @@ -253,7 +253,7 @@ FTRACE_ENTRY(bputs, bputs_entry, __field( const char *, str ) ), - F_printk("%pf: %s", + F_printk("%ps: %s", (void *)__entry->ip, __entry->str), FILTER_OTHER diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index db54dda10ccc..7da1dfeb322e 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -13,7 +13,7 @@ #include <linux/workqueue.h> #include <linux/spinlock.h> #include <linux/kthread.h> -#include <linux/debugfs.h> +#include <linux/tracefs.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> @@ -480,7 +480,7 @@ static void remove_subsystem(struct ftrace_subsystem_dir *dir) return; if (!--dir->nr_events) { - debugfs_remove_recursive(dir->entry); + tracefs_remove_recursive(dir->entry); list_del(&dir->list); __put_system_dir(dir); } @@ -499,7 +499,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file) } spin_unlock(&dir->d_lock); - debugfs_remove_recursive(dir); + tracefs_remove_recursive(dir); } list_del(&file->list); @@ -1526,7 +1526,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name, } else __get_system(system); - dir->entry = debugfs_create_dir(name, parent); + dir->entry = tracefs_create_dir(name, parent); if (!dir->entry) { pr_warn("Failed to create system directory %s\n", name); __put_system(system); @@ -1539,12 +1539,12 @@ event_subsystem_dir(struct trace_array *tr, const char *name, dir->subsystem = system; file->system = dir; - entry = debugfs_create_file("filter", 0644, dir->entry, dir, + entry = tracefs_create_file("filter", 0644, dir->entry, dir, &ftrace_subsystem_filter_fops); if (!entry) { kfree(system->filter); system->filter = NULL; - pr_warn("Could not create debugfs '%s/filter' entry\n", name); + pr_warn("Could not create tracefs '%s/filter' entry\n", name); } trace_create_file("enable", 0644, dir->entry, dir, @@ -1585,9 +1585,9 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) d_events = parent; name = ftrace_event_name(call); - file->dir = debugfs_create_dir(name, d_events); + file->dir = tracefs_create_dir(name, d_events); if (!file->dir) { - pr_warn("Could not create debugfs '%s' directory\n", name); + pr_warn("Could not create tracefs '%s' directory\n", name); return -1; } @@ -1704,6 +1704,125 @@ __register_event(struct ftrace_event_call *call, struct module *mod) return 0; } +static char *enum_replace(char *ptr, struct trace_enum_map *map, int len) +{ + int rlen; + int elen; + + /* Find the length of the enum value as a string */ + elen = snprintf(ptr, 0, "%ld", map->enum_value); + /* Make sure there's enough room to replace the string with the value */ + if (len < elen) + return NULL; + + snprintf(ptr, elen + 1, "%ld", map->enum_value); + + /* Get the rest of the string of ptr */ + rlen = strlen(ptr + len); + memmove(ptr + elen, ptr + len, rlen); + /* Make sure we end the new string */ + ptr[elen + rlen] = 0; + + return ptr + elen; +} + +static void update_event_printk(struct ftrace_event_call *call, + struct trace_enum_map *map) +{ + char *ptr; + int quote = 0; + int len = strlen(map->enum_string); + + for (ptr = call->print_fmt; *ptr; ptr++) { + if (*ptr == '\\') { + ptr++; + /* paranoid */ + if (!*ptr) + break; + continue; + } + if (*ptr == '"') { + quote ^= 1; + continue; + } + if (quote) + continue; + if (isdigit(*ptr)) { + /* skip numbers */ + do { + ptr++; + /* Check for alpha chars like ULL */ + } while (isalnum(*ptr)); + /* + * A number must have some kind of delimiter after + * it, and we can ignore that too. + */ + continue; + } + if (isalpha(*ptr) || *ptr == '_') { + if (strncmp(map->enum_string, ptr, len) == 0 && + !isalnum(ptr[len]) && ptr[len] != '_') { + ptr = enum_replace(ptr, map, len); + /* Hmm, enum string smaller than value */ + if (WARN_ON_ONCE(!ptr)) + return; + /* + * No need to decrement here, as enum_replace() + * returns the pointer to the character passed + * the enum, and two enums can not be placed + * back to back without something in between. + * We can skip that something in between. + */ + continue; + } + skip_more: + do { + ptr++; + } while (isalnum(*ptr) || *ptr == '_'); + /* + * If what comes after this variable is a '.' or + * '->' then we can continue to ignore that string. + */ + if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) { + ptr += *ptr == '.' ? 1 : 2; + goto skip_more; + } + /* + * Once again, we can skip the delimiter that came + * after the string. + */ + continue; + } + } +} + +void trace_event_enum_update(struct trace_enum_map **map, int len) +{ + struct ftrace_event_call *call, *p; + const char *last_system = NULL; + int last_i; + int i; + + down_write(&trace_event_sem); + list_for_each_entry_safe(call, p, &ftrace_events, list) { + /* events are usually grouped together with systems */ + if (!last_system || call->class->system != last_system) { + last_i = 0; + last_system = call->class->system; + } + + for (i = last_i; i < len; i++) { + if (call->class->system == map[i]->system) { + /* Save the first system if need be */ + if (!last_i) + last_i = i; + update_event_printk(call, map[i]); + } + } + } + up_write(&trace_event_sem); +} + static struct ftrace_event_file * trace_create_new_event(struct ftrace_event_call *call, struct trace_array *tr) @@ -1915,7 +2034,7 @@ static int trace_module_notify(struct notifier_block *self, static struct notifier_block trace_module_nb = { .notifier_call = trace_module_notify, - .priority = 0, + .priority = 1, /* higher than trace.c module notify */ }; #endif /* CONFIG_MODULES */ @@ -2228,7 +2347,7 @@ static inline int register_event_cmds(void) { return 0; } /* * The top level array has already had its ftrace_event_file * descriptors created in order to allow for early events to - * be recorded. This function is called after the debugfs has been + * be recorded. This function is called after the tracefs has been * initialized, and we now have to create the files associated * to the events. */ @@ -2311,16 +2430,16 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) struct dentry *d_events; struct dentry *entry; - entry = debugfs_create_file("set_event", 0644, parent, + entry = tracefs_create_file("set_event", 0644, parent, tr, &ftrace_set_event_fops); if (!entry) { - pr_warn("Could not create debugfs 'set_event' entry\n"); + pr_warn("Could not create tracefs 'set_event' entry\n"); return -ENOMEM; } - d_events = debugfs_create_dir("events", parent); + d_events = tracefs_create_dir("events", parent); if (!d_events) { - pr_warn("Could not create debugfs 'events' directory\n"); + pr_warn("Could not create tracefs 'events' directory\n"); return -ENOMEM; } @@ -2412,7 +2531,7 @@ int event_trace_del_tracer(struct trace_array *tr) down_write(&trace_event_sem); __trace_remove_event_dirs(tr); - debugfs_remove_recursive(tr->event_dir); + tracefs_remove_recursive(tr->event_dir); up_write(&trace_event_sem); tr->event_dir = NULL; @@ -2534,10 +2653,10 @@ static __init int event_trace_init(void) if (IS_ERR(d_tracer)) return 0; - entry = debugfs_create_file("available_events", 0444, d_tracer, + entry = tracefs_create_file("available_events", 0444, d_tracer, tr, &ftrace_avail_fops); if (!entry) - pr_warn("Could not create debugfs 'available_events' entry\n"); + pr_warn("Could not create tracefs 'available_events' entry\n"); if (trace_define_common_fields()) pr_warn("tracing: Failed to allocate common fields"); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 12e2b99be862..174a6a71146c 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -177,7 +177,7 @@ struct ftrace_event_call __used event_##call = { \ }, \ .event.type = etype, \ .print_fmt = print, \ - .flags = TRACE_EVENT_FL_IGNORE_ENABLE | TRACE_EVENT_FL_USE_CALL_FILTER, \ + .flags = TRACE_EVENT_FL_IGNORE_ENABLE, \ }; \ struct ftrace_event_call __used \ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 2d25ad1526bb..9cfea4c6d314 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -6,7 +6,6 @@ * is Copyright (c) Steven Rostedt <srostedt@redhat.com> * */ -#include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/ftrace.h> #include <linux/slab.h> @@ -151,7 +150,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, * The curr_ret_stack is initialized to -1 and get increased * in this function. So it can be less than -1 only if it was * filtered out via ftrace_graph_notrace_addr() which can be - * set from set_graph_notrace file in debugfs by user. + * set from set_graph_notrace file in tracefs by user. */ if (current->curr_ret_stack < -1) return -EBUSY; @@ -1432,7 +1431,7 @@ static const struct file_operations graph_depth_fops = { .llseek = generic_file_llseek, }; -static __init int init_graph_debugfs(void) +static __init int init_graph_tracefs(void) { struct dentry *d_tracer; @@ -1445,7 +1444,7 @@ static __init int init_graph_debugfs(void) return 0; } -fs_initcall(init_graph_debugfs); +fs_initcall(init_graph_tracefs); static __init int init_graph_trace(void) { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d73f565b4e06..d0ce590f06e1 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -250,7 +250,7 @@ DEFINE_FETCH_symbol(string_size) #define fetch_file_offset_string_size NULL /* Fetch type information table */ -const struct fetch_type kprobes_fetch_type_table[] = { +static const struct fetch_type kprobes_fetch_type_table[] = { /* Special types */ [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1, "__data_loc char[]"), @@ -760,7 +760,8 @@ static int create_trace_kprobe(int argc, char **argv) /* Parse fetch argument */ ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg, - is_return, true); + is_return, true, + kprobes_fetch_type_table); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; @@ -1134,11 +1135,15 @@ static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct ftrace_event_call *call = &tk->tp.call; + struct bpf_prog *prog = call->prog; struct kprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; + if (prog && !trace_call_bpf(prog, regs)) + return; + head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) return; @@ -1165,11 +1170,15 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { struct ftrace_event_call *call = &tk->tp.call; + struct bpf_prog *prog = call->prog; struct kretprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; + if (prog && !trace_call_bpf(prog, regs)) + return; + head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) return; @@ -1286,7 +1295,7 @@ static int register_kprobe_event(struct trace_kprobe *tk) kfree(call->print_fmt); return -ENODEV; } - call->flags = 0; + call->flags = TRACE_EVENT_FL_KPROBE; call->class->reg = kprobe_register; call->data = tk; ret = trace_add_event_call(call); @@ -1310,7 +1319,7 @@ static int unregister_kprobe_event(struct trace_kprobe *tk) return ret; } -/* Make a debugfs interface for controlling probe points */ +/* Make a tracefs interface for controlling probe points */ static __init int init_kprobe_trace(void) { struct dentry *d_tracer; @@ -1323,20 +1332,20 @@ static __init int init_kprobe_trace(void) if (IS_ERR(d_tracer)) return 0; - entry = debugfs_create_file("kprobe_events", 0644, d_tracer, + entry = tracefs_create_file("kprobe_events", 0644, d_tracer, NULL, &kprobe_events_ops); /* Event list interface */ if (!entry) - pr_warning("Could not create debugfs " + pr_warning("Could not create tracefs " "'kprobe_events' entry\n"); /* Profile interface */ - entry = debugfs_create_file("kprobe_profile", 0444, d_tracer, + entry = tracefs_create_file("kprobe_profile", 0444, d_tracer, NULL, &kprobe_profile_ops); if (!entry) - pr_warning("Could not create debugfs " + pr_warning("Could not create tracefs " "'kprobe_profile' entry\n"); return 0; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index b983b2fd2ca1..1769a81da8a7 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -356,17 +356,14 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, /* Recursive argument parser */ static int parse_probe_arg(char *arg, const struct fetch_type *t, - struct fetch_param *f, bool is_return, bool is_kprobe) + struct fetch_param *f, bool is_return, bool is_kprobe, + const struct fetch_type *ftbl) { - const struct fetch_type *ftbl; unsigned long param; long offset; char *tmp; int ret = 0; - ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table; - BUG_ON(ftbl == NULL); - switch (arg[0]) { case '$': ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe); @@ -447,7 +444,7 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, dprm->fetch_size = get_fetch_size_function(t, dprm->fetch, ftbl); ret = parse_probe_arg(arg, t2, &dprm->orig, is_return, - is_kprobe); + is_kprobe, ftbl); if (ret) kfree(dprm); else { @@ -505,15 +502,12 @@ static int __parse_bitfield_probe_arg(const char *bf, /* String length checking wrapper */ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, - struct probe_arg *parg, bool is_return, bool is_kprobe) + struct probe_arg *parg, bool is_return, bool is_kprobe, + const struct fetch_type *ftbl) { - const struct fetch_type *ftbl; const char *t; int ret; - ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table; - BUG_ON(ftbl == NULL); - if (strlen(arg) > MAX_ARGSTR_LEN) { pr_info("Argument is too long.: %s\n", arg); return -ENOSPC; @@ -535,7 +529,8 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, } parg->offset = *size; *size += parg->type->size; - ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe); + ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, + is_kprobe, ftbl); if (ret >= 0 && t != NULL) ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 4f815fbce16d..ab283e146b70 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -25,7 +25,7 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/smp.h> -#include <linux/debugfs.h> +#include <linux/tracefs.h> #include <linux/types.h> #include <linux/string.h> #include <linux/ctype.h> @@ -229,13 +229,6 @@ ASSIGN_FETCH_FUNC(file_offset, ftype), \ #define FETCH_TYPE_STRING 0 #define FETCH_TYPE_STRSIZE 1 -/* - * Fetch type information table. - * It's declared as a weak symbol due to conditional compilation. - */ -extern __weak const struct fetch_type kprobes_fetch_type_table[]; -extern __weak const struct fetch_type uprobes_fetch_type_table[]; - #ifdef CONFIG_KPROBE_EVENT struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); @@ -333,7 +326,8 @@ find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) } extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size, - struct probe_arg *parg, bool is_return, bool is_kprobe); + struct probe_arg *parg, bool is_return, bool is_kprobe, + const struct fetch_type *ftbl); extern int traceprobe_conflict_field_name(const char *name, struct probe_arg *args, int narg); diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 75e19e86c954..6cf935316769 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -12,7 +12,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/rbtree.h> -#include <linux/debugfs.h> +#include <linux/tracefs.h> #include "trace_stat.h" #include "trace.h" @@ -65,7 +65,7 @@ static void reset_stat_session(struct stat_session *session) static void destroy_session(struct stat_session *session) { - debugfs_remove(session->file); + tracefs_remove(session->file); __reset_stat_session(session); mutex_destroy(&session->stat_mutex); kfree(session); @@ -279,9 +279,9 @@ static int tracing_stat_init(void) if (IS_ERR(d_tracing)) return 0; - stat_dir = debugfs_create_dir("trace_stat", d_tracing); + stat_dir = tracefs_create_dir("trace_stat", d_tracing); if (!stat_dir) - pr_warning("Could not create debugfs " + pr_warning("Could not create tracefs " "'trace_stat' entry\n"); return 0; } @@ -291,7 +291,7 @@ static int init_stat_file(struct stat_session *session) if (!stat_dir && tracing_stat_init()) return -ENODEV; - session->file = debugfs_create_file(session->ts->name, 0644, + session->file = tracefs_create_file(session->ts->name, 0644, stat_dir, session, &tracing_stat_fops); if (!session->file) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 7dc1c8abecd6..d60fe62ec4fa 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -196,7 +196,7 @@ DEFINE_FETCH_file_offset(string) DEFINE_FETCH_file_offset(string_size) /* Fetch type information table */ -const struct fetch_type uprobes_fetch_type_table[] = { +static const struct fetch_type uprobes_fetch_type_table[] = { /* Special types */ [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1, "__data_loc char[]"), @@ -535,7 +535,8 @@ static int create_trace_uprobe(int argc, char **argv) /* Parse fetch argument */ ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg, - is_return, false); + is_return, false, + uprobes_fetch_type_table); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; @@ -1005,7 +1006,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) return true; list_for_each_entry(event, &filter->perf_events, hw.tp_list) { - if (event->hw.tp_target->mm == mm) + if (event->hw.target->mm == mm) return true; } @@ -1015,7 +1016,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) static inline bool uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) { - return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); + return __uprobe_perf_filter(&tu->filter, event->hw.target->mm); } static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) @@ -1023,10 +1024,10 @@ static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) bool done; write_lock(&tu->filter.rwlock); - if (event->hw.tp_target) { + if (event->hw.target) { list_del(&event->hw.tp_list); done = tu->filter.nr_systemwide || - (event->hw.tp_target->flags & PF_EXITING) || + (event->hw.target->flags & PF_EXITING) || uprobe_filter_event(tu, event); } else { tu->filter.nr_systemwide--; @@ -1046,7 +1047,7 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) int err; write_lock(&tu->filter.rwlock); - if (event->hw.tp_target) { + if (event->hw.target) { /* * event->parent != NULL means copy_process(), we can avoid * uprobe_apply(). current->mm must be probed and we can rely diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f2be11ab7e08..2316f50b07a4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -603,9 +603,37 @@ static void watchdog_nmi_disable(unsigned int cpu) cpu0_err = 0; } } + +void watchdog_nmi_enable_all(void) +{ + int cpu; + + if (!watchdog_user_enabled) + return; + + get_online_cpus(); + for_each_online_cpu(cpu) + watchdog_nmi_enable(cpu); + put_online_cpus(); +} + +void watchdog_nmi_disable_all(void) +{ + int cpu; + + if (!watchdog_running) + return; + + get_online_cpus(); + for_each_online_cpu(cpu) + watchdog_nmi_disable(cpu); + put_online_cpus(); +} #else static int watchdog_nmi_enable(unsigned int cpu) { return 0; } static void watchdog_nmi_disable(unsigned int cpu) { return; } +void watchdog_nmi_enable_all(void) {} +void watchdog_nmi_disable_all(void) {} #endif /* CONFIG_HARDLOCKUP_DETECTOR */ static struct smp_hotplug_thread watchdog_threads = { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 15c9118f0389..17670573dda8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1193,16 +1193,7 @@ config DEBUG_CREDENTIALS menu "RCU Debugging" config PROVE_RCU - bool "RCU debugging: prove RCU correctness" - depends on PROVE_LOCKING - default n - help - This feature enables lockdep extensions that check for correct - use of RCU APIs. This is currently under development. Say Y - if you want to debug RCU usage or help work on the PROVE_RCU - feature. - - Say N if you are unsure. + def_bool PROVE_LOCKING config PROVE_RCU_REPEATEDLY bool "RCU debugging: don't disable PROVE_RCU on first splat" @@ -1270,6 +1261,30 @@ config RCU_TORTURE_TEST_RUNNABLE Say N here if you want the RCU torture tests to start only after being manually enabled via /proc. +config RCU_TORTURE_TEST_SLOW_INIT + bool "Slow down RCU grace-period initialization to expose races" + depends on RCU_TORTURE_TEST + help + This option makes grace-period initialization block for a + few jiffies between initializing each pair of consecutive + rcu_node structures. This helps to expose races involving + grace-period initialization, in other words, it makes your + kernel less stable. It can also greatly increase grace-period + latency, especially on systems with large numbers of CPUs. + This is useful when torture-testing RCU, but in almost no + other circumstance. + + Say Y here if you want your system to crash and hang more often. + Say N if you want a sane system. + +config RCU_TORTURE_TEST_SLOW_INIT_DELAY + int "How much to slow down RCU grace-period initialization" + range 0 5 + default 3 + help + This option specifies the number of jiffies to wait between + each rcu_node structure initialization. + config RCU_CPU_STALL_TIMEOUT int "RCU CPU stall timeout in seconds" depends on RCU_STALL_COMMON diff --git a/lib/div64.c b/lib/div64.c index 4382ad77777e..19ea7ed4b948 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -127,7 +127,7 @@ EXPORT_SYMBOL(div64_u64_rem); * by the book 'Hacker's Delight'. The original source and full proof * can be found here and is available for use without restriction. * - * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt' + * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt' */ #ifndef div64_u64 u64 div64_u64(u64 dividend, u64 divisor) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 9d96e283520c..75232ad0a5e7 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -317,6 +317,32 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) } EXPORT_SYMBOL(iov_iter_fault_in_readable); +/* + * Fault in one or more iovecs of the given iov_iter, to a maximum length of + * bytes. For each iovec, fault in each page that constitutes the iovec. + * + * Return 0 on success, or non-zero if the memory could not be accessed (i.e. + * because it is an invalid address). + */ +int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes) +{ + size_t skip = i->iov_offset; + const struct iovec *iov; + int err; + struct iovec v; + + if (!(i->type & (ITER_BVEC|ITER_KVEC))) { + iterate_iovec(i, bytes, v, iov, skip, ({ + err = fault_in_multipages_readable(v.iov_base, + v.iov_len); + if (unlikely(err)) + return err; + 0;})) + } + return 0; +} +EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable); + void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, size_t count) @@ -766,3 +792,60 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) flags); } EXPORT_SYMBOL(dup_iter); + +int import_iovec(int type, const struct iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i) +{ + ssize_t n; + struct iovec *p; + n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, + *iov, &p); + if (n < 0) { + if (p != *iov) + kfree(p); + *iov = NULL; + return n; + } + iov_iter_init(i, type, p, nr_segs, n); + *iov = p == *iov ? NULL : p; + return 0; +} +EXPORT_SYMBOL(import_iovec); + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> + +int compat_import_iovec(int type, const struct compat_iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i) +{ + ssize_t n; + struct iovec *p; + n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, + *iov, &p); + if (n < 0) { + if (p != *iov) + kfree(p); + *iov = NULL; + return n; + } + iov_iter_init(i, type, p, nr_segs, n); + *iov = p == *iov ? NULL : p; + return 0; +} +#endif + +int import_single_range(int rw, void __user *buf, size_t len, + struct iovec *iov, struct iov_iter *i) +{ + if (len > MAX_RW_COUNT) + len = MAX_RW_COUNT; + if (unlikely(!access_ok(!rw, buf, len))) + return -EFAULT; + + iov->iov_base = buf; + iov->iov_len = len; + iov_iter_init(i, rw, iov, 1, len); + return 0; +} diff --git a/mm/filemap.c b/mm/filemap.c index 434dba317400..12548d03c11d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -13,7 +13,6 @@ #include <linux/compiler.h> #include <linux/fs.h> #include <linux/uaccess.h> -#include <linux/aio.h> #include <linux/capability.h> #include <linux/kernel_stat.h> #include <linux/gfp.h> diff --git a/mm/page_io.c b/mm/page_io.c index e6045804c8d8..a96c8562d835 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -20,8 +20,8 @@ #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/frontswap.h> -#include <linux/aio.h> #include <linux/blkdev.h> +#include <linux/uio.h> #include <asm/pgtable.h> static struct bio *get_swap_bio(gfp_t gfp_flags, @@ -274,7 +274,6 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE); init_sync_kiocb(&kiocb, swap_file); kiocb.ki_pos = page_file_offset(page); - kiocb.ki_nbytes = PAGE_SIZE; set_page_writeback(page); unlock_page(page); diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index b1597690530c..e88d071648c2 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -257,22 +257,18 @@ static ssize_t process_vm_rw(pid_t pid, struct iovec *iov_r = iovstack_r; struct iov_iter iter; ssize_t rc; + int dir = vm_write ? WRITE : READ; if (flags != 0) return -EINVAL; /* Check iovecs */ - if (vm_write) - rc = rw_copy_check_uvector(WRITE, lvec, liovcnt, UIO_FASTIOV, - iovstack_l, &iov_l); - else - rc = rw_copy_check_uvector(READ, lvec, liovcnt, UIO_FASTIOV, - iovstack_l, &iov_l); - if (rc <= 0) + rc = import_iovec(dir, lvec, liovcnt, UIO_FASTIOV, &iov_l, &iter); + if (rc < 0) + return rc; + if (!iov_iter_count(&iter)) goto free_iovecs; - iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc); - rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV, iovstack_r, &iov_r); if (rc <= 0) @@ -283,8 +279,7 @@ static ssize_t process_vm_rw(pid_t pid, free_iovecs: if (iov_r != iovstack_r) kfree(iov_r); - if (iov_l != iovstack_l) - kfree(iov_l); + kfree(iov_l); return rc; } @@ -320,21 +315,16 @@ compat_process_vm_rw(compat_pid_t pid, struct iovec *iov_r = iovstack_r; struct iov_iter iter; ssize_t rc = -EFAULT; + int dir = vm_write ? WRITE : READ; if (flags != 0) return -EINVAL; - if (vm_write) - rc = compat_rw_copy_check_uvector(WRITE, lvec, liovcnt, - UIO_FASTIOV, iovstack_l, - &iov_l); - else - rc = compat_rw_copy_check_uvector(READ, lvec, liovcnt, - UIO_FASTIOV, iovstack_l, - &iov_l); - if (rc <= 0) + rc = compat_import_iovec(dir, lvec, liovcnt, UIO_FASTIOV, &iov_l, &iter); + if (rc < 0) + return rc; + if (!iov_iter_count(&iter)) goto free_iovecs; - iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc); rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV, iovstack_r, &iov_r); @@ -346,8 +336,7 @@ compat_process_vm_rw(compat_pid_t pid, free_iovecs: if (iov_r != iovstack_r) kfree(iov_r); - if (iov_l != iovstack_l) - kfree(iov_l); + kfree(iov_l); return rc; } diff --git a/mm/shmem.c b/mm/shmem.c index cf2d0ca010bc..80b360c7bcd1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -31,7 +31,7 @@ #include <linux/mm.h> #include <linux/export.h> #include <linux/swap.h> -#include <linux/aio.h> +#include <linux/uio.h> static struct vfsmount *shm_mnt; diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 5bdd300db0f7..2df34eb5d65f 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -272,7 +272,7 @@ static int parse_qos(const char *buff) qos.rxtp.max_pcr = rx_pcr; qos.rxtp.max_sdu = rx_sdu; qos.aal = ATM_AAL5; - dprintk("parse_qos(): setting qos paramameters to tx=%d,%d rx=%d,%d\n", + dprintk("parse_qos(): setting qos parameters to tx=%d,%d rx=%d,%d\n", qos.txtp.max_pcr, qos.txtp.max_sdu, qos.rxtp.max_pcr, qos.rxtp.max_sdu); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index f027a708b7e0..4a356b7c081b 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -46,7 +46,6 @@ #include <linux/stddef.h> #include <linux/slab.h> #include <linux/errno.h> -#include <linux/aio.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/spinlock.h> diff --git a/net/mac80211/trace.c b/net/mac80211/trace.c index 386e45d8a958..edfe0c170a1c 100644 --- a/net/mac80211/trace.c +++ b/net/mac80211/trace.c @@ -8,6 +8,7 @@ #include "debug.h" #define CREATE_TRACE_POINTS #include "trace.h" +#include "trace_msg.h" #ifdef CONFIG_MAC80211_MESSAGE_TRACING void __sdata_info(const char *fmt, ...) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 263a9561eb26..755a5388dbca 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2312,44 +2312,6 @@ TRACE_EVENT(drv_tdls_recv_channel_switch, ) ); -#ifdef CONFIG_MAC80211_MESSAGE_TRACING -#undef TRACE_SYSTEM -#define TRACE_SYSTEM mac80211_msg - -#define MAX_MSG_LEN 100 - -DECLARE_EVENT_CLASS(mac80211_msg_event, - TP_PROTO(struct va_format *vaf), - - TP_ARGS(vaf), - - TP_STRUCT__entry( - __dynamic_array(char, msg, MAX_MSG_LEN) - ), - - TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); - ), - - TP_printk("%s", __get_str(msg)) -); - -DEFINE_EVENT(mac80211_msg_event, mac80211_info, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); -DEFINE_EVENT(mac80211_msg_event, mac80211_dbg, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); -DEFINE_EVENT(mac80211_msg_event, mac80211_err, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); -#endif - #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h new file mode 100644 index 000000000000..768f7c22a190 --- /dev/null +++ b/net/mac80211/trace_msg.h @@ -0,0 +1,53 @@ +#ifdef CONFIG_MAC80211_MESSAGE_TRACING + +#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) +#define __MAC80211_MSG_DRIVER_TRACE + +#include <linux/tracepoint.h> +#include <net/mac80211.h> +#include "ieee80211_i.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mac80211_msg + +#define MAX_MSG_LEN 100 + +DECLARE_EVENT_CLASS(mac80211_msg_event, + TP_PROTO(struct va_format *vaf), + + TP_ARGS(vaf), + + TP_STRUCT__entry( + __dynamic_array(char, msg, MAX_MSG_LEN) + ), + + TP_fast_assign( + WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), + MAX_MSG_LEN, vaf->fmt, + *vaf->va) >= MAX_MSG_LEN); + ), + + TP_printk("%s", __get_str(msg)) +); + +DEFINE_EVENT(mac80211_msg_event, mac80211_info, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); +DEFINE_EVENT(mac80211_msg_event, mac80211_dbg, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); +DEFINE_EVENT(mac80211_msg_event, mac80211_err, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); +#endif /* !__MAC80211_MSG_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace_msg +#include <trace/define_trace.h> + +#endif diff --git a/net/socket.c b/net/socket.c index 245330ca0015..1dbff3e60437 100644 --- a/net/socket.c +++ b/net/socket.c @@ -633,8 +633,7 @@ static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, init_sync_kiocb(&iocb, NULL); ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) : __sock_sendmsg(&iocb, sock, msg, size); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); + BUG_ON(ret == -EIOCBQUEUED); return ret; } @@ -766,8 +765,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, init_sync_kiocb(&iocb, NULL); ret = __sock_recvmsg(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); + BUG_ON(ret == -EIOCBQUEUED); return ret; } EXPORT_SYMBOL(sock_recvmsg); @@ -780,8 +778,7 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, init_sync_kiocb(&iocb, NULL); ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); + BUG_ON(ret == -EIOCBQUEUED); return ret; } @@ -858,11 +855,11 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) if (iocb->ki_pos != 0) return -ESPIPE; - if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ + if (!iov_iter_count(to)) /* Match SYS5 behaviour */ return 0; res = __sock_recvmsg(iocb, sock, &msg, - iocb->ki_nbytes, msg.msg_flags); + iov_iter_count(to), msg.msg_flags); *to = msg.msg_iter; return res; } @@ -883,7 +880,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (sock->type == SOCK_SEQPACKET) msg.msg_flags |= MSG_EOR; - res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes); + res = __sock_sendmsg(iocb, sock, &msg, iov_iter_count(from)); *from = msg.msg_iter; return res; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 124676c13780..e28909fddd30 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1136,7 +1136,7 @@ rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) int i, rc; i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; - dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i); + dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); while (i--) { r = kzalloc(sizeof(*r), GFP_KERNEL); @@ -1169,7 +1169,7 @@ rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) int i, rc; i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; - dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i); + dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); while (i--) { r = kzalloc(sizeof(*r), GFP_KERNEL); diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index b5b3600dcdf5..fe98fb226e6e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -6,23 +6,39 @@ hostprogs-y := test_verifier test_maps hostprogs-y += sock_example hostprogs-y += sockex1 hostprogs-y += sockex2 +hostprogs-y += tracex1 +hostprogs-y += tracex2 +hostprogs-y += tracex3 +hostprogs-y += tracex4 test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o sock_example-objs := sock_example.o libbpf.o sockex1-objs := bpf_load.o libbpf.o sockex1_user.o sockex2-objs := bpf_load.o libbpf.o sockex2_user.o +tracex1-objs := bpf_load.o libbpf.o tracex1_user.o +tracex2-objs := bpf_load.o libbpf.o tracex2_user.o +tracex3-objs := bpf_load.o libbpf.o tracex3_user.o +tracex4-objs := bpf_load.o libbpf.o tracex4_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) always += sockex1_kern.o always += sockex2_kern.o +always += tracex1_kern.o +always += tracex2_kern.o +always += tracex3_kern.o +always += tracex4_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable HOSTLOADLIBES_sockex1 += -lelf HOSTLOADLIBES_sockex2 += -lelf +HOSTLOADLIBES_tracex1 += -lelf +HOSTLOADLIBES_tracex2 += -lelf +HOSTLOADLIBES_tracex3 += -lelf +HOSTLOADLIBES_tracex4 += -lelf -lrt # point this to your LLVM backend with bpf support LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index ca0333146006..1c872bcf5a80 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -15,6 +15,12 @@ static int (*bpf_map_update_elem)(void *map, void *key, void *value, (void *) BPF_FUNC_map_update_elem; static int (*bpf_map_delete_elem)(void *map, void *key) = (void *) BPF_FUNC_map_delete_elem; +static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = + (void *) BPF_FUNC_probe_read; +static unsigned long long (*bpf_ktime_get_ns)(void) = + (void *) BPF_FUNC_ktime_get_ns; +static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = + (void *) BPF_FUNC_trace_printk; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 1831d236382b..38dac5a53b51 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -8,29 +8,70 @@ #include <unistd.h> #include <string.h> #include <stdbool.h> +#include <stdlib.h> #include <linux/bpf.h> #include <linux/filter.h> +#include <linux/perf_event.h> +#include <sys/syscall.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <poll.h> #include "libbpf.h" #include "bpf_helpers.h" #include "bpf_load.h" +#define DEBUGFS "/sys/kernel/debug/tracing/" + static char license[128]; +static int kern_version; static bool processed_sec[128]; int map_fd[MAX_MAPS]; int prog_fd[MAX_PROGS]; +int event_fd[MAX_PROGS]; int prog_cnt; static int load_and_attach(const char *event, struct bpf_insn *prog, int size) { - int fd; bool is_socket = strncmp(event, "socket", 6) == 0; - - if (!is_socket) - /* tracing events tbd */ + bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; + bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; + enum bpf_prog_type prog_type; + char buf[256]; + int fd, efd, err, id; + struct perf_event_attr attr = {}; + + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW; + attr.sample_period = 1; + attr.wakeup_events = 1; + + if (is_socket) { + prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + } else if (is_kprobe || is_kretprobe) { + prog_type = BPF_PROG_TYPE_KPROBE; + } else { + printf("Unknown event '%s'\n", event); return -1; + } + + if (is_kprobe || is_kretprobe) { + if (is_kprobe) + event += 7; + else + event += 10; + + snprintf(buf, sizeof(buf), + "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", + is_kprobe ? 'p' : 'r', event, event); + err = system(buf); + if (err < 0) { + printf("failed to create kprobe '%s' error '%s'\n", + event, strerror(errno)); + return -1; + } + } - fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, - prog, size, license); + fd = bpf_prog_load(prog_type, prog, size, license, kern_version); if (fd < 0) { printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); @@ -39,6 +80,41 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_fd[prog_cnt++] = fd; + if (is_socket) + return 0; + + strcpy(buf, DEBUGFS); + strcat(buf, "events/kprobes/"); + strcat(buf, event); + strcat(buf, "/id"); + + efd = open(buf, O_RDONLY, 0); + if (efd < 0) { + printf("failed to open event %s\n", event); + return -1; + } + + err = read(efd, buf, sizeof(buf)); + if (err < 0 || err >= sizeof(buf)) { + printf("read from '%s' failed '%s'\n", event, strerror(errno)); + return -1; + } + + close(efd); + + buf[err] = 0; + id = atoi(buf); + attr.config = id; + + efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); + if (efd < 0) { + printf("event %d fd %d err %s\n", id, efd, strerror(errno)); + return -1; + } + event_fd[prog_cnt - 1] = efd; + ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); + ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); + return 0; } @@ -135,6 +211,9 @@ int load_bpf_file(char *path) if (gelf_getehdr(elf, &ehdr) != &ehdr) return 1; + /* clear all kprobes */ + i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events"); + /* scan over all elf sections to get license and map info */ for (i = 1; i < ehdr.e_shnum; i++) { @@ -149,6 +228,14 @@ int load_bpf_file(char *path) if (strcmp(shname, "license") == 0) { processed_sec[i] = true; memcpy(license, data->d_buf, data->d_size); + } else if (strcmp(shname, "version") == 0) { + processed_sec[i] = true; + if (data->d_size != sizeof(int)) { + printf("invalid size of version section %zd\n", + data->d_size); + return 1; + } + memcpy(&kern_version, data->d_buf, sizeof(int)); } else if (strcmp(shname, "maps") == 0) { processed_sec[i] = true; if (load_maps(data->d_buf, data->d_size)) @@ -178,7 +265,8 @@ int load_bpf_file(char *path) if (parse_relo_and_apply(data, symbols, &shdr, insns)) continue; - if (memcmp(shname_prog, "events/", 7) == 0 || + if (memcmp(shname_prog, "kprobe/", 7) == 0 || + memcmp(shname_prog, "kretprobe/", 10) == 0 || memcmp(shname_prog, "socket", 6) == 0) load_and_attach(shname_prog, insns, data_prog->d_size); } @@ -193,7 +281,8 @@ int load_bpf_file(char *path) if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) continue; - if (memcmp(shname, "events/", 7) == 0 || + if (memcmp(shname, "kprobe/", 7) == 0 || + memcmp(shname, "kretprobe/", 10) == 0 || memcmp(shname, "socket", 6) == 0) load_and_attach(shname, data->d_buf, data->d_size); } @@ -201,3 +290,23 @@ int load_bpf_file(char *path) close(fd); return 0; } + +void read_trace_pipe(void) +{ + int trace_fd; + + trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); + if (trace_fd < 0) + return; + + while (1) { + static char buf[4096]; + ssize_t sz; + + sz = read(trace_fd, buf, sizeof(buf)); + if (sz > 0) { + buf[sz] = 0; + puts(buf); + } + } +} diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index 27789a34f5e6..cbd7c2b532b9 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -6,6 +6,7 @@ extern int map_fd[MAX_MAPS]; extern int prog_fd[MAX_PROGS]; +extern int event_fd[MAX_PROGS]; /* parses elf file compiled by llvm .c->.o * . parses 'maps' section and creates maps via BPF syscall @@ -21,4 +22,6 @@ extern int prog_fd[MAX_PROGS]; */ int load_bpf_file(char *path); +void read_trace_pipe(void); + #endif diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c index 46d50b7ddf79..7e1efa7e2ed7 100644 --- a/samples/bpf/libbpf.c +++ b/samples/bpf/libbpf.c @@ -81,7 +81,7 @@ char bpf_log_buf[LOG_BUF_SIZE]; int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, int prog_len, - const char *license) + const char *license, int kern_version) { union bpf_attr attr = { .prog_type = prog_type, @@ -93,6 +93,11 @@ int bpf_prog_load(enum bpf_prog_type prog_type, .log_level = 1, }; + /* assign one field outside of struct init to make sure any + * padding is zero initialized + */ + attr.kern_version = kern_version; + bpf_log_buf[0] = 0; return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); @@ -121,3 +126,10 @@ int open_raw_sock(const char *name) return sock; } + +int perf_event_open(struct perf_event_attr *attr, int pid, int cpu, + int group_fd, unsigned long flags) +{ + return syscall(__NR_perf_event_open, attr, pid, cpu, + group_fd, flags); +} diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 58c5fe1bdba1..ac7b09672b46 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -13,7 +13,7 @@ int bpf_get_next_key(int fd, void *key, void *next_key); int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, int insn_len, - const char *license); + const char *license, int kern_version); #define LOG_BUF_SIZE 65536 extern char bpf_log_buf[LOG_BUF_SIZE]; @@ -182,4 +182,7 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; /* create RAW socket and bind to interface 'name' */ int open_raw_sock(const char *name); +struct perf_event_attr; +int perf_event_open(struct perf_event_attr *attr, int pid, int cpu, + int group_fd, unsigned long flags); #endif diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c index c8ad0404416f..a0ce251c5390 100644 --- a/samples/bpf/sock_example.c +++ b/samples/bpf/sock_example.c @@ -56,7 +56,7 @@ static int test_sock(void) }; prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), - "GPL"); + "GPL", 0); if (prog_fd < 0) { printf("failed to load prog '%s'\n", strerror(errno)); goto cleanup; diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index b96175e90363..740ce97cda5e 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -689,7 +689,7 @@ static int test(void) prog_fd = bpf_prog_load(BPF_PROG_TYPE_UNSPEC, prog, prog_len * sizeof(struct bpf_insn), - "GPL"); + "GPL", 0); if (tests[i].result == ACCEPT) { if (prog_fd < 0) { diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c new file mode 100644 index 000000000000..31620463701a --- /dev/null +++ b/samples/bpf/tracex1_kern.c @@ -0,0 +1,50 @@ +/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <uapi/linux/bpf.h> +#include <linux/version.h> +#include "bpf_helpers.h" + +#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) + +/* kprobe is NOT a stable ABI + * kernel functions can be removed, renamed or completely change semantics. + * Number of arguments and their positions can change, etc. + * In such case this bpf+kprobe example will no longer be meaningful + */ +SEC("kprobe/__netif_receive_skb_core") +int bpf_prog1(struct pt_regs *ctx) +{ + /* attaches to kprobe netif_receive_skb, + * looks for packets on loobpack device and prints them + */ + char devname[IFNAMSIZ] = {}; + struct net_device *dev; + struct sk_buff *skb; + int len; + + /* non-portable! works for the given kernel only */ + skb = (struct sk_buff *) ctx->di; + + dev = _(skb->dev); + + len = _(skb->len); + + bpf_probe_read(devname, sizeof(devname), dev->name); + + if (devname[0] == 'l' && devname[1] == 'o') { + char fmt[] = "skb %p len %d\n"; + /* using bpf_trace_printk() for DEBUG ONLY */ + bpf_trace_printk(fmt, sizeof(fmt), skb, len); + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c new file mode 100644 index 000000000000..31a48183beea --- /dev/null +++ b/samples/bpf/tracex1_user.c @@ -0,0 +1,25 @@ +#include <stdio.h> +#include <linux/bpf.h> +#include <unistd.h> +#include "libbpf.h" +#include "bpf_load.h" + +int main(int ac, char **argv) +{ + FILE *f; + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + f = popen("taskset 1 ping -c5 localhost", "r"); + (void) f; + + read_trace_pipe(); + + return 0; +} diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c new file mode 100644 index 000000000000..19ec1cfc45db --- /dev/null +++ b/samples/bpf/tracex2_kern.c @@ -0,0 +1,86 @@ +/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(long), + .value_size = sizeof(long), + .max_entries = 1024, +}; + +/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe + * example will no longer be meaningful + */ +SEC("kprobe/kfree_skb") +int bpf_prog2(struct pt_regs *ctx) +{ + long loc = 0; + long init_val = 1; + long *value; + + /* x64 specific: read ip of kfree_skb caller. + * non-portable version of __builtin_return_address(0) + */ + bpf_probe_read(&loc, sizeof(loc), (void *)ctx->sp); + + value = bpf_map_lookup_elem(&my_map, &loc); + if (value) + *value += 1; + else + bpf_map_update_elem(&my_map, &loc, &init_val, BPF_ANY); + return 0; +} + +static unsigned int log2(unsigned int v) +{ + unsigned int r; + unsigned int shift; + + r = (v > 0xFFFF) << 4; v >>= r; + shift = (v > 0xFF) << 3; v >>= shift; r |= shift; + shift = (v > 0xF) << 2; v >>= shift; r |= shift; + shift = (v > 0x3) << 1; v >>= shift; r |= shift; + r |= (v >> 1); + return r; +} + +static unsigned int log2l(unsigned long v) +{ + unsigned int hi = v >> 32; + if (hi) + return log2(hi) + 32; + else + return log2(v); +} + +struct bpf_map_def SEC("maps") my_hist_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 64, +}; + +SEC("kprobe/sys_write") +int bpf_prog3(struct pt_regs *ctx) +{ + long write_size = ctx->dx; /* arg3 */ + long init_val = 1; + long *value; + u32 index = log2l(write_size); + + value = bpf_map_lookup_elem(&my_hist_map, &index); + if (value) + __sync_fetch_and_add(value, 1); + return 0; +} +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c new file mode 100644 index 000000000000..91b8d0896fbb --- /dev/null +++ b/samples/bpf/tracex2_user.c @@ -0,0 +1,95 @@ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <signal.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" + +#define MAX_INDEX 64 +#define MAX_STARS 38 + +static void stars(char *str, long val, long max, int width) +{ + int i; + + for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++) + str[i] = '*'; + if (val > max) + str[i - 1] = '+'; + str[i] = '\0'; +} + +static void print_hist(int fd) +{ + int key; + long value; + long data[MAX_INDEX] = {}; + char starstr[MAX_STARS]; + int i; + int max_ind = -1; + long max_value = 0; + + for (key = 0; key < MAX_INDEX; key++) { + bpf_lookup_elem(fd, &key, &value); + data[key] = value; + if (value && key > max_ind) + max_ind = key; + if (value > max_value) + max_value = value; + } + + printf(" syscall write() stats\n"); + printf(" byte_size : count distribution\n"); + for (i = 1; i <= max_ind + 1; i++) { + stars(starstr, data[i - 1], max_value, MAX_STARS); + printf("%8ld -> %-8ld : %-8ld |%-*s|\n", + (1l << i) >> 1, (1l << i) - 1, data[i - 1], + MAX_STARS, starstr); + } +} +static void int_exit(int sig) +{ + print_hist(map_fd[1]); + exit(0); +} + +int main(int ac, char **argv) +{ + char filename[256]; + long key, next_key, value; + FILE *f; + int i; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + signal(SIGINT, int_exit); + + /* start 'ping' in the background to have some kfree_skb events */ + f = popen("ping -c5 localhost", "r"); + (void) f; + + /* start 'dd' in the background to have plenty of 'write' syscalls */ + f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r"); + (void) f; + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + for (i = 0; i < 5; i++) { + key = 0; + while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_lookup_elem(map_fd[0], &next_key, &value); + printf("location 0x%lx count %ld\n", next_key, value); + key = next_key; + } + if (key) + printf("\n"); + sleep(1); + } + print_hist(map_fd[1]); + + return 0; +} diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c new file mode 100644 index 000000000000..255ff2792366 --- /dev/null +++ b/samples/bpf/tracex3_kern.c @@ -0,0 +1,89 @@ +/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(long), + .value_size = sizeof(u64), + .max_entries = 4096, +}; + +/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe + * example will no longer be meaningful + */ +SEC("kprobe/blk_mq_start_request") +int bpf_prog1(struct pt_regs *ctx) +{ + long rq = ctx->di; + u64 val = bpf_ktime_get_ns(); + + bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY); + return 0; +} + +static unsigned int log2l(unsigned long long n) +{ +#define S(k) if (n >= (1ull << k)) { i += k; n >>= k; } + int i = -(n == 0); + S(32); S(16); S(8); S(4); S(2); S(1); + return i; +#undef S +} + +#define SLOTS 100 + +struct bpf_map_def SEC("maps") lat_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = SLOTS, +}; + +SEC("kprobe/blk_update_request") +int bpf_prog2(struct pt_regs *ctx) +{ + long rq = ctx->di; + u64 *value, l, base; + u32 index; + + value = bpf_map_lookup_elem(&my_map, &rq); + if (!value) + return 0; + + u64 cur_time = bpf_ktime_get_ns(); + u64 delta = cur_time - *value; + + bpf_map_delete_elem(&my_map, &rq); + + /* the lines below are computing index = log10(delta)*10 + * using integer arithmetic + * index = 29 ~ 1 usec + * index = 59 ~ 1 msec + * index = 89 ~ 1 sec + * index = 99 ~ 10sec or more + * log10(x)*10 = log2(x)*10/log2(10) = log2(x)*3 + */ + l = log2l(delta); + base = 1ll << l; + index = (l * 64 + (delta - base) * 64 / base) * 3 / 64; + + if (index >= SLOTS) + index = SLOTS - 1; + + value = bpf_map_lookup_elem(&lat_map, &index); + if (value) + __sync_fetch_and_add((long *)value, 1); + + return 0; +} +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c new file mode 100644 index 000000000000..0aaa933ab938 --- /dev/null +++ b/samples/bpf/tracex3_user.c @@ -0,0 +1,150 @@ +/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> +#include <stdbool.h> +#include <string.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) + +#define SLOTS 100 + +static void clear_stats(int fd) +{ + __u32 key; + __u64 value = 0; + + for (key = 0; key < SLOTS; key++) + bpf_update_elem(fd, &key, &value, BPF_ANY); +} + +const char *color[] = { + "\033[48;5;255m", + "\033[48;5;252m", + "\033[48;5;250m", + "\033[48;5;248m", + "\033[48;5;246m", + "\033[48;5;244m", + "\033[48;5;242m", + "\033[48;5;240m", + "\033[48;5;238m", + "\033[48;5;236m", + "\033[48;5;234m", + "\033[48;5;232m", +}; +const int num_colors = ARRAY_SIZE(color); + +const char nocolor[] = "\033[00m"; + +const char *sym[] = { + " ", + " ", + ".", + ".", + "*", + "*", + "o", + "o", + "O", + "O", + "#", + "#", +}; + +bool full_range = false; +bool text_only = false; + +static void print_banner(void) +{ + if (full_range) + printf("|1ns |10ns |100ns |1us |10us |100us" + " |1ms |10ms |100ms |1s |10s\n"); + else + printf("|1us |10us |100us |1ms |10ms " + "|100ms |1s |10s\n"); +} + +static void print_hist(int fd) +{ + __u32 key; + __u64 value; + __u64 cnt[SLOTS]; + __u64 max_cnt = 0; + __u64 total_events = 0; + + for (key = 0; key < SLOTS; key++) { + value = 0; + bpf_lookup_elem(fd, &key, &value); + cnt[key] = value; + total_events += value; + if (value > max_cnt) + max_cnt = value; + } + clear_stats(fd); + for (key = full_range ? 0 : 29; key < SLOTS; key++) { + int c = num_colors * cnt[key] / (max_cnt + 1); + + if (text_only) + printf("%s", sym[c]); + else + printf("%s %s", color[c], nocolor); + } + printf(" # %lld\n", total_events); +} + +int main(int ac, char **argv) +{ + char filename[256]; + int i; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + for (i = 1; i < ac; i++) { + if (strcmp(argv[i], "-a") == 0) { + full_range = true; + } else if (strcmp(argv[i], "-t") == 0) { + text_only = true; + } else if (strcmp(argv[i], "-h") == 0) { + printf("Usage:\n" + " -a display wider latency range\n" + " -t text only\n"); + return 1; + } + } + + printf(" heatmap of IO latency\n"); + if (text_only) + printf(" %s", sym[num_colors - 1]); + else + printf(" %s %s", color[num_colors - 1], nocolor); + printf(" - many events with this latency\n"); + + if (text_only) + printf(" %s", sym[0]); + else + printf(" %s %s", color[0], nocolor); + printf(" - few events\n"); + + for (i = 0; ; i++) { + if (i % 20 == 0) + print_banner(); + print_hist(map_fd[1]); + sleep(2); + } + + return 0; +} diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c new file mode 100644 index 000000000000..126b80512228 --- /dev/null +++ b/samples/bpf/tracex4_kern.c @@ -0,0 +1,54 @@ +/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/ptrace.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +struct pair { + u64 val; + u64 ip; +}; + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(long), + .value_size = sizeof(struct pair), + .max_entries = 1000000, +}; + +/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe + * example will no longer be meaningful + */ +SEC("kprobe/kmem_cache_free") +int bpf_prog1(struct pt_regs *ctx) +{ + long ptr = ctx->si; + + bpf_map_delete_elem(&my_map, &ptr); + return 0; +} + +SEC("kretprobe/kmem_cache_alloc_node") +int bpf_prog2(struct pt_regs *ctx) +{ + long ptr = ctx->ax; + long ip = 0; + + /* get ip address of kmem_cache_alloc_node() caller */ + bpf_probe_read(&ip, sizeof(ip), (void *)(ctx->bp + sizeof(ip))); + + struct pair v = { + .val = bpf_ktime_get_ns(), + .ip = ip, + }; + + bpf_map_update_elem(&my_map, &ptr, &v, BPF_ANY); + return 0; +} +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c new file mode 100644 index 000000000000..bc4a3bdea6ed --- /dev/null +++ b/samples/bpf/tracex4_user.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> +#include <stdbool.h> +#include <string.h> +#include <time.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" + +struct pair { + long long val; + __u64 ip; +}; + +static __u64 time_get_ns(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * 1000000000ull + ts.tv_nsec; +} + +static void print_old_objects(int fd) +{ + long long val = time_get_ns(); + __u64 key, next_key; + struct pair v; + + key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */ + + key = -1; + while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_lookup_elem(map_fd[0], &next_key, &v); + key = next_key; + if (val - v.val < 1000000000ll) + /* object was allocated more then 1 sec ago */ + continue; + printf("obj 0x%llx is %2lldsec old was allocated at ip %llx\n", + next_key, (val - v.val) / 1000000000ll, v.ip); + } +} + +int main(int ac, char **argv) +{ + char filename[256]; + int i; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + for (i = 0; ; i++) { + print_old_objects(map_fd[1]); + sleep(1); + } + + return 0; +} diff --git a/samples/hidraw/Makefile b/samples/hidraw/Makefile index 382eeae77bd6..a9ab96188fbe 100644 --- a/samples/hidraw/Makefile +++ b/samples/hidraw/Makefile @@ -8,3 +8,5 @@ hostprogs-y := hid-example always := $(hostprogs-y) HOSTCFLAGS_hid-example.o += -I$(objtree)/usr/include + +all: hid-example diff --git a/samples/hidraw/hid-example.c b/samples/hidraw/hid-example.c index 512a7e50bcae..92e6c1511910 100644 --- a/samples/hidraw/hid-example.c +++ b/samples/hidraw/hid-example.c @@ -46,10 +46,14 @@ int main(int argc, char **argv) char buf[256]; struct hidraw_report_descriptor rpt_desc; struct hidraw_devinfo info; + char *device = "/dev/hidraw0"; + + if (argc > 1) + device = argv[1]; /* Open the Device with non-blocking reads. In real life, don't use a hard coded path; use libudev instead. */ - fd = open("/dev/hidraw0", O_RDWR|O_NONBLOCK); + fd = open(device, O_RDWR|O_NONBLOCK); if (fd < 0) { perror("Unable to open device"); diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index a2c8b02b6359..8965d1bb8811 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -22,7 +22,25 @@ * protection, just like TRACE_INCLUDE_FILE. */ #undef TRACE_SYSTEM -#define TRACE_SYSTEM sample +#define TRACE_SYSTEM sample-trace + +/* + * TRACE_SYSTEM is expected to be a C valid variable (alpha-numeric + * and underscore), although it may start with numbers. If for some + * reason it is not, you need to add the following lines: + */ +#undef TRACE_SYSTEM_VAR +#define TRACE_SYSTEM_VAR sample_trace +/* + * But the above is only needed if TRACE_SYSTEM is not alpha-numeric + * and underscored. By default, TRACE_SYSTEM_VAR will be equal to + * TRACE_SYSTEM. As TRACE_SYSTEM_VAR must be alpha-numeric, if + * TRACE_SYSTEM is not, then TRACE_SYSTEM_VAR must be defined with + * only alpha-numeric and underscores. + * + * The TRACE_SYSTEM_VAR is only used internally and not visible to + * user space. + */ /* * Notice that this file is not protected like a normal header. @@ -180,8 +198,30 @@ static inline int __length_of(const int *list) ; return i; } + +enum { + TRACE_SAMPLE_FOO = 2, + TRACE_SAMPLE_BAR = 4, + TRACE_SAMPLE_ZOO = 8, +}; #endif +/* + * If enums are used in the TP_printk(), their names will be shown in + * format files and not their values. This can cause problems with user + * space programs that parse the format files to know how to translate + * the raw binary trace output into human readable text. + * + * To help out user space programs, any enum that is used in the TP_printk() + * should be defined by TRACE_DEFINE_ENUM() macro. All that is needed to + * be done is to add this macro with the enum within it in the trace + * header file, and it will be converted in the output. + */ + +TRACE_DEFINE_ENUM(TRACE_SAMPLE_FOO); +TRACE_DEFINE_ENUM(TRACE_SAMPLE_BAR); +TRACE_DEFINE_ENUM(TRACE_SAMPLE_ZOO); + TRACE_EVENT(foo_bar, TP_PROTO(const char *foo, int bar, const int *lst, @@ -206,7 +246,47 @@ TRACE_EVENT(foo_bar, __assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus()); ), - TP_printk("foo %s %d %s %s (%s)", __entry->foo, __entry->bar, + TP_printk("foo %s %d %s %s %s %s (%s)", __entry->foo, __entry->bar, + +/* + * Notice here the use of some helper functions. This includes: + * + * __print_symbolic( variable, { value, "string" }, ... ), + * + * The variable is tested against each value of the { } pair. If + * the variable matches one of the values, then it will print the + * string in that pair. If non are matched, it returns a string + * version of the number (if __entry->bar == 7 then "7" is returned). + */ + __print_symbolic(__entry->bar, + { 0, "zero" }, + { TRACE_SAMPLE_FOO, "TWO" }, + { TRACE_SAMPLE_BAR, "FOUR" }, + { TRACE_SAMPLE_ZOO, "EIGHT" }, + { 10, "TEN" } + ), + +/* + * __print_flags( variable, "delim", { value, "flag" }, ... ), + * + * This is similar to __print_symbolic, except that it tests the bits + * of the value. If ((FLAG & variable) == FLAG) then the string is + * printed. If more than one flag matches, then each one that does is + * also printed with delim in between them. + * If not all bits are accounted for, then the not found bits will be + * added in hex format: 0x506 will show BIT2|BIT4|0x500 + */ + __print_flags(__entry->bar, "|", + { 1, "BIT1" }, + { 2, "BIT2" }, + { 4, "BIT3" }, + { 8, "BIT4" } + ), +/* + * __print_array( array, len, element_size ) + * + * This prints out the array that is defined by __array in a nice format. + */ __print_array(__get_dynamic_array(list), __get_dynamic_array_len(list), sizeof(int)), diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 107db88b1d5f..dd56bffd6500 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -364,12 +364,12 @@ static int apparmor_path_chown(struct path *path, kuid_t uid, kgid_t gid) return common_perm(OP_CHOWN, path, AA_MAY_CHOWN, &cond); } -static int apparmor_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) +static int apparmor_inode_getattr(const struct path *path) { - if (!mediated_filesystem(dentry)) + if (!mediated_filesystem(path->dentry)) return 0; - return common_perm_mnt_dentry(OP_GETATTR, mnt, dentry, + return common_perm_mnt_dentry(OP_GETATTR, path->mnt, path->dentry, AA_MAY_META_READ); } diff --git a/security/capability.c b/security/capability.c index 070dd46f62f4..bdf22034a961 100644 --- a/security/capability.c +++ b/security/capability.c @@ -225,7 +225,7 @@ static int cap_inode_setattr(struct dentry *dentry, struct iattr *iattr) return 0; } -static int cap_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) +static int cap_inode_getattr(const struct path *path) { return 0; } diff --git a/security/keys/compat.c b/security/keys/compat.c index 347896548ad3..25430a3aa7f7 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -31,30 +31,21 @@ static long compat_keyctl_instantiate_key_iov( key_serial_t ringid) { struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; + struct iov_iter from; long ret; - if (!_payload_iov || !ioc) - goto no_payload; + if (!_payload_iov) + ioc = 0; - ret = compat_rw_copy_check_uvector(WRITE, _payload_iov, ioc, - ARRAY_SIZE(iovstack), - iovstack, &iov); + ret = compat_import_iovec(WRITE, _payload_iov, ioc, + ARRAY_SIZE(iovstack), &iov, + &from); if (ret < 0) - goto err; - if (ret == 0) - goto no_payload_free; - - ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid); -err: - if (iov != iovstack) - kfree(iov); - return ret; + return ret; -no_payload_free: - if (iov != iovstack) - kfree(iov); -no_payload: - return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid); + ret = keyctl_instantiate_key_common(id, &from, ringid); + kfree(iov); + return ret; } /* diff --git a/security/keys/internal.h b/security/keys/internal.h index 200e37867336..5105c2c2da75 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -243,9 +243,10 @@ extern long keyctl_instantiate_key_iov(key_serial_t, unsigned, key_serial_t); extern long keyctl_invalidate_key(key_serial_t); +struct iov_iter; extern long keyctl_instantiate_key_common(key_serial_t, - const struct iovec *, - unsigned, size_t, key_serial_t); + struct iov_iter *, + key_serial_t); #ifdef CONFIG_PERSISTENT_KEYRINGS extern long keyctl_get_persistent(uid_t, key_serial_t); extern unsigned persistent_keyring_expiry; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 4743d71e4aa6..0b9ec78a7a7a 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -998,21 +998,6 @@ static int keyctl_change_reqkey_auth(struct key *key) } /* - * Copy the iovec data from userspace - */ -static long copy_from_user_iovec(void *buffer, const struct iovec *iov, - unsigned ioc) -{ - for (; ioc > 0; ioc--) { - if (copy_from_user(buffer, iov->iov_base, iov->iov_len) != 0) - return -EFAULT; - buffer += iov->iov_len; - iov++; - } - return 0; -} - -/* * Instantiate a key with the specified payload and link the key into the * destination keyring if one is given. * @@ -1022,20 +1007,21 @@ static long copy_from_user_iovec(void *buffer, const struct iovec *iov, * If successful, 0 will be returned. */ long keyctl_instantiate_key_common(key_serial_t id, - const struct iovec *payload_iov, - unsigned ioc, - size_t plen, + struct iov_iter *from, key_serial_t ringid) { const struct cred *cred = current_cred(); struct request_key_auth *rka; struct key *instkey, *dest_keyring; + size_t plen = from ? iov_iter_count(from) : 0; void *payload; long ret; - bool vm = false; kenter("%d,,%zu,%d", id, plen, ringid); + if (!plen) + from = NULL; + ret = -EINVAL; if (plen > 1024 * 1024 - 1) goto error; @@ -1054,20 +1040,19 @@ long keyctl_instantiate_key_common(key_serial_t id, /* pull the payload in if one was supplied */ payload = NULL; - if (payload_iov) { + if (from) { ret = -ENOMEM; payload = kmalloc(plen, GFP_KERNEL); if (!payload) { if (plen <= PAGE_SIZE) goto error; - vm = true; payload = vmalloc(plen); if (!payload) goto error; } - ret = copy_from_user_iovec(payload, payload_iov, ioc); - if (ret < 0) + ret = -EFAULT; + if (copy_from_iter(payload, plen, from) != plen) goto error2; } @@ -1089,10 +1074,7 @@ long keyctl_instantiate_key_common(key_serial_t id, keyctl_change_reqkey_auth(NULL); error2: - if (!vm) - kfree(payload); - else - vfree(payload); + kvfree(payload); error: return ret; } @@ -1112,15 +1094,19 @@ long keyctl_instantiate_key(key_serial_t id, key_serial_t ringid) { if (_payload && plen) { - struct iovec iov[1] = { - [0].iov_base = (void __user *)_payload, - [0].iov_len = plen - }; + struct iovec iov; + struct iov_iter from; + int ret; - return keyctl_instantiate_key_common(id, iov, 1, plen, ringid); + ret = import_single_range(WRITE, (void __user *)_payload, plen, + &iov, &from); + if (unlikely(ret)) + return ret; + + return keyctl_instantiate_key_common(id, &from, ringid); } - return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid); + return keyctl_instantiate_key_common(id, NULL, ringid); } /* @@ -1138,29 +1124,19 @@ long keyctl_instantiate_key_iov(key_serial_t id, key_serial_t ringid) { struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; + struct iov_iter from; long ret; - if (!_payload_iov || !ioc) - goto no_payload; + if (!_payload_iov) + ioc = 0; - ret = rw_copy_check_uvector(WRITE, _payload_iov, ioc, - ARRAY_SIZE(iovstack), iovstack, &iov); + ret = import_iovec(WRITE, _payload_iov, ioc, + ARRAY_SIZE(iovstack), &iov, &from); if (ret < 0) - goto err; - if (ret == 0) - goto no_payload_free; - - ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid); -err: - if (iov != iovstack) - kfree(iov); + return ret; + ret = keyctl_instantiate_key_common(id, &from, ringid); + kfree(iov); return ret; - -no_payload_free: - if (iov != iovstack) - kfree(iov); -no_payload: - return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid); } /* diff --git a/security/security.c b/security/security.c index e81d5bbe7363..ed890c6d31c5 100644 --- a/security/security.c +++ b/security/security.c @@ -608,11 +608,11 @@ int security_inode_setattr(struct dentry *dentry, struct iattr *attr) } EXPORT_SYMBOL_GPL(security_inode_setattr); -int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) +int security_inode_getattr(const struct path *path) { - if (unlikely(IS_PRIVATE(dentry->d_inode))) + if (unlikely(IS_PRIVATE(path->dentry->d_inode))) return 0; - return security_ops->inode_getattr(mnt, dentry); + return security_ops->inode_getattr(path); } int security_inode_setxattr(struct dentry *dentry, const char *name, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4d1a54190388..e119cdcffc87 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1623,7 +1623,7 @@ static inline int dentry_has_perm(const struct cred *cred, the path to help the auditing code to more easily generate the pathname if needed. */ static inline int path_has_perm(const struct cred *cred, - struct path *path, + const struct path *path, u32 av) { struct inode *inode = path->dentry->d_inode; @@ -2954,15 +2954,9 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) return dentry_has_perm(cred, dentry, av); } -static int selinux_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) +static int selinux_inode_getattr(const struct path *path) { - const struct cred *cred = current_cred(); - struct path path; - - path.dentry = dentry; - path.mnt = mnt; - - return path_has_perm(cred, &path, FILE__GETATTR); + return path_has_perm(current_cred(), path, FILE__GETATTR); } static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index c934311812f1..1511965549b8 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1034,19 +1034,16 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr) * * Returns 0 if access is permitted, an error code otherwise */ -static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) +static int smack_inode_getattr(const struct path *path) { struct smk_audit_info ad; - struct path path; + struct inode *inode = path->dentry->d_inode; int rc; - path.dentry = dentry; - path.mnt = mnt; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); - smk_ad_setfield_u_fs_path(&ad, path); - rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad); - rc = smk_bu_inode(dentry->d_inode, MAY_READ, rc); + smk_ad_setfield_u_fs_path(&ad, *path); + rc = smk_curacc(smk_of_inode(inode), MAY_READ, &ad); + rc = smk_bu_inode(inode, MAY_READ, rc); return rc; } diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index b897d4862016..f9c9fb1d56b4 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -945,7 +945,7 @@ char *tomoyo_encode2(const char *str, int str_len); char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt, va_list args); char *tomoyo_read_token(struct tomoyo_acl_param *param); -char *tomoyo_realpath_from_path(struct path *path); +char *tomoyo_realpath_from_path(const struct path *path); char *tomoyo_realpath_nofollow(const char *pathname); const char *tomoyo_get_exe(void); const char *tomoyo_yesno(const unsigned int value); @@ -978,7 +978,7 @@ int tomoyo_path2_perm(const u8 operation, struct path *path1, struct path *path2); int tomoyo_path_number_perm(const u8 operation, struct path *path, unsigned long number); -int tomoyo_path_perm(const u8 operation, struct path *path, +int tomoyo_path_perm(const u8 operation, const struct path *path, const char *target); unsigned int tomoyo_poll_control(struct file *file, poll_table *wait); unsigned int tomoyo_poll_log(struct file *file, poll_table *wait); diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c index c151a1869597..2367b100cc62 100644 --- a/security/tomoyo/file.c +++ b/security/tomoyo/file.c @@ -145,7 +145,7 @@ static void tomoyo_add_slash(struct tomoyo_path_info *buf) * * Returns true on success, false otherwise. */ -static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, struct path *path) +static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, const struct path *path) { buf->name = tomoyo_realpath_from_path(path); if (buf->name) { @@ -782,7 +782,7 @@ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain, * * Returns 0 on success, negative value otherwise. */ -int tomoyo_path_perm(const u8 operation, struct path *path, const char *target) +int tomoyo_path_perm(const u8 operation, const struct path *path, const char *target) { struct tomoyo_request_info r; struct tomoyo_obj_info obj = { diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index bed745c8b1a3..1e0d480ff6a6 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -89,7 +89,7 @@ char *tomoyo_encode(const char *str) * * If dentry is a directory, trailing '/' is appended. */ -static char *tomoyo_get_absolute_path(struct path *path, char * const buffer, +static char *tomoyo_get_absolute_path(const struct path *path, char * const buffer, const int buflen) { char *pos = ERR_PTR(-ENOMEM); @@ -216,7 +216,7 @@ out: * * Returns the buffer. */ -static char *tomoyo_get_socket_name(struct path *path, char * const buffer, +static char *tomoyo_get_socket_name(const struct path *path, char * const buffer, const int buflen) { struct inode *inode = path->dentry->d_inode; @@ -247,7 +247,7 @@ static char *tomoyo_get_socket_name(struct path *path, char * const buffer, * These functions use kzalloc(), so the caller must call kfree() * if these functions didn't return NULL. */ -char *tomoyo_realpath_from_path(struct path *path) +char *tomoyo_realpath_from_path(const struct path *path) { char *buf = NULL; char *name = NULL; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index f0b756e27fed..57c88d52ffa5 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -144,10 +144,9 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm) * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) +static int tomoyo_inode_getattr(const struct path *path) { - struct path path = { mnt, dentry }; - return tomoyo_path_perm(TOMOYO_TYPE_GETATTR, &path, NULL); + return tomoyo_path_perm(TOMOYO_TYPE_GETATTR, path, NULL); } /** diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 279e24f61305..a69ebc79bc50 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -25,7 +25,6 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/pm_qos.h> -#include <linux/aio.h> #include <linux/io.h> #include <linux/dma-mapping.h> #include <sound/core.h> @@ -35,6 +34,7 @@ #include <sound/pcm_params.h> #include <sound/timer.h> #include <sound/minors.h> +#include <linux/uio.h> /* * Compatibility diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index e5c990889dcc..10f0886e78ec 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2755,7 +2755,7 @@ int snd_soc_register_component(struct device *dev, ret = snd_soc_register_dais(cmpnt, dai_drv, num_dai, true); if (ret < 0) { - dev_err(dev, "ASoC: Failed to regster DAIs: %d\n", ret); + dev_err(dev, "ASoC: Failed to register DAIs: %d\n", ret); goto err_cleanup; } @@ -3076,7 +3076,7 @@ int snd_soc_register_codec(struct device *dev, ret = snd_soc_register_dais(&codec->component, dai_drv, num_dai, false); if (ret < 0) { - dev_err(dev, "ASoC: Failed to regster DAIs: %d\n", ret); + dev_err(dev, "ASoC: Failed to register DAIs: %d\n", ret); goto err_cleanup; } diff --git a/tools/build/Build.include b/tools/build/Build.include new file mode 100644 index 000000000000..4c8daaccb82a --- /dev/null +++ b/tools/build/Build.include @@ -0,0 +1,81 @@ +### +# build: Generic definitions +# +# Lots of this code have been borrowed or heavily inspired from parts +# of kbuild code, which is not credited, but mostly developed by: +# +# Copyright (C) Sam Ravnborg <sam@mars.ravnborg.org>, 2015 +# Copyright (C) Linus Torvalds <torvalds@linux-foundation.org>, 2015 +# + +### +# Convenient variables +comma := , +squote := ' + +### +# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o +dot-target = $(dir $@).$(notdir $@) + +### +# filename of target with directory and extension stripped +basetarget = $(basename $(notdir $@)) + +### +# The temporary file to save gcc -MD generated dependencies must not +# contain a comma +depfile = $(subst $(comma),_,$(dot-target).d) + +### +# Check if both arguments has same arguments. Result is empty string if equal. +arg-check = $(strip $(filter-out $(cmd_$(1)), $(cmd_$@)) \ + $(filter-out $(cmd_$@), $(cmd_$(1))) ) + +### +# Escape single quote for use in echo statements +escsq = $(subst $(squote),'\$(squote)',$1) + +# Echo command +# Short version is used, if $(quiet) equals `quiet_', otherwise full one. +echo-cmd = $(if $($(quiet)cmd_$(1)),\ + echo ' $(call escsq,$($(quiet)cmd_$(1)))';) + +### +# Replace >$< with >$$< to preserve $ when reloading the .cmd file +# (needed for make) +# Replace >#< with >\#< to avoid starting a comment in the .cmd file +# (needed for make) +# Replace >'< with >'\''< to be able to enclose the whole string in '...' +# (needed for the shell) +make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1))))) + +### +# Find any prerequisites that is newer than target or that does not exist. +# PHONY targets skipped in both cases. +any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^) + +### +# if_changed_dep - execute command if any prerequisite is newer than +# target, or command line has changed and update +# dependencies in the cmd file +if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \ + @set -e; \ + $(echo-cmd) $(cmd_$(1)); \ + cat $(depfile) > $(dot-target).cmd; \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) + +# if_changed - execute command if any prerequisite is newer than +# target, or command line has changed +if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ + @set -e; \ + $(echo-cmd) $(cmd_$(1)); \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd) + +### +# C flags to be used in rule definitions, includes: +# - depfile generation +# - global $(CFLAGS) +# - per target C flags +# - per object C flags +# - BUILD_STR macro to allow '-D"$(variable)"' constructs +c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj)) diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt new file mode 100644 index 000000000000..00ad2d608727 --- /dev/null +++ b/tools/build/Documentation/Build.txt @@ -0,0 +1,139 @@ +Build Framework +=============== + +The perf build framework was adopted from the kernel build system, hence the +idea and the way how objects are built is the same. + +Basically the user provides set of 'Build' files that list objects and +directories to nest for specific target to be build. + +Unlike the kernel we don't have a single build object 'obj-y' list that where +we setup source objects, but we support more. This allows one 'Build' file to +carry a sources list for multiple build objects. + +a) Build framework makefiles +---------------------------- + +The build framework consists of 2 Makefiles: + + Build.include + Makefile.build + +While the 'Build.include' file contains just some generic definitions, the +'Makefile.build' file is the makefile used from the outside. It's +interface/usage is following: + + $ make -f tools/build/Makefile srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT) + +where: + + KSRC - is the path to kernel sources + DIR - is the path to the project to be built + OBJECT - is the name of the build object + +When succefully finished the $(DIR) directory contains the final object file +called $(OBJECT)-in.o: + + $ ls $(DIR)/$(OBJECT)-in.o + +which includes all compiled sources described in 'Build' makefiles. + +a) Build makefiles +------------------ + +The user supplies 'Build' makefiles that contains a objects list, and connects +the build to nested directories. + +Assume we have the following project structure: + + ex/a.c + /b.c + /c.c + /d.c + /arch/e.c + /arch/f.c + +Out of which you build the 'ex' binary ' and the 'libex.a' library: + + 'ex' - consists of 'a.o', 'b.o' and libex.a + 'libex.a' - consists of 'c.o', 'd.o', 'e.o' and 'f.o' + +The build framework does not create the 'ex' and 'libex.a' binaries for you, it +only prepares proper objects to be compiled and grouped together. + +To follow the above example, the user provides following 'Build' files: + + ex/Build: + ex-y += a.o + ex-y += b.o + + libex-y += c.o + libex-y += d.o + libex-y += arch/ + + ex/arch/Build: + libex-y += e.o + libex-y += f.o + +and runs: + + $ make -f tools/build/Makefile.build dir=. obj=ex + $ make -f tools/build/Makefile.build dir=. obj=libex + +which creates the following objects: + + ex/ex-in.o + ex/libex-in.o + +that contain request objects names in Build files. + +It's only a matter of 2 single commands to create the final binaries: + + $ ar rcs libex.a libex-in.o + $ gcc -o ex ex-in.o libex.a + +You can check the 'ex' example in 'tools/build/tests/ex' for more details. + +b) Rules +-------- + +The build framework provides standard compilation rules to handle .S and .c +compilation. + +It's possible to include special rule if needed (like we do for flex or bison +code generation). + +c) CFLAGS +--------- + +It's possible to alter the standard object C flags in the following way: + + CFLAGS_perf.o += '...' - alters CFLAGS for perf.o object + CFLAGS_gtk += '...' - alters CFLAGS for gtk build object + +This C flags changes has the scope of the Build makefile they are defined in. + + +d) Dependencies +--------------- + +For each built object file 'a.o' the '.a.cmd' is created and holds: + + - Command line used to built that object + (for each object) + + - Dependency rules generated by 'gcc -Wp,-MD,...' + (for compiled object) + +All existing '.cmd' files are included in the Build process to follow properly +the dependencies and trigger a rebuild when necessary. + + +e) Single rules +--------------- + +It's possible to build single object file by choice, like: + + $ make util/map.o # objects + $ make util/map.i # preprocessor + $ make util/map.s # assembly diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build new file mode 100644 index 000000000000..10df57237a66 --- /dev/null +++ b/tools/build/Makefile.build @@ -0,0 +1,130 @@ +### +# Main build makefile. +# +# Lots of this code have been borrowed or heavily inspired from parts +# of kbuild code, which is not credited, but mostly developed by: +# +# Copyright (C) Sam Ravnborg <sam@mars.ravnborg.org>, 2015 +# Copyright (C) Linus Torvalds <torvalds@linux-foundation.org>, 2015 +# + +PHONY := __build +__build: + +ifeq ($(V),1) + quiet = + Q = +else + quiet=quiet_ + Q=@ +endif + +build-dir := $(srctree)/tools/build + +# Generic definitions +include $(build-dir)/Build.include + +# do not force detected configuration +-include .config-detected + +# Init all relevant variables used in build files so +# 1) they have correct type +# 2) they do not inherit any value from the environment +subdir-y := +obj-y := +subdir-y := +subdir-obj-y := + +# Build definitions +build-file := $(dir)/Build +include $(build-file) + +quiet_cmd_flex = FLEX $@ +quiet_cmd_bison = BISON $@ + +# Create directory unless it exists +quiet_cmd_mkdir = MKDIR $(dir $@) + cmd_mkdir = mkdir -p $(dir $@) + rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir)) + +# Compile command +quiet_cmd_cc_o_c = CC $@ + cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< + +quiet_cmd_cc_i_c = CPP $@ + cmd_cc_i_c = $(CC) $(c_flags) -E -o $@ $< + +quiet_cmd_cc_s_c = AS $@ + cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< + +# Link agregate command +# If there's nothing to link, create empty $@ object. +quiet_cmd_ld_multi = LD $@ + cmd_ld_multi = $(if $(strip $(obj-y)),\ + $(LD) -r -o $@ $(obj-y),rm -f $@; $(AR) rcs $@) + +# Build rules +$(OUTPUT)%.o: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)%.o: %.S FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)%.i: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_i_c) + +$(OUTPUT)%.i: %.S FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_i_c) + +$(OUTPUT)%.s: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_s_c) + +# Gather build data: +# obj-y - list of build objects +# subdir-y - list of directories to nest +# subdir-obj-y - list of directories objects 'dir/$(obj)-in.o' +obj-y := $($(obj)-y) +subdir-y := $(patsubst %/,%,$(filter %/, $(obj-y))) +obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y)) +subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y)) + +# '$(OUTPUT)/dir' prefix to all objects +prefix := $(subst ./,,$(OUTPUT)$(dir)/) +obj-y := $(addprefix $(prefix),$(obj-y)) +subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y)) + +# Final '$(obj)-in.o' object +in-target := $(prefix)$(obj)-in.o + +PHONY += $(subdir-y) + +$(subdir-y): + $(Q)$(MAKE) -f $(build-dir)/Makefile.build dir=$(dir)/$@ obj=$(obj) + +$(sort $(subdir-obj-y)): $(subdir-y) ; + +$(in-target): $(obj-y) FORCE + $(call rule_mkdir) + $(call if_changed,ld_multi) + +__build: $(in-target) + @: + +PHONY += FORCE +FORCE: + +# Include all cmd files to get all the dependency rules +# for all objects included +targets := $(wildcard $(sort $(obj-y) $(in-target) $(MAKECMDGOALS))) +cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) + +ifneq ($(cmd_files),) + include $(cmd_files) +endif + +.PHONY: $(PHONY) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature new file mode 100644 index 000000000000..3a0b0ca2a28c --- /dev/null +++ b/tools/build/Makefile.feature @@ -0,0 +1,171 @@ +feature_dir := $(srctree)/tools/build/feature + +ifneq ($(OUTPUT),) + OUTPUT_FEATURES = $(OUTPUT)feature/ + $(shell mkdir -p $(OUTPUT_FEATURES)) +endif + +feature_check = $(eval $(feature_check_code)) +define feature_check_code + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) +endef + +feature_set = $(eval $(feature_set_code)) +define feature_set_code + feature-$(1) := 1 +endef + +# +# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output: +# + +# +# Note that this is not a complete list of all feature tests, just +# those that are typically built on a fully configured system. +# +# [ Feature tests not mentioned here have to be built explicitly in +# the rule that uses them - an example for that is the 'bionic' +# feature check. ] +# +FEATURE_TESTS = \ + backtrace \ + dwarf \ + fortify-source \ + sync-compare-and-swap \ + glibc \ + gtk2 \ + gtk2-infobar \ + libaudit \ + libbfd \ + libelf \ + libelf-getphdrnum \ + libelf-mmap \ + libnuma \ + libperl \ + libpython \ + libpython-version \ + libslang \ + libunwind \ + pthread-attr-setaffinity-np \ + stackprotector-all \ + timerfd \ + libdw-dwarf-unwind \ + zlib \ + lzma + +FEATURE_DISPLAY = \ + dwarf \ + glibc \ + gtk2 \ + libaudit \ + libbfd \ + libelf \ + libnuma \ + libperl \ + libpython \ + libslang \ + libunwind \ + libdw-dwarf-unwind \ + zlib \ + lzma + +# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. +# If in the future we need per-feature checks/flags for features not +# mentioned in this list we need to refactor this ;-). +set_test_all_flags = $(eval $(set_test_all_flags_code)) +define set_test_all_flags_code + FEATURE_CHECK_CFLAGS-all += $(FEATURE_CHECK_CFLAGS-$(1)) + FEATURE_CHECK_LDFLAGS-all += $(FEATURE_CHECK_LDFLAGS-$(1)) +endef + +$(foreach feat,$(FEATURE_TESTS),$(call set_test_all_flags,$(feat))) + +# +# Special fast-path for the 'all features are available' case: +# +$(call feature_check,all,$(MSG)) + +# +# Just in case the build freshly failed, make sure we print the +# feature matrix: +# +ifeq ($(feature-all), 1) + # + # test-all.c passed - just set all the core feature flags to 1: + # + $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat))) +else + $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C $(feature_dir) $(addsuffix .bin,$(FEATURE_TESTS)) >/dev/null 2>&1) + $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat))) +endif + +# +# Print the result of the feature test: +# +feature_print_status = $(eval $(feature_print_status_code)) $(info $(MSG)) + +define feature_print_status_code + ifeq ($(feature-$(1)), 1) + MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) + else + MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1)) + endif +endef + +feature_print_text = $(eval $(feature_print_text_code)) $(info $(MSG)) +define feature_print_text_code + MSG = $(shell printf '...%30s: %s' $(1) $(2)) +endef + +FEATURE_DUMP := $(foreach feat,$(FEATURE_DISPLAY),feature-$(feat)($(feature-$(feat)))) +FEATURE_DUMP_FILE := $(shell touch $(OUTPUT)FEATURE-DUMP; cat $(OUTPUT)FEATURE-DUMP) + +ifeq ($(dwarf-post-unwind),1) + FEATURE_DUMP += dwarf-post-unwind($(dwarf-post-unwind-text)) +endif + +# The $(feature_display) controls the default detection message +# output. It's set if: +# - detected features differes from stored features from +# last build (in FEATURE-DUMP file) +# - one of the $(FEATURE_DISPLAY) is not detected +# - VF is enabled + +ifneq ("$(FEATURE_DUMP)","$(FEATURE_DUMP_FILE)") + $(shell echo "$(FEATURE_DUMP)" > $(OUTPUT)FEATURE-DUMP) + feature_display := 1 +endif + +feature_display_check = $(eval $(feature_check_code)) +define feature_display_check_code + ifneq ($(feature-$(1)), 1) + feature_display := 1 + endif +endef + +$(foreach feat,$(FEATURE_DISPLAY),$(call feature_display_check,$(feat))) + +ifeq ($(VF),1) + feature_display := 1 + feature_verbose := 1 +endif + +ifeq ($(feature_display),1) + $(info ) + $(info Auto-detecting system features:) + $(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),)) + + ifeq ($(dwarf-post-unwind),1) + $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) + endif + + ifneq ($(feature_verbose),1) + $(info ) + endif +endif + +ifeq ($(feature_verbose),1) + TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS)) + $(foreach feat,$(TMP),$(call feature_print_status,$(feat),)) + $(info ) +endif diff --git a/tools/perf/config/feature-checks/.gitignore b/tools/build/feature/.gitignore index 80f3da0c3515..09b335b98842 100644 --- a/tools/perf/config/feature-checks/.gitignore +++ b/tools/build/feature/.gitignore @@ -1,2 +1,3 @@ *.d *.bin +*.output diff --git a/tools/perf/config/feature-checks/Makefile b/tools/build/feature/Makefile index b32ff3372514..463ed8f2a267 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/build/feature/Makefile @@ -29,33 +29,36 @@ FILES= \ test-stackprotector-all.bin \ test-timerfd.bin \ test-libdw-dwarf-unwind.bin \ + test-libbabeltrace.bin \ test-compile-32.bin \ test-compile-x32.bin \ - test-zlib.bin + test-zlib.bin \ + test-lzma.bin CC := $(CROSS_COMPILE)gcc -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config all: $(FILES) -BUILD = $(CC) $(CFLAGS) -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) +__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) + BUILD = $(__BUILD) > $(OUTPUT)$(@:.bin=.make.output) 2>&1 ############################### test-all.bin: - $(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma test-hello.bin: $(BUILD) test-pthread-attr-setaffinity-np.bin: - $(BUILD) -D_GNU_SOURCE -Werror -lpthread + $(BUILD) -D_GNU_SOURCE -lpthread test-stackprotector-all.bin: - $(BUILD) -Werror -fstack-protector-all + $(BUILD) -fstack-protector-all test-fortify-source.bin: - $(BUILD) -O2 -Werror -D_FORTIFY_SOURCE=2 + $(BUILD) -O2 -D_FORTIFY_SOURCE=2 test-bionic.bin: $(BUILD) @@ -118,10 +121,10 @@ test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl test-liberty.bin: - $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty + $(CC) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty test-liberty-z.bin: - $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz + $(CC) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz test-cplus-demangle.bin: $(BUILD) -liberty @@ -133,10 +136,13 @@ test-timerfd.bin: $(BUILD) test-libdw-dwarf-unwind.bin: - $(BUILD) + $(BUILD) # -ldw provided by $(FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind) + +test-libbabeltrace.bin: + $(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace) test-sync-compare-and-swap.bin: - $(BUILD) -Werror + $(BUILD) test-compile-32.bin: $(CC) -m32 -o $(OUTPUT)$@ test-compile.c @@ -147,9 +153,12 @@ test-compile-x32.bin: test-zlib.bin: $(BUILD) -lz +test-lzma.bin: + $(BUILD) -llzma + -include *.d ############################### clean: - rm -f $(FILES) *.d + rm -f $(FILES) *.d $(FILES:.bin=.make.output) diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/build/feature/test-all.c index 6d4d09323922..84689a67814a 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/build/feature/test-all.c @@ -98,7 +98,23 @@ #undef main #define main main_test_pthread_attr_setaffinity_np -# include "test-pthread_attr_setaffinity_np.c" +# include "test-pthread-attr-setaffinity-np.c" +#undef main + +# if 0 +/* + * Disable libbabeltrace check for test-all, because the requested + * library version is not released yet in most distributions. Will + * reenable later. + */ + +#define main main_test_libbabeltrace +# include "test-libbabeltrace.c" +#undef main +#endif + +#define main main_test_lzma +# include "test-lzma.c" #undef main int main(int argc, char *argv[]) @@ -126,6 +142,7 @@ int main(int argc, char *argv[]) main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); + main_test_lzma(); return 0; } diff --git a/tools/perf/config/feature-checks/test-backtrace.c b/tools/build/feature/test-backtrace.c index 7124aa1dc8fb..7124aa1dc8fb 100644 --- a/tools/perf/config/feature-checks/test-backtrace.c +++ b/tools/build/feature/test-backtrace.c diff --git a/tools/perf/config/feature-checks/test-bionic.c b/tools/build/feature/test-bionic.c index eac24e9513eb..eac24e9513eb 100644 --- a/tools/perf/config/feature-checks/test-bionic.c +++ b/tools/build/feature/test-bionic.c diff --git a/tools/perf/config/feature-checks/test-compile.c b/tools/build/feature/test-compile.c index 31dbf45bf99c..31dbf45bf99c 100644 --- a/tools/perf/config/feature-checks/test-compile.c +++ b/tools/build/feature/test-compile.c diff --git a/tools/perf/config/feature-checks/test-cplus-demangle.c b/tools/build/feature/test-cplus-demangle.c index 610c686e0009..610c686e0009 100644 --- a/tools/perf/config/feature-checks/test-cplus-demangle.c +++ b/tools/build/feature/test-cplus-demangle.c diff --git a/tools/perf/config/feature-checks/test-dwarf.c b/tools/build/feature/test-dwarf.c index 3fc1801ce4a9..3fc1801ce4a9 100644 --- a/tools/perf/config/feature-checks/test-dwarf.c +++ b/tools/build/feature/test-dwarf.c diff --git a/tools/perf/config/feature-checks/test-fortify-source.c b/tools/build/feature/test-fortify-source.c index c9f398d87868..c9f398d87868 100644 --- a/tools/perf/config/feature-checks/test-fortify-source.c +++ b/tools/build/feature/test-fortify-source.c diff --git a/tools/perf/config/feature-checks/test-glibc.c b/tools/build/feature/test-glibc.c index b0820345cd98..b0820345cd98 100644 --- a/tools/perf/config/feature-checks/test-glibc.c +++ b/tools/build/feature/test-glibc.c diff --git a/tools/perf/config/feature-checks/test-gtk2-infobar.c b/tools/build/feature/test-gtk2-infobar.c index 397b4646d066..397b4646d066 100644 --- a/tools/perf/config/feature-checks/test-gtk2-infobar.c +++ b/tools/build/feature/test-gtk2-infobar.c diff --git a/tools/perf/config/feature-checks/test-gtk2.c b/tools/build/feature/test-gtk2.c index 6bd80e509439..6bd80e509439 100644 --- a/tools/perf/config/feature-checks/test-gtk2.c +++ b/tools/build/feature/test-gtk2.c diff --git a/tools/perf/config/feature-checks/test-hello.c b/tools/build/feature/test-hello.c index c9f398d87868..c9f398d87868 100644 --- a/tools/perf/config/feature-checks/test-hello.c +++ b/tools/build/feature/test-hello.c diff --git a/tools/perf/config/feature-checks/test-libaudit.c b/tools/build/feature/test-libaudit.c index afc019f08641..afc019f08641 100644 --- a/tools/perf/config/feature-checks/test-libaudit.c +++ b/tools/build/feature/test-libaudit.c diff --git a/tools/build/feature/test-libbabeltrace.c b/tools/build/feature/test-libbabeltrace.c new file mode 100644 index 000000000000..9cf802a04885 --- /dev/null +++ b/tools/build/feature/test-libbabeltrace.c @@ -0,0 +1,9 @@ + +#include <babeltrace/ctf-writer/writer.h> +#include <babeltrace/ctf-ir/stream-class.h> + +int main(void) +{ + bt_ctf_stream_class_get_packet_context_type((void *) 0); + return 0; +} diff --git a/tools/perf/config/feature-checks/test-libbfd.c b/tools/build/feature/test-libbfd.c index 24059907e990..24059907e990 100644 --- a/tools/perf/config/feature-checks/test-libbfd.c +++ b/tools/build/feature/test-libbfd.c diff --git a/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c b/tools/build/feature/test-libdw-dwarf-unwind.c index f676a3ff442a..f676a3ff442a 100644 --- a/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c +++ b/tools/build/feature/test-libdw-dwarf-unwind.c diff --git a/tools/perf/config/feature-checks/test-libelf-getphdrnum.c b/tools/build/feature/test-libelf-getphdrnum.c index d710459306c3..d710459306c3 100644 --- a/tools/perf/config/feature-checks/test-libelf-getphdrnum.c +++ b/tools/build/feature/test-libelf-getphdrnum.c diff --git a/tools/perf/config/feature-checks/test-libelf-mmap.c b/tools/build/feature/test-libelf-mmap.c index 564427d7ef18..564427d7ef18 100644 --- a/tools/perf/config/feature-checks/test-libelf-mmap.c +++ b/tools/build/feature/test-libelf-mmap.c diff --git a/tools/perf/config/feature-checks/test-libelf.c b/tools/build/feature/test-libelf.c index 08db322d8957..08db322d8957 100644 --- a/tools/perf/config/feature-checks/test-libelf.c +++ b/tools/build/feature/test-libelf.c diff --git a/tools/perf/config/feature-checks/test-libnuma.c b/tools/build/feature/test-libnuma.c index 4763d9cd587d..4763d9cd587d 100644 --- a/tools/perf/config/feature-checks/test-libnuma.c +++ b/tools/build/feature/test-libnuma.c diff --git a/tools/perf/config/feature-checks/test-libperl.c b/tools/build/feature/test-libperl.c index 8871f6a0fdb4..8871f6a0fdb4 100644 --- a/tools/perf/config/feature-checks/test-libperl.c +++ b/tools/build/feature/test-libperl.c diff --git a/tools/perf/config/feature-checks/test-libpython-version.c b/tools/build/feature/test-libpython-version.c index facea122d812..facea122d812 100644 --- a/tools/perf/config/feature-checks/test-libpython-version.c +++ b/tools/build/feature/test-libpython-version.c diff --git a/tools/perf/config/feature-checks/test-libpython.c b/tools/build/feature/test-libpython.c index b24b28ad6324..b24b28ad6324 100644 --- a/tools/perf/config/feature-checks/test-libpython.c +++ b/tools/build/feature/test-libpython.c diff --git a/tools/perf/config/feature-checks/test-libslang.c b/tools/build/feature/test-libslang.c index 22ff22ed94d1..22ff22ed94d1 100644 --- a/tools/perf/config/feature-checks/test-libslang.c +++ b/tools/build/feature/test-libslang.c diff --git a/tools/perf/config/feature-checks/test-libunwind-debug-frame.c b/tools/build/feature/test-libunwind-debug-frame.c index 0ef8087a104a..0ef8087a104a 100644 --- a/tools/perf/config/feature-checks/test-libunwind-debug-frame.c +++ b/tools/build/feature/test-libunwind-debug-frame.c diff --git a/tools/perf/config/feature-checks/test-libunwind.c b/tools/build/feature/test-libunwind.c index 43b9369bcab7..43b9369bcab7 100644 --- a/tools/perf/config/feature-checks/test-libunwind.c +++ b/tools/build/feature/test-libunwind.c diff --git a/tools/build/feature/test-lzma.c b/tools/build/feature/test-lzma.c new file mode 100644 index 000000000000..95adc8ced3dd --- /dev/null +++ b/tools/build/feature/test-lzma.c @@ -0,0 +1,10 @@ +#include <lzma.h> + +int main(void) +{ + lzma_stream strm = LZMA_STREAM_INIT; + int ret; + + ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED); + return ret ? -1 : 0; +} diff --git a/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c b/tools/build/feature/test-pthread-attr-setaffinity-np.c index 2b81b72eca23..fdada5e8d454 100644 --- a/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c +++ b/tools/build/feature/test-pthread-attr-setaffinity-np.c @@ -1,5 +1,6 @@ #include <stdint.h> #include <pthread.h> +#include <sched.h> int main(void) { @@ -8,7 +9,8 @@ int main(void) cpu_set_t cs; pthread_attr_init(&thread_attr); - /* don't care abt exact args, just the API itself in libpthread */ + CPU_ZERO(&cs); + ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cs), &cs); return ret; diff --git a/tools/perf/config/feature-checks/test-stackprotector-all.c b/tools/build/feature/test-stackprotector-all.c index c9f398d87868..c9f398d87868 100644 --- a/tools/perf/config/feature-checks/test-stackprotector-all.c +++ b/tools/build/feature/test-stackprotector-all.c diff --git a/tools/perf/config/feature-checks/test-sync-compare-and-swap.c b/tools/build/feature/test-sync-compare-and-swap.c index c34d4ca4af56..c34d4ca4af56 100644 --- a/tools/perf/config/feature-checks/test-sync-compare-and-swap.c +++ b/tools/build/feature/test-sync-compare-and-swap.c diff --git a/tools/perf/config/feature-checks/test-timerfd.c b/tools/build/feature/test-timerfd.c index 8c5c083b4d3c..8c5c083b4d3c 100644 --- a/tools/perf/config/feature-checks/test-timerfd.c +++ b/tools/build/feature/test-timerfd.c diff --git a/tools/perf/config/feature-checks/test-zlib.c b/tools/build/feature/test-zlib.c index e111fff6240e..e111fff6240e 100644 --- a/tools/perf/config/feature-checks/test-zlib.c +++ b/tools/build/feature/test-zlib.c diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build new file mode 100644 index 000000000000..0e6c3e6767e6 --- /dev/null +++ b/tools/build/tests/ex/Build @@ -0,0 +1,8 @@ +ex-y += ex.o +ex-y += a.o +ex-y += b.o +ex-y += empty/ + +libex-y += c.o +libex-y += d.o +libex-y += arch/ diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile new file mode 100644 index 000000000000..52d2476073a3 --- /dev/null +++ b/tools/build/tests/ex/Makefile @@ -0,0 +1,23 @@ +export srctree := ../../../.. +export CC := gcc +export LD := ld +export AR := ar + +build := -f $(srctree)/tools/build/Makefile.build dir=. obj +ex: ex-in.o libex-in.o + gcc -o $@ $^ + +ex.%: FORCE + make -f $(srctree)/tools/build/Makefile.build dir=. $@ + +ex-in.o: FORCE + make $(build)=ex + +libex-in.o: FORCE + make $(build)=libex + +clean: + find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + rm -f ex ex.i ex.s + +.PHONY: FORCE diff --git a/tools/build/tests/ex/a.c b/tools/build/tests/ex/a.c new file mode 100644 index 000000000000..851762798c83 --- /dev/null +++ b/tools/build/tests/ex/a.c @@ -0,0 +1,5 @@ + +int a(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/arch/Build b/tools/build/tests/ex/arch/Build new file mode 100644 index 000000000000..55506189efae --- /dev/null +++ b/tools/build/tests/ex/arch/Build @@ -0,0 +1,2 @@ +libex-y += e.o +libex-y += f.o diff --git a/tools/build/tests/ex/arch/e.c b/tools/build/tests/ex/arch/e.c new file mode 100644 index 000000000000..beaa4a1d7ba8 --- /dev/null +++ b/tools/build/tests/ex/arch/e.c @@ -0,0 +1,5 @@ + +int e(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/arch/f.c b/tools/build/tests/ex/arch/f.c new file mode 100644 index 000000000000..7c3e9e9da5b7 --- /dev/null +++ b/tools/build/tests/ex/arch/f.c @@ -0,0 +1,5 @@ + +int f(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/b.c b/tools/build/tests/ex/b.c new file mode 100644 index 000000000000..c24ff9ca9a97 --- /dev/null +++ b/tools/build/tests/ex/b.c @@ -0,0 +1,5 @@ + +int b(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/c.c b/tools/build/tests/ex/c.c new file mode 100644 index 000000000000..e216d0217499 --- /dev/null +++ b/tools/build/tests/ex/c.c @@ -0,0 +1,5 @@ + +int c(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/d.c b/tools/build/tests/ex/d.c new file mode 100644 index 000000000000..80dc0f06151b --- /dev/null +++ b/tools/build/tests/ex/d.c @@ -0,0 +1,5 @@ + +int d(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/empty/Build b/tools/build/tests/ex/empty/Build new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/build/tests/ex/empty/Build diff --git a/tools/build/tests/ex/ex.c b/tools/build/tests/ex/ex.c new file mode 100644 index 000000000000..dc42eb2e1a67 --- /dev/null +++ b/tools/build/tests/ex/ex.c @@ -0,0 +1,19 @@ + +int a(void); +int b(void); +int c(void); +int d(void); +int e(void); +int f(void); + +int main(void) +{ + a(); + b(); + c(); + d(); + e(); + f(); + + return 0; +} diff --git a/tools/build/tests/run.sh b/tools/build/tests/run.sh new file mode 100755 index 000000000000..5494f8ea7567 --- /dev/null +++ b/tools/build/tests/run.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +function test_ex { + make -C ex V=1 clean > ex.out 2>&1 + make -C ex V=1 >> ex.out 2>&1 + + if [ ! -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} + +function test_ex_suffix { + make -C ex V=1 clean > ex.out 2>&1 + + # use -rR to disable make's builtin rules + make -rR -C ex V=1 ex.o >> ex.out 2>&1 + make -rR -C ex V=1 ex.i >> ex.out 2>&1 + make -rR -C ex V=1 ex.s >> ex.out 2>&1 + + if [ -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + if [ ! -f ./ex/ex.o -o ! -f ./ex/ex.i -o ! -f ./ex/ex.s ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} +echo -n Testing.. + +test_ex +test_ex_suffix + +echo OK diff --git a/tools/lib/api/Build b/tools/lib/api/Build new file mode 100644 index 000000000000..3653965cf481 --- /dev/null +++ b/tools/lib/api/Build @@ -0,0 +1,2 @@ +libapi-y += fd/ +libapi-y += fs/ diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 36c08b1f4afb..d8fe29fc19a4 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -1,49 +1,43 @@ include ../../scripts/Makefile.include include ../../perf/config/utilities.mak # QUIET_CLEAN +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar -# guard against environment variables -LIB_H= -LIB_OBJS= - -LIB_H += fs/debugfs.h -LIB_H += fs/fs.h -# See comment below about piggybacking... -LIB_H += fd/array.h - -LIB_OBJS += $(OUTPUT)fs/debugfs.o -LIB_OBJS += $(OUTPUT)fs/fs.o -# XXX piggybacking here, need to introduce libapikfd, or rename this -# to plain libapik.a and make it have it all api goodies -LIB_OBJS += $(OUTPUT)fd/array.o +MAKEFLAGS += --no-print-directory -LIBFILE = libapikfs.a +LIBFILE = $(OUTPUT)libapi.a -CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -fPIC -EXTLIBS = -lelf -lpthread -lrt -lm -ALL_CFLAGS = $(CFLAGS) $(BASIC_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ALL_LDFLAGS = $(LDFLAGS) +CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 -fPIC +CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 RM = rm -f -$(LIBFILE): $(LIB_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $(OUTPUT)$@ $(LIB_OBJS) +build := -f $(srctree)/tools/build/Makefile.build dir=. obj +API_IN := $(OUTPUT)libapi-in.o -$(LIB_OBJS): $(LIB_H) +export srctree OUTPUT CC LD CFLAGS V -libapi_dirs: - $(QUIET_MKDIR)mkdir -p $(OUTPUT)fd $(OUTPUT)fs +all: $(LIBFILE) -$(OUTPUT)%.o: %.c libapi_dirs - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< -$(OUTPUT)%.s: %.c libapi_dirs - $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $< -$(OUTPUT)%.o: %.S libapi_dirs - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< +$(API_IN): FORCE + @$(MAKE) $(build)=libapi + +$(LIBFILE): $(API_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN) clean: - $(call QUIET_CLEAN, libapi) $(RM) $(LIB_OBJS) $(LIBFILE) + $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ + find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o | xargs $(RM) + +FORCE: -.PHONY: clean +.PHONY: clean FORCE diff --git a/tools/lib/api/fd/Build b/tools/lib/api/fd/Build new file mode 100644 index 000000000000..605d99f6d71a --- /dev/null +++ b/tools/lib/api/fd/Build @@ -0,0 +1 @@ +libapi-y += array.o diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build new file mode 100644 index 000000000000..6de5a4f0b501 --- /dev/null +++ b/tools/lib/api/fs/Build @@ -0,0 +1,4 @@ +libapi-y += fs.o +libapi-y += debugfs.o +libapi-y += findfs.o +libapi-y += tracefs.o diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index d2b18e887071..8305b3e9d48e 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -3,75 +3,50 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <unistd.h> #include <stdbool.h> #include <sys/vfs.h> +#include <sys/types.h> +#include <sys/stat.h> #include <sys/mount.h> #include <linux/kernel.h> #include "debugfs.h" -char debugfs_mountpoint[PATH_MAX + 1] = "/sys/kernel/debug"; +#ifndef DEBUGFS_DEFAULT_PATH +#define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug" +#endif + +char debugfs_mountpoint[PATH_MAX + 1] = DEBUGFS_DEFAULT_PATH; static const char * const debugfs_known_mountpoints[] = { - "/sys/kernel/debug", + DEBUGFS_DEFAULT_PATH, "/debug", 0, }; static bool debugfs_found; +bool debugfs_configured(void) +{ + return debugfs_find_mountpoint() != NULL; +} + /* find the path to the mounted debugfs */ const char *debugfs_find_mountpoint(void) { - const char * const *ptr; - char type[100]; - FILE *fp; + const char *ret; if (debugfs_found) return (const char *)debugfs_mountpoint; - ptr = debugfs_known_mountpoints; - while (*ptr) { - if (debugfs_valid_mountpoint(*ptr) == 0) { - debugfs_found = true; - strcpy(debugfs_mountpoint, *ptr); - return debugfs_mountpoint; - } - ptr++; - } - - /* give up and parse /proc/mounts */ - fp = fopen("/proc/mounts", "r"); - if (fp == NULL) - return NULL; - - while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", - debugfs_mountpoint, type) == 2) { - if (strcmp(type, "debugfs") == 0) - break; - } - fclose(fp); + ret = find_mountpoint("debugfs", (long) DEBUGFS_MAGIC, + debugfs_mountpoint, PATH_MAX + 1, + debugfs_known_mountpoints); + if (ret) + debugfs_found = true; - if (strcmp(type, "debugfs") != 0) - return NULL; - - debugfs_found = true; - - return debugfs_mountpoint; -} - -/* verify that a mountpoint is actually a debugfs instance */ - -int debugfs_valid_mountpoint(const char *debugfs) -{ - struct statfs st_fs; - - if (statfs(debugfs, &st_fs) < 0) - return -ENOENT; - else if ((long)st_fs.f_type != (long)DEBUGFS_MAGIC) - return -ENOENT; - - return 0; + return ret; } /* mount the debugfs somewhere if it's not mounted */ @@ -87,7 +62,7 @@ char *debugfs_mount(const char *mountpoint) mountpoint = getenv(PERF_DEBUGFS_ENVIRONMENT); /* if no environment variable, use default */ if (mountpoint == NULL) - mountpoint = "/sys/kernel/debug"; + mountpoint = DEBUGFS_DEFAULT_PATH; } if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0) diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h index 0739881a9897..455023698d2b 100644 --- a/tools/lib/api/fs/debugfs.h +++ b/tools/lib/api/fs/debugfs.h @@ -1,16 +1,7 @@ #ifndef __API_DEBUGFS_H__ #define __API_DEBUGFS_H__ -#define _STR(x) #x -#define STR(x) _STR(x) - -/* - * On most systems <limits.h> would have given us this, but not on some systems - * (e.g. GNU/Hurd). - */ -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif +#include "findfs.h" #ifndef DEBUGFS_MAGIC #define DEBUGFS_MAGIC 0x64626720 @@ -20,8 +11,8 @@ #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" #endif +bool debugfs_configured(void); const char *debugfs_find_mountpoint(void); -int debugfs_valid_mountpoint(const char *debugfs); char *debugfs_mount(const char *mountpoint); extern char debugfs_mountpoint[]; diff --git a/tools/lib/api/fs/findfs.c b/tools/lib/api/fs/findfs.c new file mode 100644 index 000000000000..49946cb6d7af --- /dev/null +++ b/tools/lib/api/fs/findfs.c @@ -0,0 +1,63 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <sys/vfs.h> + +#include "findfs.h" + +/* verify that a mountpoint is actually the type we want */ + +int valid_mountpoint(const char *mount, long magic) +{ + struct statfs st_fs; + + if (statfs(mount, &st_fs) < 0) + return -ENOENT; + else if ((long)st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +/* find the path to a mounted file system */ +const char *find_mountpoint(const char *fstype, long magic, + char *mountpoint, int len, + const char * const *known_mountpoints) +{ + const char * const *ptr; + char format[128]; + char type[100]; + FILE *fp; + + if (known_mountpoints) { + ptr = known_mountpoints; + while (*ptr) { + if (valid_mountpoint(*ptr, magic) == 0) { + strncpy(mountpoint, *ptr, len - 1); + mountpoint[len-1] = 0; + return mountpoint; + } + ptr++; + } + } + + /* give up and parse /proc/mounts */ + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + return NULL; + + snprintf(format, 128, "%%*s %%%ds %%99s %%*s %%*d %%*d\n", len); + + while (fscanf(fp, format, mountpoint, type) == 2) { + if (strcmp(type, fstype) == 0) + break; + } + fclose(fp); + + if (strcmp(type, fstype) != 0) + return NULL; + + return mountpoint; +} diff --git a/tools/lib/api/fs/findfs.h b/tools/lib/api/fs/findfs.h new file mode 100644 index 000000000000..b6f5d05acc42 --- /dev/null +++ b/tools/lib/api/fs/findfs.h @@ -0,0 +1,23 @@ +#ifndef __API_FINDFS_H__ +#define __API_FINDFS_H__ + +#include <stdbool.h> + +#define _STR(x) #x +#define STR(x) _STR(x) + +/* + * On most systems <limits.h> would have given us this, but not on some systems + * (e.g. GNU/Hurd). + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +const char *find_mountpoint(const char *fstype, long magic, + char *mountpoint, int len, + const char * const *known_mountpoints); + +int valid_mountpoint(const char *mount, long magic); + +#endif /* __API_FINDFS_H__ */ diff --git a/tools/lib/api/fs/tracefs.c b/tools/lib/api/fs/tracefs.c new file mode 100644 index 000000000000..e4aa9688b71e --- /dev/null +++ b/tools/lib/api/fs/tracefs.c @@ -0,0 +1,78 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdbool.h> +#include <sys/vfs.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <linux/kernel.h> + +#include "tracefs.h" + +#ifndef TRACEFS_DEFAULT_PATH +#define TRACEFS_DEFAULT_PATH "/sys/kernel/tracing" +#endif + +char tracefs_mountpoint[PATH_MAX + 1] = TRACEFS_DEFAULT_PATH; + +static const char * const tracefs_known_mountpoints[] = { + TRACEFS_DEFAULT_PATH, + "/sys/kernel/debug/tracing", + "/tracing", + "/trace", + 0, +}; + +static bool tracefs_found; + +bool tracefs_configured(void) +{ + return tracefs_find_mountpoint() != NULL; +} + +/* find the path to the mounted tracefs */ +const char *tracefs_find_mountpoint(void) +{ + const char *ret; + + if (tracefs_found) + return (const char *)tracefs_mountpoint; + + ret = find_mountpoint("tracefs", (long) TRACEFS_MAGIC, + tracefs_mountpoint, PATH_MAX + 1, + tracefs_known_mountpoints); + + if (ret) + tracefs_found = true; + + return ret; +} + +/* mount the tracefs somewhere if it's not mounted */ +char *tracefs_mount(const char *mountpoint) +{ + /* see if it's already mounted */ + if (tracefs_find_mountpoint()) + goto out; + + /* if not mounted and no argument */ + if (mountpoint == NULL) { + /* see if environment variable set */ + mountpoint = getenv(PERF_TRACEFS_ENVIRONMENT); + /* if no environment variable, use default */ + if (mountpoint == NULL) + mountpoint = TRACEFS_DEFAULT_PATH; + } + + if (mount(NULL, mountpoint, "tracefs", 0, NULL) < 0) + return NULL; + + /* save the mountpoint */ + tracefs_found = true; + strncpy(tracefs_mountpoint, mountpoint, sizeof(tracefs_mountpoint)); +out: + return tracefs_mountpoint; +} diff --git a/tools/lib/api/fs/tracefs.h b/tools/lib/api/fs/tracefs.h new file mode 100644 index 000000000000..da780ac49acb --- /dev/null +++ b/tools/lib/api/fs/tracefs.h @@ -0,0 +1,21 @@ +#ifndef __API_TRACEFS_H__ +#define __API_TRACEFS_H__ + +#include "findfs.h" + +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + +#ifndef PERF_TRACEFS_ENVIRONMENT +#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" +#endif + +bool tracefs_configured(void); +const char *tracefs_find_mountpoint(void); +int tracefs_valid_mountpoint(const char *debugfs); +char *tracefs_mount(const char *mountpoint); + +extern char tracefs_mountpoint[]; + +#endif /* __API_DEBUGFS_H__ */ diff --git a/tools/lib/lockdep/Build b/tools/lib/lockdep/Build new file mode 100644 index 000000000000..6f667355b068 --- /dev/null +++ b/tools/lib/lockdep/Build @@ -0,0 +1 @@ +liblockdep-y += common.o lockdep.o preload.o rbtree.o diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index 4b866c54f624..0c356fb65022 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -35,6 +35,10 @@ bindir = $(prefix)/$(bindir_relative) export DESTDIR DESTDIR_SQ INSTALL +MAKEFLAGS += --no-print-directory + +include ../../scripts/Makefile.include + # copy a bit from Linux kbuild ifeq ("$(origin V)", "command line") @@ -44,56 +48,21 @@ ifndef VERBOSE VERBOSE = 0 endif -ifeq ("$(origin O)", "command line") - BUILD_OUTPUT := $(O) +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) endif -ifeq ($(BUILD_SRC),) -ifneq ($(BUILD_OUTPUT),) - -define build_output - $(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT) \ - BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1 -endef - -saved-output := $(BUILD_OUTPUT) -BUILD_OUTPUT := $(shell cd $(BUILD_OUTPUT) && /bin/pwd) -$(if $(BUILD_OUTPUT),, \ - $(error output directory "$(saved-output)" does not exist)) - -all: sub-make - -gui: force - $(call build_output, all_cmd) - -$(filter-out gui,$(MAKECMDGOALS)): sub-make - -sub-make: force - $(call build_output, $(MAKECMDGOALS)) - - -# Leave processing to above invocation of make -skip-makefile := 1 - -endif # BUILD_OUTPUT -endif # BUILD_SRC - -# We process the rest of the Makefile if this is the final invocation of make -ifeq ($(skip-makefile),) - -srctree := $(realpath $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))) -objtree := $(realpath $(CURDIR)) -src := $(srctree) -obj := $(objtree) - -export prefix libdir bindir src obj - # Shell quotes libdir_SQ = $(subst ','\'',$(libdir)) bindir_SQ = $(subst ','\'',$(bindir)) -LIB_FILE = liblockdep.a liblockdep.so.$(LIBLOCKDEP_VERSION) +LIB_IN := $(OUTPUT)liblockdep-in.o + BIN_FILE = lockdep +LIB_FILE = $(OUTPUT)liblockdep.a $(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION) CONFIG_INCLUDES = CONFIG_LIBS = @@ -108,33 +77,23 @@ INCLUDES = -I. -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES) # Set compile option CFLAGS if not set elsewhere CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g +CFLAGS += -fPIC override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) ifeq ($(VERBOSE),1) Q = - print_compile = - print_app_build = - print_fpic_compile = print_shared_lib_compile = print_install = else Q = @ - print_compile = echo ' CC '$(OBJ); - print_app_build = echo ' BUILD '$(OBJ); - print_fpic_compile = echo ' CC FPIC '$(OBJ); - print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ); - print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ); - print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; + print_shared_lib_compile = echo ' LD '$(OBJ); + print_static_lib_build = echo ' LD '$(OBJ); + print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; endif -do_fpic_compile = \ - ($(print_fpic_compile) \ - $(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@) - -do_app_build = \ - ($(print_app_build) \ - $(CC) $^ -rdynamic -o $@ $(CONFIG_LIBS) $(LIBS)) +export srctree OUTPUT CC LD CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. obj do_compile_shared_library = \ ($(print_shared_lib_compile) \ @@ -144,22 +103,6 @@ do_build_static_lib = \ ($(print_static_lib_build) \ $(RM) $@; $(AR) rcs $@ $^) - -define do_compile - $(print_compile) \ - $(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@; -endef - -$(obj)/%.o: $(src)/%.c - $(Q)$(call do_compile) - -%.o: $(src)/%.c - $(Q)$(call do_compile) - -PEVENT_LIB_OBJS = common.o lockdep.o preload.o rbtree.o - -ALL_OBJS = $(PEVENT_LIB_OBJS) - CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) @@ -169,42 +112,15 @@ all: all_cmd all_cmd: $(CMD_TARGETS) -liblockdep.so.$(LIBLOCKDEP_VERSION): $(PEVENT_LIB_OBJS) +$(LIB_IN): force + $(Q)$(MAKE) $(build)=liblockdep + +liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN) $(Q)$(do_compile_shared_library) -liblockdep.a: $(PEVENT_LIB_OBJS) +liblockdep.a: $(LIB_IN) $(Q)$(do_build_static_lib) -$(PEVENT_LIB_OBJS): %.o: $(src)/%.c - $(Q)$(do_fpic_compile) - -## make deps - -all_objs := $(sort $(ALL_OBJS)) -all_deps := $(all_objs:%.o=.%.d) - -# let .d file also depends on the source and header files -define check_deps - @set -e; $(RM) $@; \ - $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ - sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - $(RM) $@.$$$$ -endef - -$(all_deps): .%.d: $(src)/%.c - $(Q)$(call check_deps) - -$(all_objs) : %.o : .%.d - -dep_includes := $(wildcard $(all_deps)) - -ifneq ($(dep_includes),) - include $(dep_includes) -endif - -### Detect environment changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) - tags: force $(RM) tags find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ @@ -233,8 +149,6 @@ clean: $(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d $(RM) tags TAGS -endif # skip-makefile - PHONY += force force: diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build new file mode 100644 index 000000000000..c681d0575d16 --- /dev/null +++ b/tools/lib/traceevent/Build @@ -0,0 +1,17 @@ +libtraceevent-y += event-parse.o +libtraceevent-y += event-plugin.o +libtraceevent-y += trace-seq.o +libtraceevent-y += parse-filter.o +libtraceevent-y += parse-utils.o +libtraceevent-y += kbuffer-parse.o + +plugin_jbd2-y += plugin_jbd2.o +plugin_hrtimer-y += plugin_hrtimer.o +plugin_kmem-y += plugin_kmem.o +plugin_kvm-y += plugin_kvm.o +plugin_mac80211-y += plugin_mac80211.o +plugin_sched_switch-y += plugin_sched_switch.o +plugin_function-y += plugin_function.o +plugin_xen-y += plugin_xen.o +plugin_scsi-y += plugin_scsi.o +plugin_cfg80211-y += plugin_cfg80211.o diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 005c9cc06935..d410da335e3d 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -67,7 +67,7 @@ PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)" PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))' endif -include $(if $(BUILD_SRC),$(BUILD_SRC)/)../../scripts/Makefile.include +include ../../scripts/Makefile.include # copy a bit from Linux kbuild @@ -78,40 +78,13 @@ ifndef VERBOSE VERBOSE = 0 endif -ifeq ("$(origin O)", "command line") - BUILD_OUTPUT := $(O) +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) endif -ifeq ($(BUILD_SRC),) -ifneq ($(OUTPUT),) - -define build_output - $(if $(VERBOSE:1=),@)+$(MAKE) -C $(OUTPUT) \ - BUILD_SRC=$(CURDIR)/ -f $(CURDIR)/Makefile $1 -endef - -all: sub-make - -$(MAKECMDGOALS): sub-make - -sub-make: force - $(call build_output, $(MAKECMDGOALS)) - - -# Leave processing to above invocation of make -skip-makefile := 1 - -endif # OUTPUT -endif # BUILD_SRC - -# We process the rest of the Makefile if this is the final invocation of make -ifeq ($(skip-makefile),) - -srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) -objtree := $(CURDIR) -src := $(srctree) -obj := $(objtree) - export prefix bindir src obj # Shell quotes @@ -132,16 +105,19 @@ EXTRAVERSION = $(EP_EXTRAVERSION) OBJ = $@ N = -export Q VERBOSE - EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) -INCLUDES = -I. -I $(srctree)/../../include $(CONFIG_INCLUDES) +INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES) -# Set compile option CFLAGS if not set elsewhere -CFLAGS ?= -g -Wall +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif # Append required CFLAGS +override CFLAGS += -fPIC override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) override CFLAGS += $(udis86-flags) -D_GNU_SOURCE @@ -151,74 +127,58 @@ else Q = @ endif -do_compile_shared_library = \ - ($(print_shared_lib_compile) \ - $(CC) --shared $^ -o $@) - -do_plugin_build = \ - ($(print_plugin_build) \ - $(CC) $(CFLAGS) -shared -nostartfiles -o $@ $<) - -do_build_static_lib = \ - ($(print_static_lib_build) \ - $(RM) $@; $(AR) rcs $@ $^) - - -do_compile = $(QUIET_CC)$(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@; +# Disable command line variables (CFLAGS) overide from top +# level Makefile (perf), otherwise build Makefile will get +# the same command line setup. +MAKEOVERRIDES= -$(obj)/%.o: $(src)/%.c - $(call do_compile) +export srctree OUTPUT CC LD CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. obj -%.o: $(src)/%.c - $(call do_compile) +PLUGINS = plugin_jbd2.so +PLUGINS += plugin_hrtimer.so +PLUGINS += plugin_kmem.so +PLUGINS += plugin_kvm.so +PLUGINS += plugin_mac80211.so +PLUGINS += plugin_sched_switch.so +PLUGINS += plugin_function.so +PLUGINS += plugin_xen.so +PLUGINS += plugin_scsi.so +PLUGINS += plugin_cfg80211.so -PEVENT_LIB_OBJS = event-parse.o -PEVENT_LIB_OBJS += event-plugin.o -PEVENT_LIB_OBJS += trace-seq.o -PEVENT_LIB_OBJS += parse-filter.o -PEVENT_LIB_OBJS += parse-utils.o -PEVENT_LIB_OBJS += kbuffer-parse.o +PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) +PLUGINS_IN := $(PLUGINS:.so=-in.o) -PLUGIN_OBJS = plugin_jbd2.o -PLUGIN_OBJS += plugin_hrtimer.o -PLUGIN_OBJS += plugin_kmem.o -PLUGIN_OBJS += plugin_kvm.o -PLUGIN_OBJS += plugin_mac80211.o -PLUGIN_OBJS += plugin_sched_switch.o -PLUGIN_OBJS += plugin_function.o -PLUGIN_OBJS += plugin_xen.o -PLUGIN_OBJS += plugin_scsi.o -PLUGIN_OBJS += plugin_cfg80211.o - -PLUGINS := $(PLUGIN_OBJS:.o=.so) - -ALL_OBJS = $(PEVENT_LIB_OBJS) $(PLUGIN_OBJS) +TE_IN := $(OUTPUT)libtraceevent-in.o +LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) CMD_TARGETS = $(LIB_FILE) $(PLUGINS) TARGETS = $(CMD_TARGETS) - all: all_cmd all_cmd: $(CMD_TARGETS) -libtraceevent.so: $(PEVENT_LIB_OBJS) +$(TE_IN): force + $(Q)$(MAKE) $(build)=libtraceevent + +$(OUTPUT)libtraceevent.so: $(TE_IN) $(QUIET_LINK)$(CC) --shared $^ -o $@ -libtraceevent.a: $(PEVENT_LIB_OBJS) +$(OUTPUT)libtraceevent.a: $(TE_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ plugins: $(PLUGINS) -$(PEVENT_LIB_OBJS): %.o: $(src)/%.c TRACEEVENT-CFLAGS - $(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@ +__plugin_obj = $(notdir $@) + plugin_obj = $(__plugin_obj:-in.o=) -$(PLUGIN_OBJS): %.o : $(src)/%.c - $(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) -fPIC -o $@ $< +$(PLUGINS_IN): force + $(Q)$(MAKE) $(build)=$(plugin_obj) -$(PLUGINS): %.so: %.o - $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $< +$(OUTPUT)%.so: $(OUTPUT)%-in.o + $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $^ define make_version.h (echo '/* This file is automatically generated. Do not modify. */'; \ @@ -255,40 +215,6 @@ define update_dir fi); endef -## make deps - -all_objs := $(sort $(ALL_OBJS)) -all_deps := $(all_objs:%.o=.%.d) - -# let .d file also depends on the source and header files -define check_deps - @set -e; $(RM) $@; \ - $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ - sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - $(RM) $@.$$$$ -endef - -$(all_deps): .%.d: $(src)/%.c - $(Q)$(call check_deps) - -$(all_objs) : %.o : .%.d - -dep_includes := $(wildcard $(all_deps)) - -ifneq ($(dep_includes),) - include $(dep_includes) -endif - -### Detect environment changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) - -TRACEEVENT-CFLAGS: force - @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat TRACEEVENT-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " FLAGS: * new build flags or cross compiler"; \ - echo "$$FLAGS" >TRACEEVENT-CFLAGS; \ - fi - tags: force $(RM) tags find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ @@ -327,14 +253,9 @@ clean: $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \ $(RM) TRACEEVENT-CFLAGS tags TAGS -endif # skip-makefile - PHONY += force plugins force: -plugins: - @echo > /dev/null - # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. .PHONY: $(PHONY) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index afe20ed9fac8..e0917c0f5d9f 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -304,7 +304,10 @@ int pevent_register_comm(struct pevent *pevent, const char *comm, int pid) if (!item) return -1; - item->comm = strdup(comm); + if (comm) + item->comm = strdup(comm); + else + item->comm = strdup("<...>"); if (!item->comm) { free(item); return -1; @@ -318,9 +321,14 @@ int pevent_register_comm(struct pevent *pevent, const char *comm, int pid) return 0; } -void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock) +int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock) { - pevent->trace_clock = trace_clock; + pevent->trace_clock = strdup(trace_clock); + if (!pevent->trace_clock) { + errno = ENOMEM; + return -1; + } + return 0; } struct func_map { @@ -758,6 +766,11 @@ static void free_arg(struct print_arg *arg) free_arg(arg->hex.field); free_arg(arg->hex.size); break; + case PRINT_INT_ARRAY: + free_arg(arg->int_array.field); + free_arg(arg->int_array.count); + free_arg(arg->int_array.el_size); + break; case PRINT_TYPE: free(arg->typecast.type); free_arg(arg->typecast.item); @@ -1926,7 +1939,22 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) goto out_warn_free; type = process_arg_token(event, right, tok, type); - arg->op.right = right; + + if (right->type == PRINT_OP && + get_op_prio(arg->op.op) < get_op_prio(right->op.op)) { + struct print_arg tmp; + + /* rotate ops according to the priority */ + arg->op.right = right->op.left; + + tmp = *arg; + *arg = *right; + *right = tmp; + + arg->op.left = right; + } else { + arg->op.right = right; + } } else if (strcmp(token, "[") == 0) { @@ -2014,6 +2042,38 @@ process_entry(struct event_format *event __maybe_unused, struct print_arg *arg, return EVENT_ERROR; } +static int alloc_and_process_delim(struct event_format *event, char *next_token, + struct print_arg **print_arg) +{ + struct print_arg *field; + enum event_type type; + char *token; + int ret = 0; + + field = alloc_arg(); + if (!field) { + do_warning_event(event, "%s: not enough memory!", __func__); + errno = ENOMEM; + return -1; + } + + type = process_arg(event, field, &token); + + if (test_type_token(type, token, EVENT_DELIM, next_token)) { + errno = EINVAL; + ret = -1; + free_arg(field); + goto out_free_token; + } + + *print_arg = field; + +out_free_token: + free_token(token); + + return ret; +} + static char *arg_eval (struct print_arg *arg); static unsigned long long @@ -2486,49 +2546,46 @@ out_free: static enum event_type process_hex(struct event_format *event, struct print_arg *arg, char **tok) { - struct print_arg *field; - enum event_type type; - char *token = NULL; - memset(arg, 0, sizeof(*arg)); arg->type = PRINT_HEX; - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - goto out_free; - } + if (alloc_and_process_delim(event, ",", &arg->hex.field)) + goto out; - type = process_arg(event, field, &token); + if (alloc_and_process_delim(event, ")", &arg->hex.size)) + goto free_field; - if (test_type_token(type, token, EVENT_DELIM, ",")) - goto out_free; - - arg->hex.field = field; + return read_token_item(tok); - free_token(token); +free_field: + free_arg(arg->hex.field); +out: + *tok = NULL; + return EVENT_ERROR; +} - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - *tok = NULL; - return EVENT_ERROR; - } +static enum event_type +process_int_array(struct event_format *event, struct print_arg *arg, char **tok) +{ + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_INT_ARRAY; - type = process_arg(event, field, &token); + if (alloc_and_process_delim(event, ",", &arg->int_array.field)) + goto out; - if (test_type_token(type, token, EVENT_DELIM, ")")) - goto out_free; + if (alloc_and_process_delim(event, ",", &arg->int_array.count)) + goto free_field; - arg->hex.size = field; + if (alloc_and_process_delim(event, ")", &arg->int_array.el_size)) + goto free_size; - free_token(token); - type = read_token_item(tok); - return type; + return read_token_item(tok); - out_free: - free_arg(field); - free_token(token); +free_size: + free_arg(arg->int_array.count); +free_field: + free_arg(arg->int_array.field); +out: *tok = NULL; return EVENT_ERROR; } @@ -2828,6 +2885,10 @@ process_function(struct event_format *event, struct print_arg *arg, free_token(token); return process_hex(event, arg, tok); } + if (strcmp(token, "__print_array") == 0) { + free_token(token); + return process_int_array(event, arg, tok); + } if (strcmp(token, "__get_str") == 0) { free_token(token); return process_str(event, arg, tok); @@ -3356,6 +3417,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg break; case PRINT_FLAGS: case PRINT_SYMBOL: + case PRINT_INT_ARRAY: case PRINT_HEX: break; case PRINT_TYPE: @@ -3568,7 +3630,7 @@ static const struct flag flags[] = { { "HRTIMER_RESTART", 1 }, }; -static unsigned long long eval_flag(const char *flag) +static long long eval_flag(const char *flag) { int i; @@ -3584,7 +3646,7 @@ static unsigned long long eval_flag(const char *flag) if (strcmp(flags[i].name, flag) == 0) return flags[i].value; - return 0; + return -1LL; } static void print_str_to_seq(struct trace_seq *s, const char *format, @@ -3658,7 +3720,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, struct print_flag_sym *flag; struct format_field *field; struct printk_map *printk; - unsigned long long val, fval; + long long val, fval; unsigned long addr; char *str; unsigned char *hex; @@ -3717,11 +3779,11 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, print = 0; for (flag = arg->flags.flags; flag; flag = flag->next) { fval = eval_flag(flag->value); - if (!val && !fval) { + if (!val && fval < 0) { print_str_to_seq(s, format, len_arg, flag->str); break; } - if (fval && (val & fval) == fval) { + if (fval > 0 && (val & fval) == fval) { if (print && arg->flags.delim) trace_seq_puts(s, arg->flags.delim); print_str_to_seq(s, format, len_arg, flag->str); @@ -3766,6 +3828,54 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, } break; + case PRINT_INT_ARRAY: { + void *num; + int el_size; + + if (arg->int_array.field->type == PRINT_DYNAMIC_ARRAY) { + unsigned long offset; + struct format_field *field = + arg->int_array.field->dynarray.field; + offset = pevent_read_number(pevent, + data + field->offset, + field->size); + num = data + (offset & 0xffff); + } else { + field = arg->int_array.field->field.field; + if (!field) { + str = arg->int_array.field->field.name; + field = pevent_find_any_field(event, str); + if (!field) + goto out_warning_field; + arg->int_array.field->field.field = field; + } + num = data + field->offset; + } + len = eval_num_arg(data, size, event, arg->int_array.count); + el_size = eval_num_arg(data, size, event, + arg->int_array.el_size); + for (i = 0; i < len; i++) { + if (i) + trace_seq_putc(s, ' '); + + if (el_size == 1) { + trace_seq_printf(s, "%u", *(uint8_t *)num); + } else if (el_size == 2) { + trace_seq_printf(s, "%u", *(uint16_t *)num); + } else if (el_size == 4) { + trace_seq_printf(s, "%u", *(uint32_t *)num); + } else if (el_size == 8) { + trace_seq_printf(s, "%lu", *(uint64_t *)num); + } else { + trace_seq_printf(s, "BAD SIZE:%d 0x%x", + el_size, *(uint8_t *)num); + el_size = 1; + } + + num += el_size; + } + break; + } case PRINT_TYPE: break; case PRINT_STRING: { @@ -3976,7 +4086,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc if (asprintf(&arg->atom.atom, "%lld", ip) < 0) goto out_free; - /* skip the first "%pf: " */ + /* skip the first "%ps: " */ for (ptr = fmt + 5, bptr = data + field->offset; bptr < data + size && *ptr; ptr++) { int ls = 0; @@ -3997,6 +4107,10 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc goto process_again; case '.': goto process_again; + case 'z': + case 'Z': + ls = 1; + goto process_again; case 'p': ls = 1; /* fall through */ @@ -4939,6 +5053,96 @@ const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid) return comm; } +static struct cmdline * +pid_from_cmdlist(struct pevent *pevent, const char *comm, struct cmdline *next) +{ + struct cmdline_list *cmdlist = (struct cmdline_list *)next; + + if (cmdlist) + cmdlist = cmdlist->next; + else + cmdlist = pevent->cmdlist; + + while (cmdlist && strcmp(cmdlist->comm, comm) != 0) + cmdlist = cmdlist->next; + + return (struct cmdline *)cmdlist; +} + +/** + * pevent_data_pid_from_comm - return the pid from a given comm + * @pevent: a handle to the pevent + * @comm: the cmdline to find the pid from + * @next: the cmdline structure to find the next comm + * + * This returns the cmdline structure that holds a pid for a given + * comm, or NULL if none found. As there may be more than one pid for + * a given comm, the result of this call can be passed back into + * a recurring call in the @next paramater, and then it will find the + * next pid. + * Also, it does a linear seach, so it may be slow. + */ +struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm, + struct cmdline *next) +{ + struct cmdline *cmdline; + + /* + * If the cmdlines have not been converted yet, then use + * the list. + */ + if (!pevent->cmdlines) + return pid_from_cmdlist(pevent, comm, next); + + if (next) { + /* + * The next pointer could have been still from + * a previous call before cmdlines were created + */ + if (next < pevent->cmdlines || + next >= pevent->cmdlines + pevent->cmdline_count) + next = NULL; + else + cmdline = next++; + } + + if (!next) + cmdline = pevent->cmdlines; + + while (cmdline < pevent->cmdlines + pevent->cmdline_count) { + if (strcmp(cmdline->comm, comm) == 0) + return cmdline; + cmdline++; + } + return NULL; +} + +/** + * pevent_cmdline_pid - return the pid associated to a given cmdline + * @cmdline: The cmdline structure to get the pid from + * + * Returns the pid for a give cmdline. If @cmdline is NULL, then + * -1 is returned. + */ +int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline) +{ + struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline; + + if (!cmdline) + return -1; + + /* + * If cmdlines have not been created yet, or cmdline is + * not part of the array, then treat it as a cmdlist instead. + */ + if (!pevent->cmdlines || + cmdline < pevent->cmdlines || + cmdline >= pevent->cmdlines + pevent->cmdline_count) + return cmdlist->pid; + + return cmdline->pid; +} + /** * pevent_data_comm_from_pid - parse the data into the print format * @s: the trace_seq to write to @@ -5256,6 +5460,15 @@ static void print_args(struct print_arg *args) print_args(args->hex.size); printf(")"); break; + case PRINT_INT_ARRAY: + printf("__print_array("); + print_args(args->int_array.field); + printf(", "); + print_args(args->int_array.count); + printf(", "); + print_args(args->int_array.el_size); + printf(")"); + break; case PRINT_STRING: case PRINT_BSTRING: printf("__get_str(%s)", args->string.string); @@ -6228,15 +6441,20 @@ void pevent_ref(struct pevent *pevent) pevent->ref_count++; } +void pevent_free_format_field(struct format_field *field) +{ + free(field->type); + free(field->name); + free(field); +} + static void free_format_fields(struct format_field *field) { struct format_field *next; while (field) { next = field->next; - free(field->type); - free(field->name); - free(field); + pevent_free_format_field(field); field = next; } } @@ -6341,6 +6559,7 @@ void pevent_free(struct pevent *pevent) free_handler(handle); } + free(pevent->trace_clock); free(pevent->events); free(pevent->sort_events); diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 7a3873ff9a4f..86a5839fb048 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -22,6 +22,7 @@ #include <stdbool.h> #include <stdarg.h> +#include <stdio.h> #include <regex.h> #include <string.h> @@ -91,6 +92,7 @@ extern int trace_seq_putc(struct trace_seq *s, unsigned char c); extern void trace_seq_terminate(struct trace_seq *s); +extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp); extern int trace_seq_do_printf(struct trace_seq *s); @@ -114,7 +116,7 @@ struct pevent_plugin_option { char *name; char *plugin_alias; char *description; - char *value; + const char *value; void *priv; int set; }; @@ -152,6 +154,10 @@ struct pevent_plugin_option { * .plugin_alias is used to give a shorter name to access * the vairable. Useful if a plugin handles more than one event. * + * If .value is not set, then it is considered a boolean and only + * .set will be processed. If .value is defined, then it is considered + * a string option and .set will be ignored. + * * PEVENT_PLUGIN_ALIAS: (optional) * The name to use for finding options (uses filename if not defined) */ @@ -245,6 +251,12 @@ struct print_arg_hex { struct print_arg *size; }; +struct print_arg_int_array { + struct print_arg *field; + struct print_arg *count; + struct print_arg *el_size; +}; + struct print_arg_dynarray { struct format_field *field; struct print_arg *index; @@ -273,6 +285,7 @@ enum print_arg_type { PRINT_FLAGS, PRINT_SYMBOL, PRINT_HEX, + PRINT_INT_ARRAY, PRINT_TYPE, PRINT_STRING, PRINT_BSTRING, @@ -292,6 +305,7 @@ struct print_arg { struct print_arg_flags flags; struct print_arg_symbol symbol; struct print_arg_hex hex; + struct print_arg_int_array int_array; struct print_arg_func func; struct print_arg_string string; struct print_arg_bitmask bitmask; @@ -597,7 +611,7 @@ enum trace_flag_type { }; int pevent_register_comm(struct pevent *pevent, const char *comm, int pid); -void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock); +int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock); int pevent_register_function(struct pevent *pevent, char *name, unsigned long long addr, char *mod); int pevent_register_print_string(struct pevent *pevent, const char *fmt, @@ -617,6 +631,7 @@ enum pevent_errno pevent_parse_format(struct pevent *pevent, const char *buf, unsigned long size, const char *sys); void pevent_free_format(struct event_format *event); +void pevent_free_format_field(struct format_field *field); void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event, const char *name, struct pevent_record *record, @@ -675,6 +690,11 @@ int pevent_data_type(struct pevent *pevent, struct pevent_record *rec); struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type); int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec); const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid); +struct cmdline; +struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm, + struct cmdline *next); +int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline); + void pevent_event_info(struct trace_seq *s, struct event_format *event, struct pevent_record *record); int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum, diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index 136162c03af1..a16756ae3526 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -18,6 +18,7 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#include <ctype.h> #include <stdio.h> #include <string.h> #include <dlfcn.h> @@ -49,6 +50,52 @@ struct plugin_list { void *handle; }; +static void lower_case(char *str) +{ + if (!str) + return; + for (; *str; str++) + *str = tolower(*str); +} + +static int update_option_value(struct pevent_plugin_option *op, const char *val) +{ + char *op_val; + + if (!val) { + /* toggle, only if option is boolean */ + if (op->value) + /* Warn? */ + return 0; + op->set ^= 1; + return 0; + } + + /* + * If the option has a value then it takes a string + * otherwise the option is a boolean. + */ + if (op->value) { + op->value = val; + return 0; + } + + /* Option is boolean, must be either "1", "0", "true" or "false" */ + + op_val = strdup(val); + if (!op_val) + return -1; + lower_case(op_val); + + if (strcmp(val, "1") == 0 || strcmp(val, "true") == 0) + op->set = 1; + else if (strcmp(val, "0") == 0 || strcmp(val, "false") == 0) + op->set = 0; + free(op_val); + + return 0; +} + /** * traceevent_plugin_list_options - get list of plugin options * @@ -120,6 +167,7 @@ update_option(const char *file, struct pevent_plugin_option *option) { struct trace_plugin_options *op; char *plugin; + int ret = 0; if (option->plugin_alias) { plugin = strdup(option->plugin_alias); @@ -144,9 +192,10 @@ update_option(const char *file, struct pevent_plugin_option *option) if (strcmp(op->option, option->name) != 0) continue; - option->value = op->value; - option->set ^= 1; - goto out; + ret = update_option_value(option, op->value); + if (ret) + goto out; + break; } /* first look for unnamed options */ @@ -156,14 +205,13 @@ update_option(const char *file, struct pevent_plugin_option *option) if (strcmp(op->option, option->name) != 0) continue; - option->value = op->value; - option->set ^= 1; + ret = update_option_value(option, op->value); break; } out: free(plugin); - return 0; + return ret; } /** diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c index dcc665228c71..3bcada3ae05a 100644 --- a/tools/lib/traceevent/kbuffer-parse.c +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -372,7 +372,6 @@ translate_data(struct kbuffer *kbuf, void *data, void **rptr, switch (type_len) { case KBUFFER_TYPE_PADDING: *length = read_4(kbuf, data); - data += *length; break; case KBUFFER_TYPE_TIME_EXTEND: @@ -730,3 +729,14 @@ void kbuffer_set_old_format(struct kbuffer *kbuf) kbuf->next_event = __old_next_event; } + +/** + * kbuffer_start_of_data - return offset of where data starts on subbuffer + * @kbuf: The kbuffer + * + * Returns the location on the subbuffer where the data starts. + */ +int kbuffer_start_of_data(struct kbuffer *kbuf) +{ + return kbuf->start; +} diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h index c831f64b17a0..03dce757553f 100644 --- a/tools/lib/traceevent/kbuffer.h +++ b/tools/lib/traceevent/kbuffer.h @@ -63,5 +63,6 @@ int kbuffer_missed_events(struct kbuffer *kbuf); int kbuffer_subbuffer_size(struct kbuffer *kbuf); void kbuffer_set_old_format(struct kbuffer *kbuf); +int kbuffer_start_of_data(struct kbuffer *kbuf); #endif /* _K_BUFFER_H */ diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index b50234402fc2..0144b3d1bb77 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1058,6 +1058,7 @@ process_filter(struct event_format *event, struct filter_arg **parg, *parg = current_op; else *parg = current_exp; + free(token); return PEVENT_ERRNO__UNBALANCED_PAREN; } break; @@ -1168,6 +1169,7 @@ process_filter(struct event_format *event, struct filter_arg **parg, *parg = current_op; + free(token); return 0; fail_alloc: diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c index ec3bd16a5488..292dc9f1d233 100644 --- a/tools/lib/traceevent/trace-seq.c +++ b/tools/lib/traceevent/trace-seq.c @@ -231,19 +231,24 @@ void trace_seq_terminate(struct trace_seq *s) s->buffer[s->len] = 0; } -int trace_seq_do_printf(struct trace_seq *s) +int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp) { TRACE_SEQ_CHECK(s); switch (s->state) { case TRACE_SEQ__GOOD: - return printf("%.*s", s->len, s->buffer); + return fprintf(fp, "%.*s", s->len, s->buffer); case TRACE_SEQ__BUFFER_POISONED: - puts("Usage of trace_seq after it was destroyed"); + fprintf(fp, "%s\n", "Usage of trace_seq after it was destroyed"); break; case TRACE_SEQ__MEM_ALLOC_FAILED: - puts("Can't allocate trace_seq buffer memory"); + fprintf(fp, "%s\n", "Can't allocate trace_seq buffer memory"); break; } return -1; } + +int trace_seq_do_printf(struct trace_seq *s) +{ + return trace_seq_do_fprintf(s, stdout); +} diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 40399c3d97d6..812f904193e8 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -1,6 +1,7 @@ PERF-CFLAGS PERF-GUI-VARS PERF-VERSION-FILE +FEATURE-DUMP perf perf-read-vdso32 perf-read-vdsox32 diff --git a/tools/perf/Build b/tools/perf/Build new file mode 100644 index 000000000000..b77370ef7005 --- /dev/null +++ b/tools/perf/Build @@ -0,0 +1,44 @@ +perf-y += builtin-bench.o +perf-y += builtin-annotate.o +perf-y += builtin-diff.o +perf-y += builtin-evlist.o +perf-y += builtin-help.o +perf-y += builtin-sched.o +perf-y += builtin-buildid-list.o +perf-y += builtin-buildid-cache.o +perf-y += builtin-list.o +perf-y += builtin-record.o +perf-y += builtin-report.o +perf-y += builtin-stat.o +perf-y += builtin-timechart.o +perf-y += builtin-top.o +perf-y += builtin-script.o +perf-y += builtin-kmem.o +perf-y += builtin-lock.o +perf-y += builtin-kvm.o +perf-y += builtin-inject.o +perf-y += builtin-mem.o +perf-y += builtin-data.o + +perf-$(CONFIG_AUDIT) += builtin-trace.o +perf-$(CONFIG_LIBELF) += builtin-probe.o + +perf-y += bench/ +perf-y += tests/ + +perf-y += perf.o + +paths += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" +paths += -DPERF_INFO_PATH="BUILD_STR($(infodir_SQ))" +paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))" + +CFLAGS_builtin-help.o += $(paths) +CFLAGS_builtin-timechart.o += $(paths) +CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE + +libperf-y += util/ +libperf-y += arch/ +libperf-y += ui/ +libperf-y += scripts/ + +gtk-y += ui/gtk/ diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt new file mode 100644 index 000000000000..f6fc6507ba55 --- /dev/null +++ b/tools/perf/Documentation/Build.txt @@ -0,0 +1,49 @@ + +1) perf build +============= +The perf build process consists of several separated building blocks, +which are linked together to form the perf binary: + - libperf library (static) + - perf builtin commands + - traceevent library (static) + - GTK ui library + +Several makefiles govern the perf build: + + - Makefile + top level Makefile working as a wrapper that calls the main + Makefile.perf with a -j option to do parallel builds. + + - Makefile.perf + main makefile that triggers build of all perf objects including + installation and documentation processing. + + - tools/build/Makefile.build + main makefile of the build framework + + - tools/build/Build.include + build framework generic definitions + + - Build makefiles + makefiles that defines build objects + +Please refer to tools/build/Documentation/Build.txt for more +information about build framework. + + +2) perf build +============= +The Makefile.perf triggers the build framework for build objects: + perf, libperf, gtk + +resulting in following objects: + $ ls *-in.o + gtk-in.o libperf-in.o perf-in.o + +Those objects are then used in final linking: + libperf-gtk.so <- gtk-in.o libperf-in.o + perf <- perf-in.o libperf-in.o + + +NOTE this description is omitting other libraries involved, only + focusing on build framework outcomes diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index 0294c57b1f5e..dd07b55f58d8 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -12,9 +12,9 @@ SYNOPSIS DESCRIPTION ----------- -This command manages the build-id cache. It can add and remove files to/from -the cache. In the future it should as well purge older entries, set upper -limits for the space used by the cache, etc. +This command manages the build-id cache. It can add, remove, update and purge +files to/from the cache. In the future it should as well set upper limits for +the space used by the cache, etc. OPTIONS ------- @@ -36,14 +36,24 @@ OPTIONS actually made. -r:: --remove=:: - Remove specified file from the cache. + Remove a cached binary which has same build-id of specified file + from the cache. +-p:: +--purge=:: + Purge all cached binaries including older caches which have specified + path from the cache. -M:: --missing=:: List missing build ids in the cache for the specified file. -u:: ---update:: - Update specified file of the cache. It can be used to update kallsyms - kernel dso to vmlinux in order to support annotation. +--update=:: + Update specified file of the cache. Note that this doesn't remove + older entires since those may be still needed for annotating old + (or remote) perf.data. Only if there is already a cache which has + exactly same build-id, that is replaced by new one. It can be used + to update kallsyms and kernel dso to vmlinux in order to support + annotation. + -v:: --verbose:: Be more verbose. diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt new file mode 100644 index 000000000000..be8fa1a0a97e --- /dev/null +++ b/tools/perf/Documentation/perf-data.txt @@ -0,0 +1,40 @@ +perf-data(1) +============== + +NAME +---- +perf-data - Data file related processing + +SYNOPSIS +-------- +[verse] +'perf data' [<common options>] <command> [<options>]", + +DESCRIPTION +----------- +Data file related processing. + +COMMANDS +-------- +convert:: + Converts perf data file into another format (only CTF [1] format is support by now). + It's possible to set data-convert debug variable to get debug messages from conversion, + like: + perf --debug data-convert data convert ... + +OPTIONS for 'convert' +--------------------- +--to-ctf:: + Triggers the CTF conversion, specify the path of CTF data directory. + +-i:: + Specify input perf data file path. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +SEE ALSO +-------- +linkperf:perf[1] +[1] Common Trace Format - http://www.efficios.com/ctf diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index e463caa3eb49..d1deb573877f 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -20,12 +20,20 @@ If no parameters are passed it will assume perf.data.old and perf.data. The differential profile is displayed only for events matching both specified perf.data files. +If no parameters are passed the samples will be sorted by dso and symbol. +As the perf.data files could come from different binaries, the symbols addresses +could vary. So perf diff is based on the comparison of the files and +symbols name. + OPTIONS ------- -D:: --dump-raw-trace:: Dump raw trace in ASCII. +--kallsyms=<file>:: + kallsyms pathname + -m:: --modules:: Load module symbols. WARNING: use only with -k and LIVE kernel diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt index 7c8fbbf3f61c..150253cc3c97 100644 --- a/tools/perf/Documentation/perf-kmem.txt +++ b/tools/perf/Documentation/perf-kmem.txt @@ -25,6 +25,10 @@ OPTIONS --input=<file>:: Select the input file (default: perf.data unless stdin is a fifo) +-v:: +--verbose:: + Be more verbose. (show symbol address, etc) + --caller:: Show per-callsite statistics diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 3e2aec94f806..bada8933fdd4 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -26,6 +26,7 @@ counted. The following modifiers exist: u - user-space counting k - kernel counting h - hypervisor counting + I - non idle counting G - guest counting (in KVM guests) H - host counting (not in KVM guests) p - precise level @@ -127,6 +128,12 @@ To limit the list use: One or more types can be used at the same time, listing the events for the types specified. +Support raw format: + +. '--raw-dump', shows the raw-dump of all the events. +. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of + a certain kind of events. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-top[1], diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index aaa869be3dc1..239609c09f83 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -47,6 +47,12 @@ OPTIONS -v:: --verbose:: Be more verbose (show parsed arguments, etc). + Can not use with -q. + +-q:: +--quiet:: + Be quiet (do not show any messages including errors). + Can not use with -v. -a:: --add=:: @@ -96,7 +102,7 @@ OPTIONS Dry run. With this option, --add and --del doesn't execute actual adding and removal operations. ---max-probes:: +--max-probes=NUM:: Set the maximum number of probe points for an event. Default is 128. -x:: @@ -104,8 +110,13 @@ OPTIONS Specify path to the executable or shared library file for user space tracing. Can also be used with --funcs option. +--demangle:: + Demangle application symbols. --no-demangle is also available + for disabling demangling. + --demangle-kernel:: - Demangle kernel symbols. + Demangle kernel symbols. --no-demangle-kernel is also available + for disabling kernel demangling. In absence of -m/-x options, perf probe checks if the first argument after the options is an absolute path name. If its an absolute path, perf probe @@ -137,6 +148,7 @@ Each probe argument follows below syntax. [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) +'$vars' special argument is also available for NAME, it is expanded to the local variables which can access at given probe point. 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 31e977459c51..4847a793de65 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -55,6 +55,11 @@ OPTIONS If you want to profile write accesses in [0x1000~1008), just set 'mem:0x1000/8:w'. + - a group of events surrounded by a pair of brace ("{event1,event2,...}"). + Each event is separated by commas and the group should be quoted to + prevent the shell interpretation. You also need to use --group on + "perf report" to view group events together. + --filter=<filter>:: Event filter. @@ -62,9 +67,6 @@ OPTIONS --all-cpus:: System-wide collection from all CPUs. --l:: - Scale counter values. - -p:: --pid=:: Record events on existing process ID (comma separated list). @@ -107,6 +109,10 @@ OPTIONS specification with appended unit character - B/K/M/G. The size is rounded up to have nearest pages power of two value. +--group:: + Put all events in a single event group. This precedes the --event + option and remains only for backward compatibility. See --event. + -g:: Enables call-graph (stack chain/backtrace) recording. @@ -115,13 +121,19 @@ OPTIONS implies -g. Allows specifying "fp" (frame pointer) or "dwarf" - (DWARF's CFI - Call Frame Information) as the method to collect + (DWARF's CFI - Call Frame Information) or "lbr" + (Hardware Last Branch Record facility) as the method to collect the information used to show the call graphs. In some systems, where binaries are build with gcc --fomit-frame-pointer, using the "fp" method will produce bogus call graphs, using "dwarf", if available (perf tools linked to the libunwind library) should be used instead. + Using the "lbr" method doesn't require any compiler options. It + will produce call graphs from the hardware LBR registers. The + main limition is that it is only available on new Intel + platforms, such as Haswell. It can only get user call chain. It + doesn't work with branch stack sampling at the same time. -q:: --quiet:: @@ -235,6 +247,16 @@ Capture machine state (registers) at interrupt, i.e., on counter overflows for each sample. List of captured registers depends on the architecture. This option is off by default. +--running-time:: +Record running and enabled time for read events (:S) + +-k:: +--clockid:: +Sets the clock id to use for the various time fields in the perf_event_type +records. See clock_gettime(). In particular CLOCK_MONOTONIC and +CLOCK_MONOTONIC_RAW are supported, some events might also allow +CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index dd7cccdde498..4879cf638824 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -40,6 +40,11 @@ OPTIONS Only consider symbols in these comms. CSV that understands file://filename entries. This option will affect the percentage of the overhead column. See --percentage for more info. +--pid=:: + Only show events for given process ID (comma separated list). + +--tid=:: + Only show events for given thread ID (comma separated list). -d:: --dsos=:: Only consider symbols in these dsos. CSV that understands diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index a21eec05bc42..79445750fcb3 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -193,6 +193,12 @@ OPTIONS Only display events for these comms. CSV that understands file://filename entries. +--pid=:: + Only show events for given process ID (comma separated list). + +--tid=:: + Only show events for given thread ID (comma separated list). + -I:: --show-info:: Display extended information about the perf.data file. This adds diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 7e1b1f2bb83c..ba03fd5d1a54 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -55,6 +55,9 @@ OPTIONS --uid=:: Record events in threads owned by uid. Name or number. +--filter-pids=:: + Filter out events for these pids and for 'trace' itself (comma separated list). + -v:: --verbose=:: Verbosity level. @@ -115,6 +118,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --syscalls:: Trace system calls. This options is enabled by default. +--event:: + Trace other events, see 'perf list' for a complete list. + PAGEFAULTS ---------- diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 1e8e400b4493..2b131776363e 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -13,11 +13,16 @@ SYNOPSIS OPTIONS ------- --debug:: - Setup debug variable (just verbose for now) in value + Setup debug variable (see list below) in value range (0, 10). Use like: --debug verbose # sets verbose = 1 --debug verbose=2 # sets verbose = 2 + List of debug variables allowed to set: + verbose - general debug messages + ordered-events - ordered events object debug messages + data-convert - data convert command debug messages + --buildid-dir:: Setup buildid cache directory. It has higher priority than buildid.dir config file option. diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index fbbfdc39271d..11ccbb22ea2b 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,5 +1,6 @@ tools/perf tools/scripts +tools/build tools/lib/traceevent tools/lib/api tools/lib/symbol/kallsyms.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index cb2e5868c8e8..c699dc35eef9 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -24,8 +24,8 @@ unexport MAKEFLAGS # (To override it, run 'make JOBS=1' and similar.) # ifeq ($(JOBS),) - JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null) - ifeq ($(JOBS),) + JOBS := $(shell egrep -c '^processor|^CPU' /proc/cpuinfo 2>/dev/null) + ifeq ($(JOBS),0) JOBS := 1 endif endif diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index aa6a50447c32..c43a20517591 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -68,7 +68,11 @@ include config/utilities.mak # for reading the x32 mode 32-bit compatibility VDSO in 64-bit mode # # Define NO_ZLIB if you do not want to support compressed kernel modules - +# +# Define LIBBABELTRACE if you DO want libbabeltrace support +# for CTF data format. +# +# Define NO_LZMA if you do not want to support compressed (xz) kernel modules ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) @@ -82,13 +86,29 @@ endif ifneq ($(OUTPUT),) #$(info Determined 'OUTPUT' to be $(OUTPUT)) +# Adding $(OUTPUT) as a directory to look for source files, +# because use generated output files as sources dependency +# for flex/bison parsers. +VPATH += $(OUTPUT) +export VPATH endif +ifeq ($(V),1) + Q = +else + Q = @ +endif + +# Do not use make's built-in rules +# (this improves performance and avoids hard-to-debug behaviour); +MAKEFLAGS += -r + $(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD - @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) - @touch $(OUTPUT)PERF-VERSION-FILE + $(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) + $(Q)touch $(OUTPUT)PERF-VERSION-FILE CC = $(CROSS_COMPILE)gcc +LD = $(CROSS_COMPILE)ld AR = $(CROSS_COMPILE)ar PKG_CONFIG = $(CROSS_COMPILE)pkg-config @@ -127,10 +147,6 @@ export prefix bindir sharedir sysconfdir DESTDIR SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ # Guard against environment variables -BUILTIN_OBJS = -LIB_H = -LIB_OBJS = -GTK_OBJS = PYRF_OBJS = SCRIPT_SH = @@ -155,8 +171,8 @@ endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT -LIBAPIKFS = $(LIB_PATH)libapikfs.a -export LIBAPIKFS +LIBAPI = $(LIB_PATH)libapi.a +export LIBAPI # python extension build directories PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ @@ -167,7 +183,7 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) -PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPIKFS) +PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \ @@ -206,297 +222,9 @@ endif export PERL_PATH -$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c - $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l - -$(OUTPUT)util/parse-events-bison.c: util/parse-events.y - $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_ - -$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c - $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l - -$(OUTPUT)util/pmu-bison.c: util/pmu.y - $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_ - -$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c -$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c - LIB_FILE=$(OUTPUT)libperf.a -LIB_H += ../lib/symbol/kallsyms.h -LIB_H += ../../include/uapi/linux/perf_event.h -LIB_H += ../../include/linux/rbtree.h -LIB_H += ../../include/linux/list.h -LIB_H += ../../include/uapi/linux/const.h -LIB_H += ../include/linux/hash.h -LIB_H += ../../include/linux/stringify.h -LIB_H += util/include/linux/bitmap.h -LIB_H += ../include/linux/bitops.h -LIB_H += ../include/asm-generic/bitops/arch_hweight.h -LIB_H += ../include/asm-generic/bitops/atomic.h -LIB_H += ../include/asm-generic/bitops/const_hweight.h -LIB_H += ../include/asm-generic/bitops/find.h -LIB_H += ../include/asm-generic/bitops/fls64.h -LIB_H += ../include/asm-generic/bitops/fls.h -LIB_H += ../include/asm-generic/bitops/__ffs.h -LIB_H += ../include/asm-generic/bitops/__fls.h -LIB_H += ../include/asm-generic/bitops/hweight.h -LIB_H += ../include/asm-generic/bitops.h -LIB_H += ../include/linux/compiler.h -LIB_H += ../include/linux/log2.h -LIB_H += util/include/linux/const.h -LIB_H += util/include/linux/ctype.h -LIB_H += util/include/linux/kernel.h -LIB_H += util/include/linux/list.h -LIB_H += ../include/linux/export.h -LIB_H += util/include/linux/poison.h -LIB_H += util/include/linux/rbtree.h -LIB_H += util/include/linux/rbtree_augmented.h -LIB_H += util/include/linux/string.h -LIB_H += ../include/linux/types.h -LIB_H += util/include/linux/linkage.h -LIB_H += util/include/asm/asm-offsets.h -LIB_H += ../include/asm/bug.h -LIB_H += util/include/asm/byteorder.h -LIB_H += util/include/asm/swab.h -LIB_H += util/include/asm/system.h -LIB_H += util/include/asm/uaccess.h -LIB_H += util/include/dwarf-regs.h -LIB_H += util/include/asm/dwarf2.h -LIB_H += util/include/asm/cpufeature.h -LIB_H += util/include/asm/unistd_32.h -LIB_H += util/include/asm/unistd_64.h -LIB_H += perf.h -LIB_H += util/annotate.h -LIB_H += util/cache.h -LIB_H += util/callchain.h -LIB_H += util/build-id.h -LIB_H += util/db-export.h -LIB_H += util/debug.h -LIB_H += util/pmu.h -LIB_H += util/event.h -LIB_H += util/evsel.h -LIB_H += util/evlist.h -LIB_H += util/exec_cmd.h -LIB_H += util/find-vdso-map.c -LIB_H += util/levenshtein.h -LIB_H += util/machine.h -LIB_H += util/map.h -LIB_H += util/parse-options.h -LIB_H += util/parse-events.h -LIB_H += util/quote.h -LIB_H += util/util.h -LIB_H += util/xyarray.h -LIB_H += util/header.h -LIB_H += util/help.h -LIB_H += util/session.h -LIB_H += util/ordered-events.h -LIB_H += util/strbuf.h -LIB_H += util/strlist.h -LIB_H += util/strfilter.h -LIB_H += util/svghelper.h -LIB_H += util/tool.h -LIB_H += util/run-command.h -LIB_H += util/sigchain.h -LIB_H += util/dso.h -LIB_H += util/symbol.h -LIB_H += util/color.h -LIB_H += util/values.h -LIB_H += util/sort.h -LIB_H += util/hist.h -LIB_H += util/comm.h -LIB_H += util/thread.h -LIB_H += util/thread_map.h -LIB_H += util/trace-event.h -LIB_H += util/probe-finder.h -LIB_H += util/dwarf-aux.h -LIB_H += util/probe-event.h -LIB_H += util/pstack.h -LIB_H += util/cpumap.h -LIB_H += util/top.h -LIB_H += $(ARCH_INCLUDE) -LIB_H += util/cgroup.h -LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h -LIB_H += util/target.h -LIB_H += util/rblist.h -LIB_H += util/intlist.h -LIB_H += util/perf_regs.h -LIB_H += util/unwind.h -LIB_H += util/vdso.h -LIB_H += util/tsc.h -LIB_H += ui/helpline.h -LIB_H += ui/progress.h -LIB_H += ui/util.h -LIB_H += ui/ui.h -LIB_H += util/data.h -LIB_H += util/kvm-stat.h -LIB_H += util/thread-stack.h - -LIB_OBJS += $(OUTPUT)util/abspath.o -LIB_OBJS += $(OUTPUT)util/alias.o -LIB_OBJS += $(OUTPUT)util/annotate.o -LIB_OBJS += $(OUTPUT)util/build-id.o -LIB_OBJS += $(OUTPUT)util/config.o -LIB_OBJS += $(OUTPUT)util/ctype.o -LIB_OBJS += $(OUTPUT)util/db-export.o -LIB_OBJS += $(OUTPUT)util/pmu.o -LIB_OBJS += $(OUTPUT)util/environment.o -LIB_OBJS += $(OUTPUT)util/event.o -LIB_OBJS += $(OUTPUT)util/evlist.o -LIB_OBJS += $(OUTPUT)util/evsel.o -LIB_OBJS += $(OUTPUT)util/exec_cmd.o -LIB_OBJS += $(OUTPUT)util/find_next_bit.o -LIB_OBJS += $(OUTPUT)util/help.o -LIB_OBJS += $(OUTPUT)util/kallsyms.o -LIB_OBJS += $(OUTPUT)util/levenshtein.o -LIB_OBJS += $(OUTPUT)util/parse-options.o -LIB_OBJS += $(OUTPUT)util/parse-events.o -LIB_OBJS += $(OUTPUT)util/path.o -LIB_OBJS += $(OUTPUT)util/rbtree.o -LIB_OBJS += $(OUTPUT)util/bitmap.o -LIB_OBJS += $(OUTPUT)util/hweight.o -LIB_OBJS += $(OUTPUT)util/run-command.o -LIB_OBJS += $(OUTPUT)util/quote.o -LIB_OBJS += $(OUTPUT)util/strbuf.o -LIB_OBJS += $(OUTPUT)util/string.o -LIB_OBJS += $(OUTPUT)util/strlist.o -LIB_OBJS += $(OUTPUT)util/strfilter.o -LIB_OBJS += $(OUTPUT)util/top.o -LIB_OBJS += $(OUTPUT)util/usage.o -LIB_OBJS += $(OUTPUT)util/wrapper.o -LIB_OBJS += $(OUTPUT)util/sigchain.o -LIB_OBJS += $(OUTPUT)util/dso.o -LIB_OBJS += $(OUTPUT)util/symbol.o -LIB_OBJS += $(OUTPUT)util/symbol-elf.o -LIB_OBJS += $(OUTPUT)util/color.o -LIB_OBJS += $(OUTPUT)util/pager.o -LIB_OBJS += $(OUTPUT)util/header.o -LIB_OBJS += $(OUTPUT)util/callchain.o -LIB_OBJS += $(OUTPUT)util/values.o -LIB_OBJS += $(OUTPUT)util/debug.o -LIB_OBJS += $(OUTPUT)util/machine.o -LIB_OBJS += $(OUTPUT)util/map.o -LIB_OBJS += $(OUTPUT)util/pstack.o -LIB_OBJS += $(OUTPUT)util/session.o -LIB_OBJS += $(OUTPUT)util/ordered-events.o -LIB_OBJS += $(OUTPUT)util/comm.o -LIB_OBJS += $(OUTPUT)util/thread.o -LIB_OBJS += $(OUTPUT)util/thread_map.o -LIB_OBJS += $(OUTPUT)util/trace-event-parse.o -LIB_OBJS += $(OUTPUT)util/parse-events-flex.o -LIB_OBJS += $(OUTPUT)util/parse-events-bison.o -LIB_OBJS += $(OUTPUT)util/pmu-flex.o -LIB_OBJS += $(OUTPUT)util/pmu-bison.o -LIB_OBJS += $(OUTPUT)util/trace-event-read.o -LIB_OBJS += $(OUTPUT)util/trace-event-info.o -LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o -LIB_OBJS += $(OUTPUT)util/trace-event.o -LIB_OBJS += $(OUTPUT)util/svghelper.o -LIB_OBJS += $(OUTPUT)util/sort.o -LIB_OBJS += $(OUTPUT)util/hist.o -LIB_OBJS += $(OUTPUT)util/probe-event.o -LIB_OBJS += $(OUTPUT)util/util.o -LIB_OBJS += $(OUTPUT)util/xyarray.o -LIB_OBJS += $(OUTPUT)util/cpumap.o -LIB_OBJS += $(OUTPUT)util/cgroup.o -LIB_OBJS += $(OUTPUT)util/target.o -LIB_OBJS += $(OUTPUT)util/rblist.o -LIB_OBJS += $(OUTPUT)util/intlist.o -LIB_OBJS += $(OUTPUT)util/vdso.o -LIB_OBJS += $(OUTPUT)util/stat.o -LIB_OBJS += $(OUTPUT)util/record.o -LIB_OBJS += $(OUTPUT)util/srcline.o -LIB_OBJS += $(OUTPUT)util/data.o -LIB_OBJS += $(OUTPUT)util/tsc.o -LIB_OBJS += $(OUTPUT)util/cloexec.o -LIB_OBJS += $(OUTPUT)util/thread-stack.o - -LIB_OBJS += $(OUTPUT)ui/setup.o -LIB_OBJS += $(OUTPUT)ui/helpline.o -LIB_OBJS += $(OUTPUT)ui/progress.o -LIB_OBJS += $(OUTPUT)ui/util.o -LIB_OBJS += $(OUTPUT)ui/hist.o -LIB_OBJS += $(OUTPUT)ui/stdio/hist.o - -LIB_OBJS += $(OUTPUT)arch/common.o - -LIB_OBJS += $(OUTPUT)tests/parse-events.o -LIB_OBJS += $(OUTPUT)tests/dso-data.o -LIB_OBJS += $(OUTPUT)tests/attr.o -LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o -LIB_OBJS += $(OUTPUT)tests/open-syscall.o -LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o -LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o -LIB_OBJS += $(OUTPUT)tests/mmap-basic.o -LIB_OBJS += $(OUTPUT)tests/perf-record.o -LIB_OBJS += $(OUTPUT)tests/rdpmc.o -LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o -LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o -LIB_OBJS += $(OUTPUT)tests/fdarray.o -LIB_OBJS += $(OUTPUT)tests/pmu.o -LIB_OBJS += $(OUTPUT)tests/hists_common.o -LIB_OBJS += $(OUTPUT)tests/hists_link.o -LIB_OBJS += $(OUTPUT)tests/hists_filter.o -LIB_OBJS += $(OUTPUT)tests/hists_output.o -LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o -LIB_OBJS += $(OUTPUT)tests/python-use.o -LIB_OBJS += $(OUTPUT)tests/bp_signal.o -LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o -LIB_OBJS += $(OUTPUT)tests/task-exit.o -LIB_OBJS += $(OUTPUT)tests/sw-clock.o -ifeq ($(ARCH),x86) -LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o -endif -LIB_OBJS += $(OUTPUT)tests/code-reading.o -LIB_OBJS += $(OUTPUT)tests/sample-parsing.o -LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o -ifndef NO_DWARF_UNWIND -ifeq ($(ARCH),$(filter $(ARCH),x86 arm)) -LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o -endif -endif -LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o -LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o -LIB_OBJS += $(OUTPUT)tests/switch-tracking.o - -BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o -BUILTIN_OBJS += $(OUTPUT)builtin-bench.o -# Benchmark modules -BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o -BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o -ifeq ($(ARCH), x86) -ifeq ($(IS_64_BIT), 1) -BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o -BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o -endif -endif -BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o - -BUILTIN_OBJS += $(OUTPUT)builtin-diff.o -BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o -BUILTIN_OBJS += $(OUTPUT)builtin-help.o -BUILTIN_OBJS += $(OUTPUT)builtin-sched.o -BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o -BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o -BUILTIN_OBJS += $(OUTPUT)builtin-list.o -BUILTIN_OBJS += $(OUTPUT)builtin-record.o -BUILTIN_OBJS += $(OUTPUT)builtin-report.o -BUILTIN_OBJS += $(OUTPUT)builtin-stat.o -BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o -BUILTIN_OBJS += $(OUTPUT)builtin-top.o -BUILTIN_OBJS += $(OUTPUT)builtin-script.o -BUILTIN_OBJS += $(OUTPUT)builtin-probe.o -BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o -BUILTIN_OBJS += $(OUTPUT)builtin-lock.o -BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o -BUILTIN_OBJS += $(OUTPUT)builtin-inject.o -BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o -BUILTIN_OBJS += $(OUTPUT)builtin-mem.o - -PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT) +PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) # We choose to avoid "if .. else if .. else .. endif endif" # because maintaining the nesting to match is a pain. If @@ -508,67 +236,9 @@ ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif -ifdef NO_LIBELF -# Remove ELF/DWARF dependent codes -LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS)) - -BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS)) - -# Use minimal symbol handling -LIB_OBJS += $(OUTPUT)util/symbol-minimal.o - -else # NO_LIBELF -ifndef NO_DWARF - LIB_OBJS += $(OUTPUT)util/probe-finder.o - LIB_OBJS += $(OUTPUT)util/dwarf-aux.o -endif # NO_DWARF -endif # NO_LIBELF - -ifndef NO_LIBDW_DWARF_UNWIND - LIB_OBJS += $(OUTPUT)util/unwind-libdw.o - LIB_H += util/unwind-libdw.h -endif - -ifndef NO_LIBUNWIND - LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o -endif -LIB_OBJS += $(OUTPUT)tests/keep-tracking.o - -ifndef NO_LIBAUDIT - BUILTIN_OBJS += $(OUTPUT)builtin-trace.o -endif - -ifndef NO_SLANG - LIB_OBJS += $(OUTPUT)ui/browser.o - LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o - LIB_OBJS += $(OUTPUT)ui/browsers/hists.o - LIB_OBJS += $(OUTPUT)ui/browsers/map.o - LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o - LIB_OBJS += $(OUTPUT)ui/browsers/header.o - LIB_OBJS += $(OUTPUT)ui/tui/setup.o - LIB_OBJS += $(OUTPUT)ui/tui/util.o - LIB_OBJS += $(OUTPUT)ui/tui/helpline.o - LIB_OBJS += $(OUTPUT)ui/tui/progress.o - LIB_H += ui/tui/tui.h - LIB_H += ui/browser.h - LIB_H += ui/browsers/map.h - LIB_H += ui/keysyms.h - LIB_H += ui/libslang.h -endif - ifndef NO_GTK2 ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so - - GTK_OBJS += $(OUTPUT)ui/gtk/browser.o - GTK_OBJS += $(OUTPUT)ui/gtk/hists.o - GTK_OBJS += $(OUTPUT)ui/gtk/setup.o - GTK_OBJS += $(OUTPUT)ui/gtk/util.o - GTK_OBJS += $(OUTPUT)ui/gtk/helpline.o - GTK_OBJS += $(OUTPUT)ui/gtk/progress.o - GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o + GTK_IN := $(OUTPUT)gtk-in.o install-gtk: $(OUTPUT)libperf-gtk.so $(call QUIET_INSTALL, 'GTK UI') \ @@ -576,31 +246,6 @@ install-gtk: $(OUTPUT)libperf-gtk.so $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)' endif -ifndef NO_LIBPERL - LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o - LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o -endif - -ifndef NO_LIBPYTHON - LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o - LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o -endif - -ifeq ($(NO_PERF_REGS),0) - ifeq ($(ARCH),x86) - LIB_H += arch/x86/include/perf_regs.h - endif - LIB_OBJS += $(OUTPUT)util/perf_regs.o -endif - -ifndef NO_LIBNUMA - BUILTIN_OBJS += $(OUTPUT)bench/numa.o -endif - -ifndef NO_ZLIB - LIB_OBJS += $(OUTPUT)util/zlib.o -endif - ifdef ASCIIDOC8 export ASCIIDOC8 endif @@ -616,39 +261,29 @@ SHELL = $(SHELL_PATH) all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) please_set_SHELL_PATH_to_a_more_modern_shell: - @$$(:) + $(Q)$$(:) shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell strip: $(PROGRAMS) $(OUTPUT)perf $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf -$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - $(CFLAGS) -c $(filter %.c,$^) -o $@ +PERF_IN := $(OUTPUT)perf-in.o -$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \ - $(BUILTIN_OBJS) $(LIBS) -o $@ +export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX +build := -f $(srctree)/tools/build/Makefile.build dir=. obj -$(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H) - $(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $< +$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE + $(Q)$(MAKE) $(build)=perf -$(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@ -$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< +$(GTK_IN): FORCE + $(Q)$(MAKE) $(build)=gtk -$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< +$(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) + $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) $(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt @@ -659,8 +294,7 @@ $(SCRIPTS) : % : %.sh $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@' # These can record PERF_VERSION -$(OUTPUT)perf.o perf.spec \ - $(SCRIPTS) \ +perf.spec $(SCRIPTS) \ : $(OUTPUT)PERF-VERSION-FILE .SUFFIXES: @@ -683,90 +317,33 @@ endif # These two need to be here so that when O= is not used they take precedence # over the general rule for .o -$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $< - -$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $< - -$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< -$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< -$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $< -$(OUTPUT)%.o: %.S - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< -$(OUTPUT)%.s: %.S - $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< - -$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ - '-DPREFIX="$(prefix_SQ)"' \ - $< - -$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \ - $< - -$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - -DPYTHONPATH='"$(OUTPUT)python"' \ - -DPYTHON='"$(PYTHON_WORD)"' \ - $< - -$(OUTPUT)tests/dwarf-unwind.o: tests/dwarf-unwind.c - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -fno-optimize-sibling-calls $< +# get relative building directory (to $(OUTPUT)) +# and '.' if it's $(OUTPUT) itself +__build-dir = $(subst $(OUTPUT),,$(dir $@)) +build-dir = $(if $(__build-dir),$(__build-dir),.) -$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -$(OUTPUT)ui/setup.o: ui/setup.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DLIBDIR='"$(libdir_SQ)"' $< +$(OUTPUT)%.o: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%.i: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%.s: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%-bison.o: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%-flex.o: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%.o: %.S single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< - -$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/hweight.o: ../../lib/hweight.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $< - -$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $< - -$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $< - -$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< - -$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< +$(OUTPUT)%.i: %.S single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)perf-%: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS) @@ -781,58 +358,34 @@ $(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c $(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c endif -$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) -$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) +$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) -# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So -# we depend the various files onto their directories. -DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS) -DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -# no need to add flex objects, because they depend on bison ones -DIRECTORY_DEPS += $(OUTPUT)util/parse-events-bison.c -DIRECTORY_DEPS += $(OUTPUT)util/pmu-bison.c +LIBPERF_IN := $(OUTPUT)libperf-in.o -OUTPUT_DIRECTORIES := $(sort $(dir $(DIRECTORY_DEPS))) +$(LIBPERF_IN): FORCE + $(Q)$(MAKE) $(build)=libperf -$(DIRECTORY_DEPS): | $(OUTPUT_DIRECTORIES) -# In the second step, we make a rule to actually create these directories -$(OUTPUT_DIRECTORIES): - $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null +$(LIB_FILE): $(LIBPERF_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) -$(LIB_FILE): $(LIB_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) - -# libtraceevent.a -TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch]) - -LIBTRACEEVENT_FLAGS = $(QUIET_SUBDIR1) O=$(OUTPUT) -LIBTRACEEVENT_FLAGS += CFLAGS="-g -Wall $(EXTRA_CFLAGS)" LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) -$(LIBTRACEEVENT): $(TE_SOURCES) $(OUTPUT)PERF-CFLAGS - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) libtraceevent.a plugins +$(LIBTRACEEVENT): FORCE + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) - @$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null install-traceevent-plugins: $(LIBTRACEEVENT) - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins -LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch] $(LIB_PATH)fd/*.[ch]) - -# if subdir is set, we've been called from above so target has been built -# already -$(LIBAPIKFS): $(LIBAPIKFS_SOURCES) -ifeq ($(subdir),) - $(QUIET_SUBDIR0)$(LIB_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libapikfs.a -endif +$(LIBAPI): FORCE + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a -$(LIBAPIKFS)-clean: -ifeq ($(subdir),) - $(call QUIET_CLEAN, libapikfs) - @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -endif +$(LIBAPI)-clean: + $(call QUIET_CLEAN, libapi) + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null help: @echo 'Perf make targets:' @@ -888,17 +441,6 @@ cscope: $(QUIET_GEN)$(RM) cscope*; \ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES) -### Detect prefix changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ - $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ):$(plugindir_SQ) - -$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS - @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " FLAGS: * new build flags or prefix"; \ - echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \ - fi - ### Testing rules # GNU make supports exporting all variables by "export" without parameters. @@ -981,12 +523,14 @@ $(INSTALL_DOC_TARGETS): # config-clean: $(call QUIET_CLEAN, config) - @$(MAKE) -C config/feature-checks clean >/dev/null + $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null -clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean - $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS) +clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean + $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) + $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)$(RM) .config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 - $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) @@ -1000,7 +544,9 @@ else GIT-HEAD-PHONY = endif +FORCE: + .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build new file mode 100644 index 000000000000..109eb75cf7de --- /dev/null +++ b/tools/perf/arch/Build @@ -0,0 +1,2 @@ +libperf-y += common.o +libperf-y += $(ARCH)/ diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build new file mode 100644 index 000000000000..41bf61da476a --- /dev/null +++ b/tools/perf/arch/arm/Build @@ -0,0 +1,2 @@ +libperf-y += util/ +libperf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 09d62153d384..7fbca175099e 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,14 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o -endif -ifndef NO_LIBDW_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o -endif -ifndef NO_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o endif diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build new file mode 100644 index 000000000000..b30eff9bcc83 --- /dev/null +++ b/tools/perf/arch/arm/tests/Build @@ -0,0 +1,2 @@ +libperf-y += regs_load.o +libperf-y += dwarf-unwind.o diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build new file mode 100644 index 000000000000..d22e3d07de3d --- /dev/null +++ b/tools/perf/arch/arm/util/Build @@ -0,0 +1,4 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o + +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/arm64/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 67e9b3d38e89..7fbca175099e 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -1,7 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o endif diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build new file mode 100644 index 000000000000..e58123a8912b --- /dev/null +++ b/tools/perf/arch/arm64/util/Build @@ -0,0 +1,2 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/powerpc/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 6f7782bea5dd..7fbca175099e 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -1,6 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/skip-callchain-idx.o endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build new file mode 100644 index 000000000000..0af6e9b3f728 --- /dev/null +++ b/tools/perf/arch/powerpc/util/Build @@ -0,0 +1,4 @@ +libperf-y += header.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o +libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/s390/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 798ac7379c5f..21322e0385b8 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -1,7 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o HAVE_KVM_STAT_SUPPORT := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build new file mode 100644 index 000000000000..8a61372bb47a --- /dev/null +++ b/tools/perf/arch/s390/util/Build @@ -0,0 +1,4 @@ +libperf-y += header.o +libperf-y += kvm-stat.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/sh/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile index 15130b50dfe3..7fbca175099e 100644 --- a/tools/perf/arch/sh/Makefile +++ b/tools/perf/arch/sh/Makefile @@ -1,4 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/sh/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/sparc/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile index 15130b50dfe3..7fbca175099e 100644 --- a/tools/perf/arch/sparc/Makefile +++ b/tools/perf/arch/sparc/Makefile @@ -1,4 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/sparc/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build new file mode 100644 index 000000000000..41bf61da476a --- /dev/null +++ b/tools/perf/arch/x86/Build @@ -0,0 +1,2 @@ +libperf-y += util/ +libperf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 9b21881db52f..21322e0385b8 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -1,19 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o -endif -ifndef NO_LIBDW_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o -endif -ifndef NO_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o -endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o -LIB_H += arch/$(ARCH)/util/tsc.h HAVE_KVM_STAT_SUPPORT := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build new file mode 100644 index 000000000000..b30eff9bcc83 --- /dev/null +++ b/tools/perf/arch/x86/tests/Build @@ -0,0 +1,2 @@ +libperf-y += regs_load.o +libperf-y += dwarf-unwind.o diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build new file mode 100644 index 000000000000..cfbccc4e3187 --- /dev/null +++ b/tools/perf/arch/x86/util/Build @@ -0,0 +1,8 @@ +libperf-y += header.o +libperf-y += tsc.o +libperf-y += kvm-stat.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o + +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build new file mode 100644 index 000000000000..5ce98023d518 --- /dev/null +++ b/tools/perf/bench/Build @@ -0,0 +1,11 @@ +perf-y += sched-messaging.o +perf-y += sched-pipe.o +perf-y += mem-memcpy.o +perf-y += futex-hash.o +perf-y += futex-wake.o +perf-y += futex-requeue.o + +perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o +perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o + +perf-$(CONFIG_NUMA) += numa.o diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 747f86103599..71bf7451c0ca 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -208,7 +208,7 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; } - ret = perf_session__process_events(session, &ann->tool); + ret = perf_session__process_events(session); if (ret) goto out; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 50e6b66aea1f..d47a0cdc71c9 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -125,8 +125,7 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, return ret; } -static int build_id_cache__add_kcore(const char *filename, const char *debugdir, - bool force) +static int build_id_cache__add_kcore(const char *filename, bool force) { char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1]; char from_dir[PATH_MAX], to_dir[PATH_MAX]; @@ -143,7 +142,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return -1; scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", - debugdir, sbuildid); + buildid_dir, sbuildid); if (!force && !build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) { @@ -155,7 +154,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return -1; scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s", - debugdir, sbuildid, dir); + buildid_dir, sbuildid, dir); if (mkdir_p(to_dir, 0755)) return -1; @@ -183,7 +182,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return 0; } -static int build_id_cache__add_file(const char *filename, const char *debugdir) +static int build_id_cache__add_file(const char *filename) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; u8 build_id[BUILD_ID_SIZE]; @@ -195,16 +194,14 @@ static int build_id_cache__add_file(const char *filename, const char *debugdir) } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__add_s(sbuild_id, debugdir, filename, + err = build_id_cache__add_s(sbuild_id, filename, false, false); - if (verbose) - pr_info("Adding %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + pr_debug("Adding %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); return err; } -static int build_id_cache__remove_file(const char *filename, - const char *debugdir) +static int build_id_cache__remove_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; @@ -217,10 +214,34 @@ static int build_id_cache__remove_file(const char *filename, } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__remove_s(sbuild_id, debugdir); - if (verbose) - pr_info("Removing %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + err = build_id_cache__remove_s(sbuild_id); + pr_debug("Removing %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); + + return err; +} + +static int build_id_cache__purge_path(const char *pathname) +{ + struct strlist *list; + struct str_node *pos; + int err; + + err = build_id_cache__list_build_ids(pathname, &list); + if (err) + goto out; + + strlist__for_each(pos, list) { + err = build_id_cache__remove_s(pos->s); + pr_debug("Removing %s %s: %s\n", pos->s, pathname, + err ? "FAIL" : "Ok"); + if (err) + break; + } + strlist__delete(list); + +out: + pr_debug("Purging %s: %s\n", pathname, err ? "FAIL" : "Ok"); return err; } @@ -252,13 +273,12 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f return 0; } -static int build_id_cache__update_file(const char *filename, - const char *debugdir) +static int build_id_cache__update_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; - int err; + int err = 0; if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -266,14 +286,14 @@ static int build_id_cache__update_file(const char *filename, } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__remove_s(sbuild_id, debugdir); - if (!err) { - err = build_id_cache__add_s(sbuild_id, debugdir, filename, - false, false); - } - if (verbose) - pr_info("Updating %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + if (build_id_cache__cached(sbuild_id)) + err = build_id_cache__remove_s(sbuild_id); + + if (!err) + err = build_id_cache__add_s(sbuild_id, filename, false, false); + + pr_debug("Updating %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); return err; } @@ -287,6 +307,7 @@ int cmd_buildid_cache(int argc, const char **argv, bool force = false; char const *add_name_list_str = NULL, *remove_name_list_str = NULL, + *purge_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, *kcore_filename = NULL; @@ -304,6 +325,8 @@ int cmd_buildid_cache(int argc, const char **argv, "file", "kcore file to add"), OPT_STRING('r', "remove", &remove_name_list_str, "file list", "file(s) to remove"), + OPT_STRING('p', "purge", &purge_name_list_str, "path list", + "path(s) to remove (remove old caches too)"), OPT_STRING('M', "missing", &missing_filename, "file", "to find missing build ids in the cache"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), @@ -320,6 +343,11 @@ int cmd_buildid_cache(int argc, const char **argv, argc = parse_options(argc, argv, buildid_cache_options, buildid_cache_usage, 0); + if (argc || (!add_name_list_str && !kcore_filename && + !remove_name_list_str && !purge_name_list_str && + !missing_filename && !update_name_list_str)) + usage_with_options(buildid_cache_usage, buildid_cache_options); + if (missing_filename) { file.path = missing_filename; file.force = force; @@ -338,7 +366,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, add_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__add_file(pos->s, buildid_dir)) { + if (build_id_cache__add_file(pos->s)) { if (errno == EEXIST) { pr_debug("%s already in the cache\n", pos->s); @@ -356,7 +384,25 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, remove_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__remove_file(pos->s, buildid_dir)) { + if (build_id_cache__remove_file(pos->s)) { + if (errno == ENOENT) { + pr_debug("%s wasn't in the cache\n", + pos->s); + continue; + } + pr_warning("Couldn't remove %s: %s\n", + pos->s, strerror_r(errno, sbuf, sizeof(sbuf))); + } + + strlist__delete(list); + } + } + + if (purge_name_list_str) { + list = strlist__new(true, purge_name_list_str); + if (list) { + strlist__for_each(pos, list) + if (build_id_cache__purge_path(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -377,7 +423,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, update_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__update_file(pos->s, buildid_dir)) { + if (build_id_cache__update_file(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -391,8 +437,7 @@ int cmd_buildid_cache(int argc, const char **argv, } } - if (kcore_filename && - build_id_cache__add_kcore(kcore_filename, buildid_dir, force)) + if (kcore_filename && build_id_cache__add_kcore(kcore_filename, force)) pr_warning("Couldn't add %s\n", kcore_filename); out: diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index ed3873b3e238..feb420f74c2d 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -74,7 +74,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID */ if (with_hits || perf_data_file__is_pipe(&file)) - perf_session__process_events(session, &build_id__mark_dso_hit_ops); + perf_session__process_events(session); perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits); perf_session__delete(session); diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c new file mode 100644 index 000000000000..d6525bc54d13 --- /dev/null +++ b/tools/perf/builtin-data.c @@ -0,0 +1,123 @@ +#include <linux/compiler.h> +#include "builtin.h" +#include "perf.h" +#include "debug.h" +#include "parse-options.h" +#include "data-convert-bt.h" + +typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix); + +struct data_cmd { + const char *name; + const char *summary; + data_cmd_fn_t fn; +}; + +static struct data_cmd data_cmds[]; + +#define for_each_cmd(cmd) \ + for (cmd = data_cmds; cmd && cmd->name; cmd++) + +static const struct option data_options[] = { + OPT_END() +}; + +static const char * const data_subcommands[] = { "convert", NULL }; + +static const char *data_usage[] = { + "perf data [<common options>] <command> [<options>]", + NULL +}; + +static void print_usage(void) +{ + struct data_cmd *cmd; + + printf("Usage:\n"); + printf("\t%s\n\n", data_usage[0]); + printf("\tAvailable commands:\n"); + + for_each_cmd(cmd) { + printf("\t %s\t- %s\n", cmd->name, cmd->summary); + } + + printf("\n"); +} + +static const char * const data_convert_usage[] = { + "perf data convert [<options>]", + NULL +}; + +static int cmd_data_convert(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + const char *to_ctf = NULL; + bool force = false; + const struct option options[] = { + OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_STRING('i', "input", &input_name, "file", "input file name"), +#ifdef HAVE_LIBBABELTRACE_SUPPORT + OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"), +#endif + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_END() + }; + +#ifndef HAVE_LIBBABELTRACE_SUPPORT + pr_err("No conversion support compiled in.\n"); + return -1; +#endif + + argc = parse_options(argc, argv, options, + data_convert_usage, 0); + if (argc) { + usage_with_options(data_convert_usage, options); + return -1; + } + + if (to_ctf) { +#ifdef HAVE_LIBBABELTRACE_SUPPORT + return bt_convert__perf2ctf(input_name, to_ctf, force); +#else + pr_err("The libbabeltrace support is not compiled in.\n"); + return -1; +#endif + } + + return 0; +} + +static struct data_cmd data_cmds[] = { + { "convert", "converts data file between formats", cmd_data_convert }, + { .name = NULL, }, +}; + +int cmd_data(int argc, const char **argv, const char *prefix) +{ + struct data_cmd *cmd; + const char *cmdstr; + + /* No command specified. */ + if (argc < 2) + goto usage; + + argc = parse_options_subcommand(argc, argv, data_options, data_subcommands, data_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (argc < 1) + goto usage; + + cmdstr = argv[0]; + + for_each_cmd(cmd) { + if (strcmp(cmd->name, cmdstr)) + continue; + + return cmd->fn(argc, argv, prefix); + } + + pr_err("Unknown command: %s\n", cmdstr); +usage: + print_usage(); + return -1; +} diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 74aada554b12..df6307b4050a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -747,7 +747,7 @@ static int __cmd_diff(void) goto out_delete; } - ret = perf_session__process_events(d->session, &tool); + ret = perf_session__process_events(d->session); if (ret) { pr_err("Failed to process %s\n", d->file.path); goto out_delete; @@ -791,6 +791,8 @@ static const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, + "file", "kallsyms pathname"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", @@ -802,7 +804,7 @@ static const struct option options[] = { OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..." " Please refer the man page for the complete list."), - OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", + OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' is reserved."), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 0f93f859b782..695ec5a50cf2 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -24,6 +24,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details struct perf_data_file file = { .path = file_name, .mode = PERF_DATA_MODE_READ, + .force = details->force, }; session = perf_session__new(&file, 0, NULL); @@ -47,6 +48,7 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused) "Show all event attr details"), OPT_BOOLEAN('g', "group", &details.event_group, "Show event group information"), + OPT_BOOLEAN('f', "force", &details.force, "don't complain, do it"), OPT_END() }; const char * const evlist_usage[] = { diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 25d20628212e..36486eade1ef 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -437,7 +437,18 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) HELP_FORMAT_INFO), OPT_END(), }; - const char * const builtin_help_usage[] = { + const char * const builtin_help_subcommands[] = { + "buildid-cache", "buildid-list", "diff", "evlist", "help", "list", + "record", "report", "bench", "stat", "timechart", "top", "annotate", + "script", "sched", "kmem", "lock", "kvm", "test", "inject", "mem", "data", +#ifdef HAVE_LIBELF_SUPPORT + "probe", +#endif +#ifdef HAVE_LIBAUDIT_SUPPORT + "trace", +#endif + NULL }; + const char *builtin_help_usage[] = { "perf help [--all] [--man|--web|--info] [command]", NULL }; @@ -448,8 +459,8 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) perf_config(perf_help_config, &help_format); - argc = parse_options(argc, argv, builtin_help_options, - builtin_help_usage, 0); + argc = parse_options_subcommand(argc, argv, builtin_help_options, + builtin_help_subcommands, builtin_help_usage, 0); if (show_all) { printf("\n usage: %s\n\n", perf_usage_string); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index a13641e066f5..40a33d7334cc 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -53,6 +53,13 @@ static int perf_event__repipe_synth(struct perf_tool *tool, return 0; } +static int perf_event__repipe_oe_synth(struct perf_tool *tool, + union perf_event *event, + struct ordered_events *oe __maybe_unused) +{ + return perf_event__repipe_synth(tool, event); +} + static int perf_event__repipe_op2_synth(struct perf_tool *tool, union perf_event *event, struct perf_session *session @@ -359,8 +366,6 @@ static int __cmd_inject(struct perf_inject *inject) } else if (inject->sched_stat) { struct perf_evsel *evsel; - inject->tool.ordered_events = true; - evlist__for_each(session->evlist, evsel) { const char *name = perf_evsel__name(evsel); @@ -379,7 +384,7 @@ static int __cmd_inject(struct perf_inject *inject) if (!file_out->is_pipe) lseek(fd, session->header.data_offset, SEEK_SET); - ret = perf_session__process_events(session, &inject->tool); + ret = perf_session__process_events(session); if (!file_out->is_pipe) { if (inject->build_ids) @@ -408,7 +413,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .unthrottle = perf_event__repipe, .attr = perf_event__repipe_attr, .tracing_data = perf_event__repipe_op2_synth, - .finished_round = perf_event__repipe_op2_synth, + .finished_round = perf_event__repipe_oe_synth, .build_id = perf_event__repipe_op2_synth, .id_index = perf_event__repipe_op2_synth, }, @@ -438,6 +443,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show build ids, etc)"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), + OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), OPT_END() }; const char * const inject_usage[] = { @@ -458,6 +464,8 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } + inject.tool.ordered_events = inject.sched_stat; + file.path = inject.input_name; inject.session = perf_session__new(&file, true, &inject.tool); if (inject.session == NULL) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index f295141025bc..4ebf65c79434 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -20,6 +20,7 @@ #include <linux/rbtree.h> #include <linux/string.h> +#include <locale.h> struct alloc_stat; typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); @@ -275,10 +276,10 @@ static void __print_result(struct rb_root *root, struct perf_session *session, struct rb_node *next; struct machine *machine = &session->machines.host; - printf("%.102s\n", graph_dotted_line); + printf("%.105s\n", graph_dotted_line); printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); - printf("%.102s\n", graph_dotted_line); + printf("%.105s\n", graph_dotted_line); next = rb_first(root); @@ -304,7 +305,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session, snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr); printf(" %-34s |", buf); - printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n", + printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n", (unsigned long long)data->bytes_alloc, (unsigned long)data->bytes_alloc / data->hit, (unsigned long long)data->bytes_req, @@ -317,21 +318,21 @@ static void __print_result(struct rb_root *root, struct perf_session *session, } if (n_lines == -1) - printf(" ... | ... | ... | ... | ... | ... \n"); + printf(" ... | ... | ... | ... | ... | ... \n"); - printf("%.102s\n", graph_dotted_line); + printf("%.105s\n", graph_dotted_line); } static void print_summary(void) { printf("\nSUMMARY\n=======\n"); - printf("Total bytes requested: %lu\n", total_requested); - printf("Total bytes allocated: %lu\n", total_allocated); - printf("Total bytes wasted on internal fragmentation: %lu\n", + printf("Total bytes requested: %'lu\n", total_requested); + printf("Total bytes allocated: %'lu\n", total_allocated); + printf("Total bytes wasted on internal fragmentation: %'lu\n", total_allocated - total_requested); printf("Internal fragmentation: %f%%\n", fragmentation(total_requested, total_allocated)); - printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs); + printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs); } static void print_result(struct perf_session *session) @@ -426,7 +427,7 @@ static int __cmd_kmem(struct perf_session *session) } setup_pager(); - err = perf_session__process_events(session, &perf_kmem); + err = perf_session__process_events(session); if (err != 0) goto out; sort_result(); @@ -559,6 +560,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) { char *tok; char *str = strdup(arg); + char *pos = str; if (!str) { pr_err("%s: strdup failed\n", __func__); @@ -566,7 +568,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) } while (true) { - tok = strsep(&str, ","); + tok = strsep(&pos, ","); if (!tok) break; if (sort_dimension__add(tok, sort_list) < 0) { @@ -660,8 +662,13 @@ static int __cmd_record(int argc, const char **argv) int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) { const char * const default_sort_order = "frag,hit,bytes"; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_READ, + }; const struct option kmem_options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL, "show per-callsite statistics", parse_caller_opt), OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL, @@ -671,6 +678,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) parse_sort_opt), OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt), OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), + OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), OPT_END() }; const char *const kmem_subcommands[] = { "record", "stat", NULL }; @@ -679,10 +687,6 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; struct perf_session *session; - struct perf_data_file file = { - .path = input_name, - .mode = PERF_DATA_MODE_READ, - }; int ret = -1; argc = parse_options_subcommand(argc, argv, kmem_options, @@ -696,6 +700,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) return __cmd_record(argc, argv); } + file.path = input_name; + session = perf_session__new(&file, false, &perf_kmem); if (session == NULL) return -1; @@ -703,6 +709,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) symbol__init(&session->header.env); if (!strcmp(argv[0], "stat")) { + setlocale(LC_ALL, ""); + if (cpu__setup_cpunode_map()) goto out_delete; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 0894a817f67e..1f9338f6109c 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -18,6 +18,7 @@ #include "util/stat.h" #include "util/top.h" #include "util/data.h" +#include "util/ordered-events.h" #include <sys/prctl.h> #ifdef HAVE_TIMERFD_SUPPORT @@ -730,9 +731,9 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, return -1; } - err = perf_session_queue_event(kvm->session, event, &kvm->tool, &sample, 0); + err = perf_session__queue_event(kvm->session, event, &sample, 0); /* - * FIXME: Here we can't consume the event, as perf_session_queue_event will + * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. */ perf_evlist__mmap_consume(kvm->evlist, idx); @@ -783,8 +784,10 @@ static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm) /* flush queue after each round in which we processed events */ if (ntotal) { - kvm->session->ordered_events.next_flush = flush_time; - err = kvm->tool.finished_round(&kvm->tool, NULL, kvm->session); + struct ordered_events *oe = &kvm->session->ordered_events; + + oe->next_flush = flush_time; + err = ordered_events__flush(oe, OE_FLUSH__ROUND); if (err) { if (kvm->lost_events) pr_info("\nLost events: %" PRIu64 "\n\n", @@ -1044,6 +1047,7 @@ static int read_events(struct perf_kvm_stat *kvm) struct perf_data_file file = { .path = kvm->file_name, .mode = PERF_DATA_MODE_READ, + .force = kvm->force, }; kvm->tool = eops; @@ -1066,7 +1070,7 @@ static int read_events(struct perf_kvm_stat *kvm) if (ret < 0) return ret; - return perf_session__process_events(kvm->session, &kvm->tool); + return perf_session__process_events(kvm->session); } static int parse_target_str(struct perf_kvm_stat *kvm) @@ -1201,6 +1205,7 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv) " time (sort by avg time)"), OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid", "analyze events only for given process id(s)"), + OPT_BOOLEAN('f', "force", &kvm->force, "don't complain, do it"), OPT_END() }; diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 198f3c3aff95..af5bd0514108 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -36,38 +36,36 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) setup_pager(); - if (raw_dump) { - print_events(NULL, true); - return 0; - } + if (!raw_dump) + printf("\nList of pre-defined events (to be used in -e):\n\n"); if (argc == 0) { - print_events(NULL, false); + print_events(NULL, raw_dump); return 0; } for (i = 0; i < argc; ++i) { - if (i) - putchar('\n'); - if (strncmp(argv[i], "tracepoint", 10) == 0) - print_tracepoint_events(NULL, NULL, false); + if (strcmp(argv[i], "tracepoint") == 0) + print_tracepoint_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) - print_events_type(PERF_TYPE_HARDWARE); + print_symbol_events(NULL, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) - print_events_type(PERF_TYPE_SOFTWARE); + print_symbol_events(NULL, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(NULL, false); + print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) - print_pmu_events(NULL, false); + print_pmu_events(NULL, raw_dump); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i], false); + print_events(argv[i], raw_dump); continue; } sep_idx = sep - argv[i]; @@ -76,7 +74,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) return -1; s[sep_idx] = '\0'; - print_tracepoint_events(s, s + sep_idx + 1, false); + print_tracepoint_events(s, s + sep_idx + 1, raw_dump); free(s); } } diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index e7ec71589da6..d49c2ab85fc2 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -846,6 +846,8 @@ static const struct perf_evsel_str_handler lock_tracepoints[] = { { "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ }; +static bool force; + static int __cmd_report(bool display_info) { int err = -EINVAL; @@ -857,6 +859,7 @@ static int __cmd_report(bool display_info) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = force, }; session = perf_session__new(&file, false, &eops); @@ -878,7 +881,7 @@ static int __cmd_report(bool display_info) if (select_key()) goto out_delete; - err = perf_session__process_events(session, &eops); + err = perf_session__process_events(session); if (err) goto out_delete; @@ -945,6 +948,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) "dump thread list in perf.data"), OPT_BOOLEAN('m', "map", &info_map, "map of lock instances (address:name table)"), + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_END() }; const struct option lock_options[] = { @@ -956,6 +960,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) const struct option report_options[] = { OPT_STRING('k', "key", &sort_key, "acquired", "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), /* TODO: type */ OPT_END() }; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 9b5663950a4d..675216e08bfc 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -15,6 +15,7 @@ struct perf_mem { char const *input_name; bool hide_unresolved; bool dump_raw; + bool force; int operation; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -120,6 +121,7 @@ static int report_raw_events(struct perf_mem *mem) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = mem->force, }; int err = -EINVAL; int ret; @@ -141,7 +143,7 @@ static int report_raw_events(struct perf_mem *mem) printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); - err = perf_session__process_events(session, &mem->tool); + err = perf_session__process_events(session); if (err) return err; @@ -286,10 +288,11 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) "input file name"), OPT_STRING('C', "cpu", &mem.cpu_list, "cpu", "list of cpus to profile"), - OPT_STRING('x', "field-separator", &symbol_conf.field_sep, + OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added" " between columns '.' is reserved."), + OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"), OPT_END() }; const char *const mem_subcommands[] = { "record", "report", NULL }; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 921bb6942503..f7b1af67e9f6 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -56,6 +56,7 @@ static struct { bool mod_events; bool uprobes; bool quiet; + bool target_used; int nevents; struct perf_probe_event events[MAX_PROBES]; struct strlist *dellist; @@ -78,6 +79,12 @@ static int parse_probe_event(const char *str) } pev->uprobes = params.uprobes; + if (params.target) { + pev->target = strdup(params.target); + if (!pev->target) + return -ENOMEM; + params.target_used = true; + } /* Parse a perf-probe command into event */ ret = parse_perf_probe_command(str, pev); @@ -102,6 +109,7 @@ static int set_target(const char *ptr) params.target = strdup(ptr); if (!params.target) return -ENOMEM; + params.target_used = false; found = 1; buf = ptr + (strlen(ptr) - 3); @@ -178,7 +186,7 @@ static int opt_set_target(const struct option *opt, const char *str, int ret = -ENOENT; char *tmp; - if (str && !params.target) { + if (str) { if (!strcmp(opt->long_name, "exec")) params.uprobes = true; #ifdef HAVE_DWARF_SUPPORT @@ -200,7 +208,9 @@ static int opt_set_target(const struct option *opt, const char *str, if (!tmp) return -ENOMEM; } + free(params.target); params.target = tmp; + params.target_used = false; ret = 0; } @@ -485,9 +495,14 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) } if (params.nevents) { + /* Ensure the last given target is used */ + if (params.target && !params.target_used) { + pr_warning(" Error: -x/-m must follow the probe definitions.\n"); + usage_with_options(probe_usage, options); + } + ret = add_perf_probe_events(params.events, params.nevents, params.max_probe_points, - params.target, params.force_add); if (ret < 0) { pr_err_with_code(" Error: Failed to add events.", ret); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 404ab3434052..c3efdfb630b5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -70,8 +70,8 @@ static int process_synthesized_event(struct perf_tool *tool, static int record__mmap_read(struct record *rec, int idx) { struct perf_mmap *md = &rec->evlist->mmap[idx]; - unsigned int head = perf_mmap__read_head(md); - unsigned int old = md->prev; + u64 head = perf_mmap__read_head(md); + u64 old = md->prev; unsigned char *data = md->base + page_size; unsigned long size; void *buf; @@ -161,8 +161,9 @@ try_again: } } - if (perf_evlist__apply_filters(evlist)) { - error("failed to set filter with %d (%s)\n", errno, + if (perf_evlist__apply_filters(evlist, &pos)) { + error("failed to set filter \"%s\" on event %s with %d (%s)\n", + pos->filter, perf_evsel__name(pos), errno, strerror_r(errno, msg, sizeof(msg))); rc = -1; goto out; @@ -225,7 +226,7 @@ static int process_buildids(struct record *rec) */ symbol_conf.ignore_vmlinux_buildid = true; - return perf_session__process_events(session, &rec->tool); + return perf_session__process_events(session); } static void perf_event__synthesize_guest_os(struct machine *machine, void *data) @@ -343,7 +344,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGINT, sig_handler); signal(SIGTERM, sig_handler); - session = perf_session__new(file, false, NULL); + session = perf_session__new(file, false, tool); if (session == NULL) { pr_err("Perf session creation failed.\n"); return -1; @@ -658,7 +659,7 @@ error: static void callchain_debug(void) { - static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" }; + static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; pr_debug("callchain: type %s\n", str[callchain_param.record_mode]); @@ -710,6 +711,90 @@ static int perf_record_config(const char *var, const char *value, void *cb) return perf_default_config(var, value, cb); } +struct clockid_map { + const char *name; + int clockid; +}; + +#define CLOCKID_MAP(n, c) \ + { .name = n, .clockid = (c), } + +#define CLOCKID_END { .name = NULL, } + + +/* + * Add the missing ones, we need to build on many distros... + */ +#ifndef CLOCK_MONOTONIC_RAW +#define CLOCK_MONOTONIC_RAW 4 +#endif +#ifndef CLOCK_BOOTTIME +#define CLOCK_BOOTTIME 7 +#endif +#ifndef CLOCK_TAI +#define CLOCK_TAI 11 +#endif + +static const struct clockid_map clockids[] = { + /* available for all events, NMI safe */ + CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), + CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), + + /* available for some events */ + CLOCKID_MAP("realtime", CLOCK_REALTIME), + CLOCKID_MAP("boottime", CLOCK_BOOTTIME), + CLOCKID_MAP("tai", CLOCK_TAI), + + /* available for the lazy */ + CLOCKID_MAP("mono", CLOCK_MONOTONIC), + CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), + CLOCKID_MAP("real", CLOCK_REALTIME), + CLOCKID_MAP("boot", CLOCK_BOOTTIME), + + CLOCKID_END, +}; + +static int parse_clockid(const struct option *opt, const char *str, int unset) +{ + struct record_opts *opts = (struct record_opts *)opt->value; + const struct clockid_map *cm; + const char *ostr = str; + + if (unset) { + opts->use_clockid = 0; + return 0; + } + + /* no arg passed */ + if (!str) + return 0; + + /* no setting it twice */ + if (opts->use_clockid) + return -1; + + opts->use_clockid = true; + + /* if its a number, we're done */ + if (sscanf(str, "%d", &opts->clockid) == 1) + return 0; + + /* allow a "CLOCK_" prefix to the name */ + if (!strncasecmp(str, "CLOCK_", 6)) + str += 6; + + for (cm = clockids; cm->name; cm++) { + if (!strcasecmp(str, cm->name)) { + opts->clockid = cm->clockid; + return 0; + } + } + + opts->use_clockid = false; + ui__warning("unknown clockid %s, check man page\n", ostr); + return -1; +} + static const char * const __record_usage[] = { "perf record [<options>] [<command>]", "perf record [<options>] -- <command> [<options>]", @@ -751,9 +836,9 @@ static struct record record = { #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " #ifdef HAVE_DWARF_UNWIND_SUPPORT -const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr"; #else -const char record_callchain_help[] = CALLCHAIN_HELP "fp"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr"; #endif /* @@ -839,6 +924,11 @@ struct option __record_options[] = { "use per-thread mmaps"), OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs, "Sample machine registers on interrupt"), + OPT_BOOLEAN(0, "running-time", &record.opts.running_time, + "Record running/enabled time of read (:S) events"), + OPT_CALLBACK('k', "clockid", &record.opts, + "clockid", "clockid to use for events, see clock_gettime()", + parse_clockid), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2f91094e228b..476cdf7afcca 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -249,6 +249,8 @@ static int report__setup_sample_type(struct report *rep) if ((sample_type & PERF_SAMPLE_REGS_USER) && (sample_type & PERF_SAMPLE_STACK_USER)) callchain_param.record_mode = CALLCHAIN_DWARF; + else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + callchain_param.record_mode = CALLCHAIN_LBR; else callchain_param.record_mode = CALLCHAIN_FP; } @@ -302,7 +304,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report if (rep->mem_mode) { ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events); - ret += fprintf(fp, "\n# Sort order : %s", sort_order); + ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order); } else ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events); return ret + fprintf(fp, "\n#\n"); @@ -345,7 +347,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, static void report__warn_kptr_restrict(const struct report *rep) { struct map *kernel_map = rep->session->machines.host.vmlinux_maps[MAP__FUNCTION]; - struct kmap *kernel_kmap = map__kmap(kernel_map); + struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL; if (kernel_map == NULL || (kernel_map->dso->hit && @@ -480,7 +482,7 @@ static int __cmd_report(struct report *rep) if (ret) return ret; - ret = perf_session__process_events(session, &rep->tool); + ret = perf_session__process_events(session); if (ret) return ret; @@ -667,6 +669,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "only consider symbols in these dsos"), OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", "only consider symbols in these comms"), + OPT_STRING(0, "pid", &symbol_conf.pid_list_str, "pid[,pid...]", + "only consider symbols in these pids"), + OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]", + "only consider symbols in these tids"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), OPT_STRING(0, "symbol-filter", &report.symbol_filter_str, "filter", @@ -674,7 +680,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str, "width[,width...]", "don't try to adjust column width, use these fixed values"), - OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", + OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' is reserved."), OPT_BOOLEAN('U', "hide-unresolved", &report.hide_unresolved, @@ -766,7 +772,7 @@ repeat: * 0/1 means the user chose a mode. */ if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) && - branch_call_mode == -1) { + !branch_call_mode) { sort__mode = SORT_MODE__BRANCH; symbol_conf.cumulate_callchain = false; } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 891c3930080e..5275bab70313 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -23,12 +23,13 @@ #include <semaphore.h> #include <pthread.h> #include <math.h> +#include <api/fs/fs.h> #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 #define COMM_LEN 20 #define SYM_LEN 129 -#define MAX_PID 65536 +#define MAX_PID 1024000 struct sched_atom; @@ -124,7 +125,7 @@ struct perf_sched { struct perf_tool tool; const char *sort_order; unsigned long nr_tasks; - struct task_desc *pid_to_task[MAX_PID]; + struct task_desc **pid_to_task; struct task_desc **tasks; const struct trace_sched_handler *tp_handler; pthread_mutex_t start_work_mutex; @@ -169,6 +170,7 @@ struct perf_sched { u64 cpu_last_switched[MAX_CPUS]; struct rb_root atom_root, sorted_atom_root; struct list_head sort_list, cmp_pid; + bool force; }; static u64 get_nsecs(void) @@ -326,8 +328,19 @@ static struct task_desc *register_pid(struct perf_sched *sched, unsigned long pid, const char *comm) { struct task_desc *task; + static int pid_max; - BUG_ON(pid >= MAX_PID); + if (sched->pid_to_task == NULL) { + if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) + pid_max = MAX_PID; + BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); + } + if (pid >= (unsigned long)pid_max) { + BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) * + sizeof(struct task_desc *))) == NULL); + while (pid >= (unsigned long)pid_max) + sched->pid_to_task[pid_max++] = NULL; + } task = sched->pid_to_task[pid]; @@ -346,7 +359,7 @@ static struct task_desc *register_pid(struct perf_sched *sched, sched->pid_to_task[pid] = task; sched->nr_tasks++; - sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_task *)); + sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *)); BUG_ON(!sched->tasks); sched->tasks[task->nr] = task; @@ -425,24 +438,45 @@ static u64 get_cpu_usage_nsec_parent(void) return sum; } -static int self_open_counters(void) +static int self_open_counters(struct perf_sched *sched, unsigned long cur_task) { struct perf_event_attr attr; - char sbuf[STRERR_BUFSIZE]; + char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE]; int fd; + struct rlimit limit; + bool need_privilege = false; memset(&attr, 0, sizeof(attr)); attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_TASK_CLOCK; +force_again: fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag()); - if (fd < 0) + if (fd < 0) { + if (errno == EMFILE) { + if (sched->force) { + BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1); + limit.rlim_cur += sched->nr_tasks - cur_task; + if (limit.rlim_cur > limit.rlim_max) { + limit.rlim_max = limit.rlim_cur; + need_privilege = true; + } + if (setrlimit(RLIMIT_NOFILE, &limit) == -1) { + if (need_privilege && errno == EPERM) + strcpy(info, "Need privilege\n"); + } else + goto force_again; + } else + strcpy(info, "Have a try with -f option\n"); + } pr_err("Error: sys_perf_event_open() syscall returned " - "with %d (%s)\n", fd, - strerror_r(errno, sbuf, sizeof(sbuf))); + "with %d (%s)\n%s", fd, + strerror_r(errno, sbuf, sizeof(sbuf)), info); + exit(EXIT_FAILURE); + } return fd; } @@ -460,6 +494,7 @@ static u64 get_cpu_usage_nsec_self(int fd) struct sched_thread_parms { struct task_desc *task; struct perf_sched *sched; + int fd; }; static void *thread_func(void *ctx) @@ -470,13 +505,12 @@ static void *thread_func(void *ctx) u64 cpu_usage_0, cpu_usage_1; unsigned long i, ret; char comm2[22]; - int fd; + int fd = parms->fd; zfree(&parms); sprintf(comm2, ":%s", this_task->comm); prctl(PR_SET_NAME, comm2); - fd = self_open_counters(); if (fd < 0) return NULL; again: @@ -528,6 +562,7 @@ static void create_tasks(struct perf_sched *sched) BUG_ON(parms == NULL); parms->task = task = sched->tasks[i]; parms->sched = sched; + parms->fd = self_open_counters(sched, i); sem_init(&task->sleep_sem, 0, 0); sem_init(&task->ready_for_work, 0, 0); sem_init(&task->work_done_sem, 0, 0); @@ -572,13 +607,13 @@ static void wait_for_tasks(struct perf_sched *sched) cpu_usage_1 = get_cpu_usage_nsec_parent(); if (!sched->runavg_cpu_usage) sched->runavg_cpu_usage = sched->cpu_usage; - sched->runavg_cpu_usage = (sched->runavg_cpu_usage * 9 + sched->cpu_usage) / 10; + sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat; sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; if (!sched->runavg_parent_cpu_usage) sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; - sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * 9 + - sched->parent_cpu_usage)/10; + sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) + + sched->parent_cpu_usage)/sched->replay_repeat; ret = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(ret); @@ -610,7 +645,7 @@ static void run_one_test(struct perf_sched *sched) sched->sum_fluct += fluct; if (!sched->run_avg) sched->run_avg = delta; - sched->run_avg = (sched->run_avg * 9 + delta) / 10; + sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0); @@ -831,7 +866,7 @@ static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) return -1; } - atoms->thread = thread; + atoms->thread = thread__get(thread); INIT_LIST_HEAD(&atoms->work_list); __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); return 0; @@ -1439,8 +1474,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ return err; } -static int perf_sched__read_events(struct perf_sched *sched, - struct perf_session **psession) +static int perf_sched__read_events(struct perf_sched *sched) { const struct perf_evsel_str_handler handlers[] = { { "sched:sched_switch", process_sched_switch_event, }, @@ -1453,7 +1487,9 @@ static int perf_sched__read_events(struct perf_sched *sched, struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = sched->force, }; + int rc = -1; session = perf_session__new(&file, false, &sched->tool); if (session == NULL) { @@ -1467,27 +1503,21 @@ static int perf_sched__read_events(struct perf_sched *sched, goto out_delete; if (perf_session__has_traces(session, "record -R")) { - int err = perf_session__process_events(session, &sched->tool); + int err = perf_session__process_events(session); if (err) { pr_err("Failed to process events, error %d", err); goto out_delete; } - sched->nr_events = session->stats.nr_events[0]; - sched->nr_lost_events = session->stats.total_lost; - sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST]; + sched->nr_events = session->evlist->stats.nr_events[0]; + sched->nr_lost_events = session->evlist->stats.total_lost; + sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; } - if (psession) - *psession = session; - else - perf_session__delete(session); - - return 0; - + rc = 0; out_delete: perf_session__delete(session); - return -1; + return rc; } static void print_bad_events(struct perf_sched *sched) @@ -1515,12 +1545,10 @@ static void print_bad_events(struct perf_sched *sched) static int perf_sched__lat(struct perf_sched *sched) { struct rb_node *next; - struct perf_session *session; setup_pager(); - /* save session -- references to threads are held in work_list */ - if (perf_sched__read_events(sched, &session)) + if (perf_sched__read_events(sched)) return -1; perf_sched__sort_lat(sched); @@ -1537,6 +1565,7 @@ static int perf_sched__lat(struct perf_sched *sched) work_list = rb_entry(next, struct work_atoms, node); output_lat_thread(sched, work_list); next = rb_next(next); + thread__zput(work_list->thread); } printf(" -----------------------------------------------------------------------------------------------------------------\n"); @@ -1548,7 +1577,6 @@ static int perf_sched__lat(struct perf_sched *sched) print_bad_events(sched); printf("\n"); - perf_session__delete(session); return 0; } @@ -1557,7 +1585,7 @@ static int perf_sched__map(struct perf_sched *sched) sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); setup_pager(); - if (perf_sched__read_events(sched, NULL)) + if (perf_sched__read_events(sched)) return -1; print_bad_events(sched); return 0; @@ -1572,7 +1600,7 @@ static int perf_sched__replay(struct perf_sched *sched) test_calibrations(sched); - if (perf_sched__read_events(sched, NULL)) + if (perf_sched__read_events(sched)) return -1; printf("nr_run_events: %ld\n", sched->nr_run_events); @@ -1693,6 +1721,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), OPT_END() }; const struct option sched_options[] = { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ce304dfd962a..58f10b8e6ff2 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -446,9 +446,9 @@ static void print_sample_bts(union perf_event *event, } static void process_event(union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel, struct thread *thread, - struct addr_location *al) + struct perf_evsel *evsel, struct addr_location *al) { + struct thread *thread = al->thread; struct perf_event_attr *attr = &evsel->attr; if (output[attr->type].fields == 0) @@ -549,14 +549,6 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, struct machine *machine) { struct addr_location al; - struct thread *thread = machine__findnew_thread(machine, sample->pid, - sample->tid); - - if (thread == NULL) { - pr_debug("problem processing %d event, skipping it.\n", - event->header.type); - return -1; - } if (debug_mode) { if (sample->time < last_timestamp) { @@ -581,7 +573,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) return 0; - scripting_ops->process_event(event, sample, evsel, thread, &al); + scripting_ops->process_event(event, sample, evsel, &al); return 0; } @@ -800,7 +792,7 @@ static int __cmd_script(struct perf_script *script) script->tool.mmap2 = process_mmap2_event; } - ret = perf_session__process_events(script->session, &script->tool); + ret = perf_session__process_events(script->session); if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); @@ -1523,6 +1515,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) .ordering_requires_timestamps = true, }, }; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_READ, + }; const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), @@ -1550,7 +1545,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "When printing symbols do not display call chain"), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), - OPT_CALLBACK('f', "fields", NULL, "str", + OPT_CALLBACK('F', "fields", NULL, "str", "comma separated output fields prepend with 'type:'. " "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," @@ -1562,6 +1557,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", "only display events for these comms"), + OPT_STRING(0, "pid", &symbol_conf.pid_list_str, "pid[,pid...]", + "only consider symbols in these pids"), + OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]", + "only consider symbols in these tids"), OPT_BOOLEAN('I', "show-info", &show_full_info, "display extended information from perf.data file"), OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, @@ -1570,9 +1569,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Show the fork/comm/exit events"), OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events, "Show the mmap events"), + OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), OPT_END() }; - const char * const script_usage[] = { + const char * const script_subcommands[] = { "record", "report", NULL }; + const char *script_usage[] = { "perf script [<options>]", "perf script [<options>] record <script> [<record-options>] <command>", "perf script [<options>] report <script> [script-args]", @@ -1580,13 +1581,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "perf script [<options>] <top-script> [script-args]", NULL }; - struct perf_data_file file = { - .mode = PERF_DATA_MODE_READ, - }; setup_scripting(); - argc = parse_options(argc, argv, options, script_usage, + argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, PARSE_OPT_STOP_AT_NON_OPTION); file.path = input_name; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e598e4e98170..f7b8218785f6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -353,39 +353,40 @@ static struct perf_evsel *nth_evsel(int n) * more semantic information such as miss/hit ratios, * instruction rates, etc: */ -static void update_shadow_stats(struct perf_evsel *counter, u64 *count) +static void update_shadow_stats(struct perf_evsel *counter, u64 *count, + int cpu) { if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) - update_stats(&runtime_nsecs_stats[0], count[0]); + update_stats(&runtime_nsecs_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_stats(&runtime_cycles_stats[0], count[0]); + update_stats(&runtime_cycles_stats[cpu], count[0]); else if (transaction_run && perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX))) - update_stats(&runtime_cycles_in_tx_stats[0], count[0]); + update_stats(&runtime_cycles_in_tx_stats[cpu], count[0]); else if (transaction_run && perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START))) - update_stats(&runtime_transaction_stats[0], count[0]); + update_stats(&runtime_transaction_stats[cpu], count[0]); else if (transaction_run && perf_evsel__cmp(counter, nth_evsel(T_ELISION_START))) - update_stats(&runtime_elision_stats[0], count[0]); + update_stats(&runtime_elision_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); + update_stats(&runtime_stalled_cycles_front_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - update_stats(&runtime_stalled_cycles_back_stats[0], count[0]); + update_stats(&runtime_stalled_cycles_back_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_stats(&runtime_branches_stats[0], count[0]); + update_stats(&runtime_branches_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_stats(&runtime_cacherefs_stats[0], count[0]); + update_stats(&runtime_cacherefs_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_stats(&runtime_l1_dcache_stats[0], count[0]); + update_stats(&runtime_l1_dcache_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_l1_icache_stats[0], count[0]); + update_stats(&runtime_l1_icache_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[0], count[0]); + update_stats(&runtime_ll_cache_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[0], count[0]); + update_stats(&runtime_dtlb_cache_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[0], count[0]); + update_stats(&runtime_itlb_cache_stats[cpu], count[0]); } static void zero_per_pkg(struct perf_evsel *counter) @@ -447,7 +448,8 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, perf_evsel__compute_deltas(evsel, cpu, count); perf_counts_values__scale(count, scale, NULL); evsel->counts->cpu[cpu] = *count; - update_shadow_stats(evsel, count->values); + if (aggr_mode == AGGR_NONE) + update_shadow_stats(evsel, count->values, cpu); break; case AGGR_GLOBAL: aggr->val += count->val; @@ -495,7 +497,7 @@ static int read_counter_aggr(struct perf_evsel *counter) /* * Save the full runtime - to allow normalization during printout: */ - update_shadow_stats(counter, count); + update_shadow_stats(counter, count, 0); return 0; } @@ -510,6 +512,9 @@ static int read_counter(struct perf_evsel *counter) int ncpus = perf_evsel__nr_cpus(counter); int cpu, thread; + if (!counter->supported) + return -ENOENT; + if (counter->system_wide) nthreads = 1; @@ -679,8 +684,9 @@ static int __run_perf_stat(int argc, const char **argv) unit_width = l; } - if (perf_evlist__apply_filters(evsel_list)) { - error("failed to set filter with %d (%s)\n", errno, + if (perf_evlist__apply_filters(evsel_list, &counter)) { + error("failed to set filter \"%s\" on event %s with %d (%s)\n", + counter->filter, perf_evsel__name(counter), errno, strerror_r(errno, msg, sizeof(msg))); return -1; } @@ -766,6 +772,19 @@ static int run_perf_stat(int argc, const char **argv) return ret; } +static void print_running(u64 run, u64 ena) +{ + if (csv_output) { + fprintf(output, "%s%" PRIu64 "%s%.2f", + csv_sep, + run, + csv_sep, + ena ? 100.0 * run / ena : 100.0); + } else if (run != ena) { + fprintf(output, " (%.2f%%)", 100.0 * run / ena); + } +} + static void print_noise_pct(double total, double avg) { double pct = rel_stddev_stats(total, avg); @@ -1076,6 +1095,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (total) { ratio = avg / total; fprintf(output, " # %5.2f insns per cycle ", ratio); + } else { + fprintf(output, " "); } total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); @@ -1145,6 +1166,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (total) { ratio = avg / total; fprintf(output, " # %8.3f GHz ", ratio); + } else { + fprintf(output, " "); } } else if (transaction_run && perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) { @@ -1249,6 +1272,7 @@ static void print_aggr(char *prefix) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); + print_running(run, ena); fputc('\n', output); continue; } @@ -1259,13 +1283,10 @@ static void print_aggr(char *prefix) else abs_printout(id, nr, counter, uval); - if (!csv_output) { + if (!csv_output) print_noise(counter, 1.0); - if (run != ena) - fprintf(output, " (%.2f%%)", - 100.0 * run / ena); - } + print_running(run, ena); fputc('\n', output); } } @@ -1281,11 +1302,15 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; double uval; + double avg_enabled, avg_running; + + avg_enabled = avg_stats(&ps->res_stats[1]); + avg_running = avg_stats(&ps->res_stats[2]); if (prefix) fprintf(output, "%s", prefix); - if (scaled == -1) { + if (scaled == -1 || !counter->supported) { fprintf(output, "%*s%s", csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, @@ -1300,6 +1325,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) if (counter->cgrp) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); + print_running(avg_running, avg_enabled); fputc('\n', output); return; } @@ -1313,19 +1339,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) print_noise(counter, avg); - if (csv_output) { - fputc('\n', output); - return; - } - - if (scaled) { - double avg_enabled, avg_running; - - avg_enabled = avg_stats(&ps->res_stats[1]); - avg_running = avg_stats(&ps->res_stats[2]); - - fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled); - } + print_running(avg_running, avg_enabled); fprintf(output, "\n"); } @@ -1367,6 +1381,7 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); + print_running(run, ena); fputc('\n', output); continue; } @@ -1378,13 +1393,10 @@ static void print_counter(struct perf_evsel *counter, char *prefix) else abs_printout(cpu, 0, counter, uval); - if (!csv_output) { + if (!csv_output) print_noise(counter, 1.0); + print_running(run, ena); - if (run != ena) - fprintf(output, " (%.2f%%)", - 100.0 * run / ena); - } fputc('\n', output); } } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index f3bb1a4bf060..e50fe1187b0b 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -67,6 +67,7 @@ struct timechart { skip_eagain; u64 min_time, merge_dist; + bool force; }; struct per_pidcomm; @@ -1598,6 +1599,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = tchart->force, }; struct perf_session *session = perf_session__new(&file, false, @@ -1623,7 +1625,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) goto out_delete; } - ret = perf_session__process_events(session, &tchart->tool); + ret = perf_session__process_events(session); if (ret) goto out_delete; @@ -1956,9 +1958,11 @@ int cmd_timechart(int argc, const char **argv, OPT_CALLBACK(0, "io-merge-dist", &tchart.merge_dist, "time", "merge events that are merge-dist us apart", parse_time), + OPT_BOOLEAN('f', "force", &tchart.force, "don't complain, do it"), OPT_END() }; - const char * const timechart_usage[] = { + const char * const timechart_subcommands[] = { "record", NULL }; + const char *timechart_usage[] = { "perf timechart [<options>] {record}", NULL }; @@ -1976,8 +1980,8 @@ int cmd_timechart(int argc, const char **argv, "perf timechart record [<options>]", NULL }; - argc = parse_options(argc, argv, timechart_options, timechart_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + argc = parse_options_subcommand(argc, argv, timechart_options, timechart_subcommands, + timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (tchart.power_only && tchart.tasks_only) { pr_err("-P and -T options cannot be used at the same time.\n"); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c4c7eac69de4..1cb3436276d1 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -716,7 +716,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (!machine) { pr_err("%u unprocessable samples recorded.\r", - top->session->stats.nr_unprocessable_samples++); + top->session->evlist->stats.nr_unprocessable_samples++); return; } @@ -757,8 +757,10 @@ static void perf_event__process_sample(struct perf_tool *tool, al.map == machine->vmlinux_maps[MAP__FUNCTION] && RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { if (symbol_conf.vmlinux_name) { - ui__warning("The %s file can't be used.\n%s", - symbol_conf.vmlinux_name, msg); + char serr[256]; + dso__strerror_load(al.map->dso, serr, sizeof(serr)); + ui__warning("The %s file can't be used: %s\n%s", + symbol_conf.vmlinux_name, serr, msg); } else { ui__warning("A vmlinux file was not found.\n%s", msg); @@ -856,7 +858,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) hists__inc_nr_events(evsel__hists(evsel), event->header.type); machine__process_event(machine, event, &sample); } else - ++session->stats.nr_unknown_events; + ++session->evlist->stats.nr_unknown_events; next_event: perf_evlist__mmap_consume(top->evlist, idx); } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7e935f1083ec..e124741be187 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -52,7 +52,9 @@ struct tp_field { #define TP_UINT_FIELD(bits) \ static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ { \ - return *(u##bits *)(sample->raw_data + field->offset); \ + u##bits value; \ + memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ + return value; \ } TP_UINT_FIELD(8); @@ -63,7 +65,8 @@ TP_UINT_FIELD(64); #define TP_UINT_FIELD__SWAPPED(bits) \ static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ { \ - u##bits value = *(u##bits *)(sample->raw_data + field->offset); \ + u##bits value; \ + memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ return bswap_##bits(value);\ } @@ -1132,6 +1135,8 @@ static struct syscall_fmt *syscall_fmt__find(const char *name) struct syscall { struct event_format *tp_format; + int nr_args; + struct format_field *args; const char *name; bool filtered; bool is_exit; @@ -1219,7 +1224,9 @@ struct trace { struct syscall *table; } syscalls; struct record_opts opts; + struct perf_evlist *evlist; struct machine *host; + struct thread *current; u64 base_time; FILE *output; unsigned long nr_events; @@ -1227,6 +1234,10 @@ struct trace { const char *last_vfs_getname; struct intlist *tid_list; struct intlist *pid_list; + struct { + size_t nr; + pid_t *entries; + } filter_pids; double duration_filter; double runtime_ms; struct { @@ -1243,6 +1254,7 @@ struct trace { bool show_comm; bool show_tool_stats; bool trace_syscalls; + bool force; int trace_pgfaults; }; @@ -1433,14 +1445,14 @@ static int syscall__set_arg_fmts(struct syscall *sc) struct format_field *field; int idx = 0; - sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *)); + sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *)); if (sc->arg_scnprintf == NULL) return -1; if (sc->fmt) sc->arg_parm = sc->fmt->arg_parm; - for (field = sc->tp_format->format.fields->next; field; field = field->next) { + for (field = sc->args; field; field = field->next) { if (sc->fmt && sc->fmt->arg_scnprintf[idx]) sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; else if (field->flags & FIELD_IS_POINTER) @@ -1506,18 +1518,37 @@ static int trace__read_syscall_info(struct trace *trace, int id) if (sc->tp_format == NULL) return -1; + sc->args = sc->tp_format->format.fields; + sc->nr_args = sc->tp_format->format.nr_fields; + /* drop nr field - not relevant here; does not exist on older kernels */ + if (sc->args && strcmp(sc->args->name, "nr") == 0) { + sc->args = sc->args->next; + --sc->nr_args; + } + sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); return syscall__set_arg_fmts(sc); } +/* + * args is to be interpreted as a series of longs but we need to handle + * 8-byte unaligned accesses. args points to raw_data within the event + * and raw_data is guaranteed to be 8-byte unaligned because it is + * preceded by raw_size which is a u32. So we need to copy args to a temp + * variable to read it. Most notably this avoids extended load instructions + * on unaligned addresses + */ + static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, - unsigned long *args, struct trace *trace, + unsigned char *args, struct trace *trace, struct thread *thread) { size_t printed = 0; + unsigned char *p; + unsigned long val; - if (sc->tp_format != NULL) { + if (sc->args != NULL) { struct format_field *field; u8 bit = 1; struct syscall_arg arg = { @@ -1527,16 +1558,21 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, .thread = thread, }; - for (field = sc->tp_format->format.fields->next; field; + for (field = sc->args; field; field = field->next, ++arg.idx, bit <<= 1) { if (arg.mask & bit) continue; + + /* special care for unaligned accesses */ + p = args + sizeof(unsigned long) * arg.idx; + memcpy(&val, p, sizeof(val)); + /* * Suppress this argument if its value is zero and * and we don't have a string associated in an * strarray for it. */ - if (args[arg.idx] == 0 && + if (val == 0 && !(sc->arg_scnprintf && sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && sc->arg_parm[arg.idx])) @@ -1545,23 +1581,26 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, printed += scnprintf(bf + printed, size - printed, "%s%s: ", printed ? ", " : "", field->name); if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { - arg.val = args[arg.idx]; + arg.val = val; if (sc->arg_parm) arg.parm = sc->arg_parm[arg.idx]; printed += sc->arg_scnprintf[arg.idx](bf + printed, size - printed, &arg); } else { printed += scnprintf(bf + printed, size - printed, - "%ld", args[arg.idx]); + "%ld", val); } } } else { int i = 0; while (i < 6) { + /* special care for unaligned accesses */ + p = args + sizeof(unsigned long) * i; + memcpy(&val, p, sizeof(val)); printed += scnprintf(bf + printed, size - printed, "%sarg%d: %ld", - printed ? ", " : "", i, args[i]); + printed ? ", " : "", i, val); ++i; } } @@ -1642,6 +1681,29 @@ static void thread__update_stats(struct thread_trace *ttrace, update_stats(stats, duration); } +static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample) +{ + struct thread_trace *ttrace; + u64 duration; + size_t printed; + + if (trace->current == NULL) + return 0; + + ttrace = thread__priv(trace->current); + + if (!ttrace->entry_pending) + return 0; + + duration = sample->time - ttrace->entry_time; + + printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); + printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); + ttrace->entry_pending = false; + + return printed; +} + static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -1673,6 +1735,9 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, return -1; } + if (!trace->summary_only) + trace__printf_interrupted_entry(trace, sample); + ttrace->entry_time = sample->time; msg = ttrace->entry_str; printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); @@ -1688,6 +1753,11 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, } else ttrace->entry_pending = true; + if (trace->current != thread) { + thread__put(trace->current); + trace->current = thread__get(thread); + } + return 0; } @@ -1805,6 +1875,28 @@ out_dump: return 0; } +static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, + union perf_event *event __maybe_unused, + struct perf_sample *sample) +{ + trace__printf_interrupted_entry(trace, sample); + trace__fprintf_tstamp(trace, sample->time, trace->output); + + if (trace->trace_syscalls) + fprintf(trace->output, "( ): "); + + fprintf(trace->output, "%s:", evsel->name); + + if (evsel->tp_format) { + event_format__fprintf(evsel->tp_format, sample->cpu, + sample->raw_data, sample->raw_size, + trace->output); + } + + fprintf(trace->output, ")\n"); + return 0; +} + static void print_location(FILE *f, struct perf_sample *sample, struct addr_location *al, bool print_dso, bool print_sym) @@ -2037,10 +2129,39 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist, return 0; } -static int trace__run(struct trace *trace, int argc, const char **argv) +static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) { - struct perf_evlist *evlist = perf_evlist__new(); + const u32 type = event->header.type; struct perf_evsel *evsel; + + if (!trace->full_time && trace->base_time == 0) + trace->base_time = sample->time; + + if (type != PERF_RECORD_SAMPLE) { + trace__process_event(trace, trace->host, event, sample); + return; + } + + evsel = perf_evlist__id2evsel(trace->evlist, sample->id); + if (evsel == NULL) { + fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); + return; + } + + if (evsel->attr.type == PERF_TYPE_TRACEPOINT && + sample->raw_data == NULL) { + fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", + perf_evsel__name(evsel), sample->tid, + sample->cpu, sample->raw_size); + } else { + tracepoint_handler handler = evsel->handler; + handler(trace, evsel, event, sample); + } +} + +static int trace__run(struct trace *trace, int argc, const char **argv) +{ + struct perf_evlist *evlist = trace->evlist; int err = -1, i; unsigned long before; const bool forks = argc > 0; @@ -2048,11 +2169,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->live = true; - if (evlist == NULL) { - fprintf(trace->output, "Not enough memory to run!\n"); - goto out; - } - if (trace->trace_syscalls && perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) @@ -2105,16 +2221,34 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_open; + /* + * Better not use !target__has_task() here because we need to cover the + * case where no threads were specified in the command line, but a + * workload was, and in that case we will fill in the thread_map when + * we fork the workload in perf_evlist__prepare_workload. + */ + if (trace->filter_pids.nr > 0) + err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); + else if (evlist->threads->map[0] == -1) + err = perf_evlist__set_filter_pid(evlist, getpid()); + + if (err < 0) { + printf("err=%d,%s\n", -err, strerror(-err)); + exit(1); + } + err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); if (err < 0) goto out_error_mmap; - perf_evlist__enable(evlist); - if (forks) perf_evlist__start_workload(evlist); + else + perf_evlist__enable(evlist); - trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; + trace->multiple_threads = evlist->threads->map[0] == -1 || + evlist->threads->nr > 1 || + perf_evlist__first(evlist)->attr.inherit; again: before = trace->nr_events; @@ -2122,8 +2256,6 @@ again: union perf_event *event; while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { - const u32 type = event->header.type; - tracepoint_handler handler; struct perf_sample sample; ++trace->nr_events; @@ -2134,30 +2266,7 @@ again: goto next_event; } - if (!trace->full_time && trace->base_time == 0) - trace->base_time = sample.time; - - if (type != PERF_RECORD_SAMPLE) { - trace__process_event(trace, trace->host, event, &sample); - continue; - } - - evsel = perf_evlist__id2evsel(evlist, sample.id); - if (evsel == NULL) { - fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); - goto next_event; - } - - if (evsel->attr.type == PERF_TYPE_TRACEPOINT && - sample.raw_data == NULL) { - fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", - perf_evsel__name(evsel), sample.tid, - sample.cpu, sample.raw_size); - goto next_event; - } - - handler = evsel->handler; - handler(trace, evsel, event, &sample); + trace__handle_event(trace, event, &sample); next_event: perf_evlist__mmap_consume(evlist, i); @@ -2180,6 +2289,8 @@ next_event: } out_disable: + thread__zput(trace->current); + perf_evlist__disable(evlist); if (!err) { @@ -2197,7 +2308,7 @@ out_disable: out_delete_evlist: perf_evlist__delete(evlist); -out: + trace->evlist = NULL; trace->live = false; return err; { @@ -2235,6 +2346,7 @@ static int trace__replay(struct trace *trace) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = trace->force, }; struct perf_session *session; struct perf_evsel *evsel; @@ -2309,7 +2421,7 @@ static int trace__replay(struct trace *trace) setup_pager(); - err = perf_session__process_events(session, &trace->tool); + err = perf_session__process_events(session); if (err) pr_err("Failed to process events, error %d", err); @@ -2434,6 +2546,38 @@ static int trace__set_duration(const struct option *opt, const char *str, return 0; } +static int trace__set_filter_pids(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + int ret = -1; + size_t i; + struct trace *trace = opt->value; + /* + * FIXME: introduce a intarray class, plain parse csv and create a + * { int nr, int entries[] } struct... + */ + struct intlist *list = intlist__new(str); + + if (list == NULL) + return -1; + + i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; + trace->filter_pids.entries = calloc(i, sizeof(pid_t)); + + if (trace->filter_pids.entries == NULL) + goto out; + + trace->filter_pids.entries[0] = getpid(); + + for (i = 1; i < trace->filter_pids.nr; ++i) + trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; + + intlist__delete(list); + ret = 0; +out: + return ret; +} + static int trace__open_output(struct trace *trace, const char *filename) { struct stat st; @@ -2468,9 +2612,17 @@ static int parse_pagefaults(const struct option *opt, const char *str, return 0; } +static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) + evsel->handler = handler; +} + int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) { - const char * const trace_usage[] = { + const char *trace_usage[] = { "perf trace [<options>] [<command>]", "perf trace [<options>] -- <command> [<options>]", "perf trace record [<options>] [<command>]", @@ -2502,6 +2654,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) const char *output_name = NULL; const char *ev_qualifier_str = NULL; const struct option trace_options[] = { + OPT_CALLBACK(0, "event", &trace.evlist, "event", + "event selector. use 'perf list' to list available events", + parse_events_option), OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), @@ -2513,6 +2668,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) "trace events on existing process id"), OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", "trace events on existing thread id"), + OPT_CALLBACK(0, "filter-pids", &trace, "float", + "show only events with duration > N.M ms", trace__set_filter_pids), OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, "system-wide collection from all CPUs"), OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", @@ -2538,19 +2695,36 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", "Trace pagefaults", parse_pagefaults, "maj"), OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), + OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), OPT_END() }; + const char * const trace_subcommands[] = { "record", NULL }; int err; char bf[BUFSIZ]; - argc = parse_options(argc, argv, trace_options, trace_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + signal(SIGSEGV, sighandler_dump_stack); + signal(SIGFPE, sighandler_dump_stack); + + trace.evlist = perf_evlist__new(); + if (trace.evlist == NULL) + return -ENOMEM; + + if (trace.evlist == NULL) { + pr_err("Not enough memory to run!\n"); + goto out; + } + + argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, + trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (trace.trace_pgfaults) { trace.opts.sample_address = true; trace.opts.sample_time = true; } + if (trace.evlist->nr_entries > 0) + evlist__set_evsel_handler(trace.evlist, trace__event_handler); + if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) return trace__record(&trace, argc-1, &argv[1]); @@ -2558,7 +2732,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) if (trace.summary_only) trace.summary = trace.summary_only; - if (!trace.trace_syscalls && !trace.trace_pgfaults) { + if (!trace.trace_syscalls && !trace.trace_pgfaults && + trace.evlist->nr_entries == 0 /* Was --events used? */) { pr_err("Please specify something to trace.\n"); return -1; } diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index b210d62907e4..3688ad29085f 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -37,6 +37,7 @@ extern int cmd_test(int argc, const char **argv, const char *prefix); extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_inject(int argc, const char **argv, const char *prefix); extern int cmd_mem(int argc, const char **argv, const char *prefix); +extern int cmd_data(int argc, const char **argv, const char *prefix); extern int find_scripts(char **scripts_array, char **scripts_path_array); #endif diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 0906fc401c52..00fcaf8a5b8d 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -7,6 +7,7 @@ perf-archive mainporcelain common perf-bench mainporcelain common perf-buildid-cache mainporcelain common perf-buildid-list mainporcelain common +perf-data mainporcelain common perf-diff mainporcelain common perf-evlist mainporcelain common perf-inject mainporcelain common diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index cc224080b525..59a98c643240 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -11,19 +11,26 @@ ifneq ($(obj-perf),) obj-perf := $(abspath $(obj-perf))/ endif -LIB_INCLUDE := $(srctree)/tools/lib/ +$(shell echo -n > .config-detected) +detected = $(shell echo "$(1)=y" >> .config-detected) +detected_var = $(shell echo "$(1)=$($(1))" >> .config-detected) + CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(src-perf)/config/Makefile.arch +$(call detected_var,ARCH) + NO_PERF_REGS := 1 # Additional ARCH settings for x86 ifeq ($(ARCH),x86) + $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 + $(call detected,CONFIG_X86_64) else LIBUNWIND_LIBS = -lunwind -lunwind-x86 endif @@ -40,6 +47,10 @@ ifeq ($(ARCH),arm64) LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif +ifeq ($(NO_PERF_REGS),0) + $(call detected,CONFIG_PERF_REGS) +endif + # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures @@ -84,6 +95,17 @@ ifndef NO_LIBELF FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw endif +ifdef LIBBABELTRACE + # for linking with debug library, run like: + # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ + ifdef LIBBABELTRACE_DIR + LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include + LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib + endif + FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) + FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf +endif + # include ARCH specific config -include $(src-perf)/arch/$(ARCH)/Makefile @@ -114,6 +136,8 @@ ifdef PARSER_DEBUG PARSER_DEBUG_BISON := -t PARSER_DEBUG_FLEX := -d CFLAGS += -DPARSER_DEBUG + $(call detected_var,PARSER_DEBUG_BISON) + $(call detected_var,PARSER_DEBUG_FLEX) endif ifndef NO_LIBPYTHON @@ -152,121 +176,7 @@ LDFLAGS += -Wl,-z,noexecstack EXTLIBS = -lpthread -lrt -lm -ldl -ifneq ($(OUTPUT),) - OUTPUT_FEATURES = $(OUTPUT)config/feature-checks/ - $(shell mkdir -p $(OUTPUT_FEATURES)) -endif - -feature_check = $(eval $(feature_check_code)) -define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C config/feature-checks test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) -endef - -feature_set = $(eval $(feature_set_code)) -define feature_set_code - feature-$(1) := 1 -endef - -# -# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output: -# - -# -# Note that this is not a complete list of all feature tests, just -# those that are typically built on a fully configured system. -# -# [ Feature tests not mentioned here have to be built explicitly in -# the rule that uses them - an example for that is the 'bionic' -# feature check. ] -# -CORE_FEATURE_TESTS = \ - backtrace \ - dwarf \ - fortify-source \ - sync-compare-and-swap \ - glibc \ - gtk2 \ - gtk2-infobar \ - libaudit \ - libbfd \ - libelf \ - libelf-getphdrnum \ - libelf-mmap \ - libnuma \ - libperl \ - libpython \ - libpython-version \ - libslang \ - libunwind \ - pthread-attr-setaffinity-np \ - stackprotector-all \ - timerfd \ - libdw-dwarf-unwind \ - zlib - -LIB_FEATURE_TESTS = \ - dwarf \ - glibc \ - gtk2 \ - libaudit \ - libbfd \ - libelf \ - libnuma \ - libperl \ - libpython \ - libslang \ - libunwind \ - libdw-dwarf-unwind \ - zlib - -VF_FEATURE_TESTS = \ - backtrace \ - fortify-source \ - sync-compare-and-swap \ - gtk2-infobar \ - libelf-getphdrnum \ - libelf-mmap \ - libpython-version \ - pthread-attr-setaffinity-np \ - stackprotector-all \ - timerfd \ - libunwind-debug-frame \ - bionic \ - liberty \ - liberty-z \ - cplus-demangle \ - compile-32 \ - compile-x32 - -# Set FEATURE_CHECK_(C|LD)FLAGS-all for all CORE_FEATURE_TESTS features. -# If in the future we need per-feature checks/flags for features not -# mentioned in this list we need to refactor this ;-). -set_test_all_flags = $(eval $(set_test_all_flags_code)) -define set_test_all_flags_code - FEATURE_CHECK_CFLAGS-all += $(FEATURE_CHECK_CFLAGS-$(1)) - FEATURE_CHECK_LDFLAGS-all += $(FEATURE_CHECK_LDFLAGS-$(1)) -endef - -$(foreach feat,$(CORE_FEATURE_TESTS),$(call set_test_all_flags,$(feat))) - -# -# Special fast-path for the 'all features are available' case: -# -$(call feature_check,all,$(MSG)) - -# -# Just in case the build freshly failed, make sure we print the -# feature matrix: -# -ifeq ($(feature-all), 1) - # - # test-all.c passed - just set all the core feature flags to 1: - # - $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_set,$(feat))) -else - $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C config/feature-checks $(addsuffix .bin,$(CORE_FEATURE_TESTS)) >/dev/null 2>&1) - $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat))) -endif +include $(srctree)/tools/build/Makefile.feature ifeq ($(feature-stackprotector-all), 1) CFLAGS += -fstack-protector-all @@ -295,7 +205,7 @@ endif CFLAGS += -I$(src-perf)/util CFLAGS += -I$(src-perf) -CFLAGS += -I$(LIB_INCLUDE) +CFLAGS += -I$(srctree)/tools/lib/ CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE @@ -361,6 +271,7 @@ endif # NO_LIBELF ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT EXTLIBS += -lelf + $(call detected,CONFIG_LIBELF) ifeq ($(feature-libelf-mmap), 1) CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT @@ -381,6 +292,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) LDFLAGS += $(LIBDW_LDFLAGS) EXTLIBS += -ldw + $(call detected,CONFIG_DWARF) endif # PERF_HAVE_DWARF_REGS endif # NO_DWARF endif # NO_LIBELF @@ -408,9 +320,11 @@ ifdef NO_LIBUNWIND dwarf-post-unwind := 0 else dwarf-post-unwind-text := libdw + $(call detected,CONFIG_LIBDW_DWARF_UNWIND) endif else dwarf-post-unwind-text := libunwind + $(call detected,CONFIG_LIBUNWIND) # Enable libunwind support by default. ifndef NO_LIBDW_DWARF_UNWIND NO_LIBDW_DWARF_UNWIND := 1 @@ -419,6 +333,7 @@ endif ifeq ($(dwarf-post-unwind),1) CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT + $(call detected,CONFIG_DWARF_UNWIND) else NO_DWARF_UNWIND := 1 endif @@ -447,6 +362,7 @@ ifndef NO_LIBAUDIT else CFLAGS += -DHAVE_LIBAUDIT_SUPPORT EXTLIBS += -laudit + $(call detected,CONFIG_AUDIT) endif endif @@ -463,6 +379,7 @@ ifndef NO_SLANG CFLAGS += -I/usr/include/slang CFLAGS += -DHAVE_SLANG_SUPPORT EXTLIBS += -lslang + $(call detected,CONFIG_SLANG) endif endif @@ -497,10 +414,11 @@ else ifneq ($(feature-libperl), 1) CFLAGS += -DNO_LIBPERL NO_LIBPERL := 1 - msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed); + msg := $(warning Missing perl devel files. Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev); else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) + $(call detected,CONFIG_LIBPERL) endif endif @@ -513,22 +431,21 @@ endif disable-python = $(eval $(disable-python_code)) define disable-python_code CFLAGS += -DNO_LIBPYTHON - $(if $(1),$(warning No $(1) was found)) - $(warning Python support will not be built) + $(warning $1) NO_LIBPYTHON := 1 endef ifdef NO_LIBPYTHON - $(call disable-python) + $(call disable-python,Python support disabled by user) else ifndef PYTHON - $(call disable-python,python interpreter) + $(call disable-python,No python interpreter was found: disables Python support - please install python-devel/python-dev) else PYTHON_WORD := $(call shell-wordify,$(PYTHON)) ifndef PYTHON_CONFIG - $(call disable-python,python-config tool) + $(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev) else PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) @@ -540,7 +457,7 @@ else FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) ifneq ($(feature-libpython), 1) - $(call disable-python,Python.h (for Python 2.x)) + $(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev) else ifneq ($(feature-libpython-version), 1) @@ -560,6 +477,7 @@ else LDFLAGS += $(PYTHON_EMBED_LDFLAGS) EXTLIBS += $(PYTHON_EMBED_LIBADD) LANG_BINDINGS += $(obj-perf)python/perf.so + $(call detected,CONFIG_LIBPYTHON) endif endif endif @@ -600,7 +518,7 @@ else EXTLIBS += -liberty CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT else - msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling) + msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling) CFLAGS += -DNO_DEMANGLE endif endif @@ -617,11 +535,23 @@ ifndef NO_ZLIB ifeq ($(feature-zlib), 1) CFLAGS += -DHAVE_ZLIB_SUPPORT EXTLIBS += -lz + $(call detected,CONFIG_ZLIB) else NO_ZLIB := 1 endif endif +ifndef NO_LZMA + ifeq ($(feature-lzma), 1) + CFLAGS += -DHAVE_LZMA_SUPPORT + EXTLIBS += -llzma + $(call detected,CONFIG_LZMA) + else + msg := $(warning No liblzma found, disables xz kernel module decompression, please install xz-devel/liblzma-dev); + NO_LZMA := 1 + endif +endif + ifndef NO_BACKTRACE ifeq ($(feature-backtrace), 1) CFLAGS += -DHAVE_BACKTRACE_SUPPORT @@ -635,6 +565,7 @@ ifndef NO_LIBNUMA else CFLAGS += -DHAVE_LIBNUMA_SUPPORT EXTLIBS += -lnuma + $(call detected,CONFIG_NUMA) endif endif @@ -651,7 +582,7 @@ ifeq (${IS_64_BIT}, 1) NO_PERF_READ_VDSO32 := 1 endif endif - ifneq (${IS_X86_64}, 1) + ifneq ($(ARCH), x86) NO_PERF_READ_VDSOX32 := 1 endif ifndef NO_PERF_READ_VDSOX32 @@ -667,6 +598,18 @@ else NO_PERF_READ_VDSOX32 := 1 endif +ifdef LIBBABELTRACE + $(call feature_check,libbabeltrace) + ifeq ($(feature-libbabeltrace), 1) + CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS) + LDFLAGS += $(LIBBABELTRACE_LDFLAGS) + EXTLIBS += -lbabeltrace-ctf + $(call detected,CONFIG_LIBBABELTRACE) + else + msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev); + endif +endif + # Among the variables below, these: # perfexecdir # template_dir @@ -699,7 +642,7 @@ sysconfdir = $(prefix)/etc ETC_PERFCONFIG = etc/perfconfig endif ifndef lib -ifeq ($(IS_X86_64),1) +ifeq ($(ARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib @@ -735,83 +678,33 @@ plugindir=$(libdir)/traceevent/plugins plugindir_SQ= $(subst ','\'',$(plugindir)) endif -# -# Print the result of the feature test: -# -feature_print_status = $(eval $(feature_print_status_code)) $(info $(MSG)) - -define feature_print_status_code - ifeq ($(feature-$(1)), 1) - MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) - else - MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1)) - endif -endef - -feature_print_var = $(eval $(feature_print_var_code)) $(info $(MSG)) -define feature_print_var_code +print_var = $(eval $(print_var_code)) $(info $(MSG)) +define print_var_code MSG = $(shell printf '...%30s: %s' $(1) $($(1))) endef -feature_print_text = $(eval $(feature_print_text_code)) $(info $(MSG)) -define feature_print_text_code - MSG = $(shell printf '...%30s: %s' $(1) $(2)) -endef - -PERF_FEATURES := $(foreach feat,$(LIB_FEATURE_TESTS),feature-$(feat)($(feature-$(feat)))) -PERF_FEATURES_FILE := $(shell touch $(OUTPUT)PERF-FEATURES; cat $(OUTPUT)PERF-FEATURES) - -ifeq ($(dwarf-post-unwind),1) - PERF_FEATURES += dwarf-post-unwind($(dwarf-post-unwind-text)) -endif - -# The $(display_lib) controls the default detection message -# output. It's set if: -# - detected features differes from stored features from -# last build (in PERF-FEATURES file) -# - one of the $(LIB_FEATURE_TESTS) is not detected -# - VF is enabled - -ifneq ("$(PERF_FEATURES)","$(PERF_FEATURES_FILE)") - $(shell echo "$(PERF_FEATURES)" > $(OUTPUT)PERF-FEATURES) - display_lib := 1 -endif - -feature_check = $(eval $(feature_check_code)) -define feature_check_code - ifneq ($(feature-$(1)), 1) - display_lib := 1 - endif -endef - -$(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_check,$(feat))) - ifeq ($(VF),1) - display_lib := 1 - display_vf := 1 -endif - -ifeq ($(display_lib),1) + $(call print_var,prefix) + $(call print_var,bindir) + $(call print_var,libdir) + $(call print_var,sysconfdir) + $(call print_var,LIBUNWIND_DIR) + $(call print_var,LIBDW_DIR) $(info ) - $(info Auto-detecting system features:) - $(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_print_status,$(feat),)) - - ifeq ($(dwarf-post-unwind),1) - $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) - endif -endif - -ifeq ($(display_vf),1) - $(foreach feat,$(VF_FEATURE_TESTS),$(call feature_print_status,$(feat),)) - $(info ) - $(call feature_print_var,prefix) - $(call feature_print_var,bindir) - $(call feature_print_var,libdir) - $(call feature_print_var,sysconfdir) - $(call feature_print_var,LIBUNWIND_DIR) - $(call feature_print_var,LIBDW_DIR) endif -ifeq ($(display_lib),1) - $(info ) -endif +$(call detected_var,bindir_SQ) +$(call detected_var,PYTHON_WORD) +ifneq ($(OUTPUT),) +$(call detected_var,OUTPUT) +endif +$(call detected_var,htmldir_SQ) +$(call detected_var,infodir_SQ) +$(call detected_var,mandir_SQ) +$(call detected_var,ETC_PERFCONFIG_SQ) +$(call detected_var,prefix_SQ) +$(call detected_var,perfexecdir_SQ) +$(call detected_var,LIBDIR) +$(call detected_var,GTK_CFLAGS) +$(call detected_var,PERL_EMBED_CCOPTS) +$(call detected_var,PYTHON_EMBED_CCOPTS) diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch index ac8721ffa6c8..e11fbd6fae78 100644 --- a/tools/perf/config/Makefile.arch +++ b/tools/perf/config/Makefile.arch @@ -1,32 +1,15 @@ +ifndef ARCH +ARCH := $(shell uname -m 2>/dev/null || echo not) +endif -uname_M := $(shell uname -m 2>/dev/null || echo not) - -RAW_ARCH := $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ - -e s/arm.*/arm/ -e s/sa110/arm/ \ +ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ + -e s/sun4u/sparc/ -e s/sparc64/sparc/ \ + -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ -e s/tile.*/tile/ ) -# Additional ARCH settings for x86 -ifeq ($(RAW_ARCH),i386) - ARCH ?= x86 -endif - -ifeq ($(RAW_ARCH),x86_64) - ARCH ?= x86 - - ifneq (, $(findstring m32,$(CFLAGS))) - RAW_ARCH := x86_32 - endif -endif - -ifeq ($(RAW_ARCH),sparc64) - ARCH ?= sparc -endif - -ARCH ?= $(RAW_ARCH) - LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) ifeq ($(LP64), 1) IS_64_BIT := 1 diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index 7076a62d0ff7..c16ce833079c 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -175,6 +175,5 @@ _ge-abspath = $(if $(is-executable),$(1)) define get-executable-or-default $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2))) endef -_ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2))) -_gea_warn = $(warning The path '$(1)' is not executable.) +_ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2))) _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 33569847fdcc..3ba80b2359cc 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -47,8 +47,16 @@ __my_reassemble_comp_words_by_ref() done } -type _get_comp_words_by_ref &>/dev/null || -_get_comp_words_by_ref() +# Define preload_get_comp_words_by_ref="false", if the function +# __perf_get_comp_words_by_ref() is required instead. +preload_get_comp_words_by_ref="true" + +if [ $preload_get_comp_words_by_ref = "true" ]; then + type _get_comp_words_by_ref &>/dev/null || + preload_get_comp_words_by_ref="false" +fi +[ $preload_get_comp_words_by_ref = "true" ] || +__perf_get_comp_words_by_ref() { local exclude cur_ words_ cword_ if [ "$1" = "-n" ]; then @@ -76,8 +84,16 @@ _get_comp_words_by_ref() done } -type __ltrim_colon_completions &>/dev/null || -__ltrim_colon_completions() +# Define preload__ltrim_colon_completions="false", if the function +# __perf__ltrim_colon_completions() is required instead. +preload__ltrim_colon_completions="true" + +if [ $preload__ltrim_colon_completions = "true" ]; then + type __ltrim_colon_completions &>/dev/null || + preload__ltrim_colon_completions="false" +fi +[ $preload__ltrim_colon_completions = "true" ] || +__perf__ltrim_colon_completions() { if [[ "$1" == *:* && "$COMP_WORDBREAKS" == *:* ]]; then # Remove colon-word prefix from COMPREPLY items @@ -97,7 +113,32 @@ __perfcomp () __perfcomp_colon () { __perfcomp "$1" "$2" - __ltrim_colon_completions $cur + if [ $preload__ltrim_colon_completions = "true" ]; then + __ltrim_colon_completions $cur + else + __perf__ltrim_colon_completions $cur + fi +} + +__perf_prev_skip_opts () +{ + local i cmd_ cmds_ + + let i=cword-1 + cmds_=$($cmd $1 --list-cmds) + prev_skip_opts=() + while [ $i -ge 0 ]; do + if [[ ${words[i]} == $1 ]]; then + return + fi + for cmd_ in $cmds_; do + if [[ ${words[i]} == $cmd_ ]]; then + prev_skip_opts=${words[i]} + return + fi + done + ((i--)) + done } __perf_main () @@ -107,29 +148,36 @@ __perf_main () cmd=${words[0]} COMPREPLY=() + # Skip options backward and find the last perf command + __perf_prev_skip_opts # List perf subcommands or long options - if [ $cword -eq 1 ]; then + if [ -z $prev_skip_opts ]; then if [[ $cur == --* ]]; then - __perfcomp '--help --version \ - --exec-path --html-path --paginate --no-pager \ - --perf-dir --work-tree --debugfs-dir' -- "$cur" + cmds=$($cmd --list-opts) else cmds=$($cmd --list-cmds) - __perfcomp "$cmds" "$cur" fi + __perfcomp "$cmds" "$cur" # List possible events for -e option - elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then + elif [[ $prev == @("-e"|"--event") && + $prev_skip_opts == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) __perfcomp_colon "$evts" "$cur" - # List subcommands for perf commands - elif [[ $prev == @(kvm|kmem|mem|lock|sched) ]]; then - subcmds=$($cmd $prev --list-cmds) - __perfcomp_colon "$subcmds" "$cur" - # List long option names - elif [[ $cur == --* ]]; then - subcmd=${words[1]} - opts=$($cmd $subcmd --list-opts) - __perfcomp "$opts" "$cur" + else + # List subcommands for perf commands + if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched| + |data|help|script|test|timechart|trace) ]]; then + subcmds=$($cmd $prev_skip_opts --list-cmds) + __perfcomp_colon "$subcmds" "$cur" + fi + # List long option names + if [[ $cur == --* ]]; then + subcmd=$prev_skip_opts + __perf_prev_skip_opts $subcmd + subcmd=$subcmd" "$prev_skip_opts + opts=$($cmd $subcmd --list-opts) + __perfcomp "$opts" "$cur" + fi fi } @@ -198,7 +246,11 @@ type perf &>/dev/null && _perf() { local cur words cword prev - _get_comp_words_by_ref -n =: cur words cword prev + if [ $preload_get_comp_words_by_ref = "true" ]; then + _get_comp_words_by_ref -n =: cur words cword prev + else + __perf_get_comp_words_by_ref -n =: cur words cword prev + fi __perf_main } && diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 3700a7faca6c..b857fcbd00cf 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -13,6 +13,7 @@ #include "util/quote.h" #include "util/run-command.h" #include "util/parse-events.h" +#include "util/parse-options.h" #include "util/debug.h" #include <api/fs/debugfs.h> #include <pthread.h> @@ -62,6 +63,7 @@ static struct cmd_struct commands[] = { #endif { "inject", cmd_inject, 0 }, { "mem", cmd_mem, 0 }, + { "data", cmd_data, 0 }, }; struct pager_config { @@ -124,6 +126,23 @@ static void commit_pager_choice(void) } } +struct option options[] = { + OPT_ARGUMENT("help", "help"), + OPT_ARGUMENT("version", "version"), + OPT_ARGUMENT("exec-path", "exec-path"), + OPT_ARGUMENT("html-path", "html-path"), + OPT_ARGUMENT("paginate", "paginate"), + OPT_ARGUMENT("no-pager", "no-pager"), + OPT_ARGUMENT("perf-dir", "perf-dir"), + OPT_ARGUMENT("work-tree", "work-tree"), + OPT_ARGUMENT("debugfs-dir", "debugfs-dir"), + OPT_ARGUMENT("buildid-dir", "buildid-dir"), + OPT_ARGUMENT("list-cmds", "list-cmds"), + OPT_ARGUMENT("list-opts", "list-opts"), + OPT_ARGUMENT("debug", "debug"), + OPT_END() +}; + static int handle_options(const char ***argv, int *argc, int *envchanged) { int handled = 0; @@ -222,6 +241,16 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) struct cmd_struct *p = commands+i; printf("%s ", p->cmd); } + putchar('\n'); + exit(0); + } else if (!strcmp(cmd, "--list-opts")) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(options)-1; i++) { + struct option *p = options+i; + printf("--%s ", p->long_name); + } + putchar('\n'); exit(0); } else if (!strcmp(cmd, "--debug")) { if (*argc < 2) { diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 1dabb8553499..e14bb637255c 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -29,7 +29,7 @@ static inline unsigned long long rdclock(void) return ts.tv_sec * 1000000000ULL + ts.tv_nsec; } -#define MAX_NR_CPUS 256 +#define MAX_NR_CPUS 1024 extern const char *input_name; extern bool perf_host, perf_guest; @@ -53,6 +53,7 @@ struct record_opts { bool sample_time; bool period; bool sample_intr_regs; + bool running_time; unsigned int freq; unsigned int mmap_pages; unsigned int user_freq; @@ -61,6 +62,8 @@ struct record_opts { u64 user_interval; bool sample_transaction; unsigned initial_delay; + bool use_clockid; + clockid_t clockid; }; struct option; diff --git a/tools/perf/scripts/Build b/tools/perf/scripts/Build new file mode 100644 index 000000000000..41efd7e368b3 --- /dev/null +++ b/tools/perf/scripts/Build @@ -0,0 +1,2 @@ +libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ +libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build new file mode 100644 index 000000000000..928e110179cb --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build @@ -0,0 +1,3 @@ +libperf-y += Context.o + +CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build new file mode 100644 index 000000000000..aefc15c9444a --- /dev/null +++ b/tools/perf/scripts/python/Perf-Trace-Util/Build @@ -0,0 +1,3 @@ +libperf-y += Context.o + +CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build new file mode 100644 index 000000000000..6a8801b32017 --- /dev/null +++ b/tools/perf/tests/Build @@ -0,0 +1,43 @@ +perf-y += builtin-test.o +perf-y += parse-events.o +perf-y += dso-data.o +perf-y += attr.o +perf-y += vmlinux-kallsyms.o +perf-y += open-syscall.o +perf-y += open-syscall-all-cpus.o +perf-y += open-syscall-tp-fields.o +perf-y += mmap-basic.o +perf-y += perf-record.o +perf-y += rdpmc.o +perf-y += evsel-roundtrip-name.o +perf-y += evsel-tp-sched.o +perf-y += fdarray.o +perf-y += pmu.o +perf-y += hists_common.o +perf-y += hists_link.o +perf-y += hists_filter.o +perf-y += hists_output.o +perf-y += hists_cumulate.o +perf-y += python-use.o +perf-y += bp_signal.o +perf-y += bp_signal_overflow.o +perf-y += task-exit.o +perf-y += sw-clock.o +perf-y += mmap-thread-lookup.o +perf-y += thread-mg-share.o +perf-y += switch-tracking.o +perf-y += keep-tracking.o +perf-y += code-reading.o +perf-y += sample-parsing.o +perf-y += parse-no-sample-id-all.o +perf-y += kmod-path.o + +perf-$(CONFIG_X86) += perf-time-to-tsc.o + +ifeq ($(ARCH),$(filter $(ARCH),x86 arm)) +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o +endif + +CFLAGS_attr.o += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))" +CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))" +CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index d3095dafed36..7e6d74946e04 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=104 +size=112 config=0 sample_period=4000 sample_type=263 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index 872ed7e24c7c..f4cf148f14cb 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=104 +size=112 config=0 sample_period=0 sample_type=0 diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 4b7d9ab0f049..4f4098167112 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -167,6 +167,10 @@ static struct test { .func = test__fdarray__add, }, { + .desc = "Test kmod_path__parse function", + .func = test__kmod_path__parse, + }, + { .func = NULL, }, }; @@ -291,7 +295,7 @@ static int perf_test__list(int argc, const char **argv) int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) { - const char * const test_usage[] = { + const char *test_usage[] = { "perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]", NULL, }; @@ -302,13 +306,14 @@ int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show symbol address, etc)"), OPT_END() }; + const char * const test_subcommands[] = { "list", NULL }; struct intlist *skiplist = NULL; int ret = hists__init(); if (ret < 0) return ret; - argc = parse_options(argc, argv, test_options, test_usage, 0); + argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); if (argc >= 1 && !strcmp(argv[0], "list")) return perf_test__list(argc, argv); diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index caaf37f079b1..513e5febbe5a 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -112,6 +112,9 @@ int test__dso_data(void) dso = dso__new((const char *)file); + TEST_ASSERT_VAL("Failed to access to dso", + dso__data_fd(dso, &machine) >= 0); + /* Basic 10 bytes tests. */ for (i = 0; i < ARRAY_SIZE(offsets); i++) { struct test_data_offset *data = &offsets[i]; @@ -243,8 +246,8 @@ int test__dso_data_cache(void) limit = nr * 4; TEST_ASSERT_VAL("failed to set file limit", !set_fd_limit(limit)); - /* and this is now our dso open FDs limit + 1 extra */ - dso_cnt = limit / 2 + 1; + /* and this is now our dso open FDs limit */ + dso_cnt = limit / 2; TEST_ASSERT_VAL("failed to create dsos\n", !dsos__create(dso_cnt, TEST_FILE_SIZE)); @@ -252,13 +255,13 @@ int test__dso_data_cache(void) struct dso *dso = dsos[i]; /* - * Open dsos via dso__data_fd or dso__data_read_offset. - * Both opens the data file and keep it open. + * Open dsos via dso__data_fd(), it opens the data + * file and keep it open (unless open file limit). */ + fd = dso__data_fd(dso, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + if (i % 2) { - fd = dso__data_fd(dso, &machine); - TEST_ASSERT_VAL("failed to get fd", fd > 0); - } else { #define BUFSIZE 10 u8 buf[BUFSIZE]; ssize_t n; @@ -268,7 +271,10 @@ int test__dso_data_cache(void) } } - /* open +1 dso over the allowed limit */ + /* verify the first one is already open */ + TEST_ASSERT_VAL("dsos[0] is not open", dsos[0]->data.fd != -1); + + /* open +1 dso to reach the allowed limit */ fd = dso__data_fd(dsos[i], &machine); TEST_ASSERT_VAL("failed to get fd", fd > 0); diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c new file mode 100644 index 000000000000..e8d7cbb9320c --- /dev/null +++ b/tools/perf/tests/kmod-path.c @@ -0,0 +1,73 @@ +#include <stdbool.h> +#include "tests.h" +#include "dso.h" +#include "debug.h" + +static int test(const char *path, bool alloc_name, bool alloc_ext, + bool kmod, bool comp, const char *name, const char *ext) +{ + struct kmod_path m; + + memset(&m, 0x0, sizeof(m)); + + TEST_ASSERT_VAL("kmod_path__parse", + !__kmod_path__parse(&m, path, alloc_name, alloc_ext)); + + pr_debug("%s - alloc name %d, alloc ext %d, kmod %d, comp %d, name '%s', ext '%s'\n", + path, alloc_name, alloc_ext, m.kmod, m.comp, m.name, m.ext); + + TEST_ASSERT_VAL("wrong kmod", m.kmod == kmod); + TEST_ASSERT_VAL("wrong comp", m.comp == comp); + + if (ext) + TEST_ASSERT_VAL("wrong ext", m.ext && !strcmp(ext, m.ext)); + else + TEST_ASSERT_VAL("wrong ext", !m.ext); + + if (name) + TEST_ASSERT_VAL("wrong name", m.name && !strcmp(name, m.name)); + else + TEST_ASSERT_VAL("wrong name", !m.name); + + free(m.name); + free(m.ext); + return 0; +} + +#define T(path, an, ae, k, c, n, e) \ + TEST_ASSERT_VAL("failed", !test(path, an, ae, k, c, n, e)) + +int test__kmod_path__parse(void) +{ + /* path alloc_name alloc_ext kmod comp name ext */ + T("/xxxx/xxxx/x-x.ko", true , true , true, false, "[x_x]", NULL); + T("/xxxx/xxxx/x-x.ko", false , true , true, false, NULL , NULL); + T("/xxxx/xxxx/x-x.ko", true , false , true, false, "[x_x]", NULL); + T("/xxxx/xxxx/x-x.ko", false , false , true, false, NULL , NULL); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("/xxxx/xxxx/x.ko.gz", true , true , true, true, "[x]", "gz"); + T("/xxxx/xxxx/x.ko.gz", false , true , true, true, NULL , "gz"); + T("/xxxx/xxxx/x.ko.gz", true , false , true, true, "[x]", NULL); + T("/xxxx/xxxx/x.ko.gz", false , false , true, true, NULL , NULL); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("/xxxx/xxxx/x.gz", true , true , false, true, "x.gz" ,"gz"); + T("/xxxx/xxxx/x.gz", false , true , false, true, NULL ,"gz"); + T("/xxxx/xxxx/x.gz", true , false , false, true, "x.gz" , NULL); + T("/xxxx/xxxx/x.gz", false , false , false, true, NULL , NULL); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("x.gz", true , true , false, true, "x.gz", "gz"); + T("x.gz", false , true , false, true, NULL , "gz"); + T("x.gz", true , false , false, true, "x.gz", NULL); + T("x.gz", false , false , false, true, NULL , NULL); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("x.ko.gz", true , true , true, true, "[x]", "gz"); + T("x.ko.gz", false , true , true, true, NULL , "gz"); + T("x.ko.gz", true , false , true, true, "[x]", NULL); + T("x.ko.gz", false , false , true, true, NULL , NULL); + + return 0; +} diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 75709d2b17b4..bff85324f799 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -5,7 +5,7 @@ include config/Makefile.arch # FIXME looks like x86 is the only arch running tests ;-) # we need some IS_(32/64) flag to make this generic -ifeq ($(IS_X86_64),1) +ifeq ($(ARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/open-syscall-all-cpus.c index 8fa82d1700c7..3ec885c48f8f 100644 --- a/tools/perf/tests/open-syscall-all-cpus.c +++ b/tools/perf/tests/open-syscall-all-cpus.c @@ -29,7 +29,12 @@ int test__open_syscall_event_on_all_cpus(void) evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); if (evsel == NULL) { - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + if (tracefs_configured()) + pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); + else if (debugfs_configured()) + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + else + pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); goto out_thread_map_delete; } diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/open-syscall.c index a33b2daae40f..07aa319bf334 100644 --- a/tools/perf/tests/open-syscall.c +++ b/tools/perf/tests/open-syscall.c @@ -18,7 +18,12 @@ int test__open_syscall_event(void) evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); if (evsel == NULL) { - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + if (tracefs_configured()) + pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); + else if (debugfs_configured()) + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + else + pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); goto out_thread_map_delete; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 1cdab0ce00e2..3de744961739 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -3,6 +3,7 @@ #include "evsel.h" #include "evlist.h" #include <api/fs/fs.h> +#include <api/fs/tracefs.h> #include <api/fs/debugfs.h> #include "tests.h" #include "debug.h" @@ -294,6 +295,36 @@ static int test__checkevent_genhw_modifier(struct perf_evlist *evlist) return test__checkevent_genhw(evlist); } +static int test__checkevent_exclude_idle_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong exclude idle", evsel->attr.exclude_idle); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + return test__checkevent_symbolic_name(evlist); +} + +static int test__checkevent_exclude_idle_modifier_1(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong exclude idle", evsel->attr.exclude_idle); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + return test__checkevent_symbolic_name(evlist); +} + static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist) { struct perf_evsel *evsel = perf_evlist__first(evlist); @@ -1192,11 +1223,19 @@ static int count_tracepoints(void) { char events_path[PATH_MAX]; struct dirent *events_ent; + const char *mountpoint; DIR *events_dir; int cnt = 0; - scnprintf(events_path, PATH_MAX, "%s/tracing/events", - debugfs_find_mountpoint()); + mountpoint = tracefs_find_mountpoint(); + if (mountpoint) { + scnprintf(events_path, PATH_MAX, "%s/events", + mountpoint); + } else { + mountpoint = debugfs_find_mountpoint(); + scnprintf(events_path, PATH_MAX, "%s/tracing/events", + mountpoint); + } events_dir = opendir(events_path); @@ -1485,6 +1524,16 @@ static struct evlist_test test__events[] = { .id = 100, }, #endif + { + .name = "instructions:I", + .check = test__checkevent_exclude_idle_modifier, + .id = 45, + }, + { + .name = "instructions:kIG", + .check = test__checkevent_exclude_idle_modifier_1, + .id = 46, + }, }; static struct evlist_test test__events_pmu[] = { diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 00e776a87a9c..52758a33f64c 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -51,6 +51,7 @@ int test__hists_cumulate(void); int test__switch_tracking(void); int test__fdarray__filter(void); int test__fdarray__add(void); +int test__kmod_path__parse(void); #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build new file mode 100644 index 000000000000..0a73538c0441 --- /dev/null +++ b/tools/perf/ui/Build @@ -0,0 +1,14 @@ +libperf-y += setup.o +libperf-y += helpline.o +libperf-y += progress.o +libperf-y += util.o +libperf-y += hist.o +libperf-y += stdio/hist.o + +CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" + +libperf-$(CONFIG_SLANG) += browser.o +libperf-$(CONFIG_SLANG) += browsers/ +libperf-$(CONFIG_SLANG) += tui/ + +CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build new file mode 100644 index 000000000000..de223f5bed58 --- /dev/null +++ b/tools/perf/ui/browsers/Build @@ -0,0 +1,10 @@ +libperf-y += annotate.o +libperf-y += hists.o +libperf-y += map.o +libperf-y += scripts.o +libperf-y += header.o + +CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST +CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST +CFLAGS_map.o += -DENABLE_SLFUTURE_CONST +CFLAGS_scripts.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 9d32e3c0cfee..e5250eb2dd57 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -829,10 +829,16 @@ out: return key; } +int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, + struct hist_browser_timer *hbt) +{ + return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt); +} + int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, struct hist_browser_timer *hbt) { - return symbol__tui_annotate(he->ms.sym, he->ms.map, evsel, hbt); + return map_symbol__tui_annotate(&he->ms, evsel, hbt); } static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 788506eef567..995b7a8596b1 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -48,6 +48,24 @@ static bool hist_browser__has_filter(struct hist_browser *hb) return hists__has_filter(hb->hists) || hb->min_pcnt; } +static int hist_browser__get_folding(struct hist_browser *browser) +{ + struct rb_node *nd; + struct hists *hists = browser->hists; + int unfolded_rows = 0; + + for (nd = rb_first(&hists->entries); + (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; + nd = rb_next(nd)) { + struct hist_entry *he = + rb_entry(nd, struct hist_entry, rb_node); + + if (he->ms.unfolded) + unfolded_rows += he->nr_rows; + } + return unfolded_rows; +} + static u32 hist_browser__nr_entries(struct hist_browser *hb) { u32 nr_entries; @@ -57,6 +75,7 @@ static u32 hist_browser__nr_entries(struct hist_browser *hb) else nr_entries = hb->hists->nr_entries; + hb->nr_callchain_rows = hist_browser__get_folding(hb); return nr_entries + hb->nr_callchain_rows; } @@ -492,6 +511,7 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser, { int color, width; char folded_sign = callchain_list__folded(chain); + bool show_annotated = browser->show_dso && chain->ms.sym && symbol__annotation(chain->ms.sym)->src; color = HE_COLORSET_NORMAL; width = browser->b.width - (offset + 2); @@ -504,7 +524,8 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser, ui_browser__set_color(&browser->b, color); hist_browser__gotorc(browser, row, 0); slsmg_write_nstring(" ", offset); - slsmg_printf("%c ", folded_sign); + slsmg_printf("%c", folded_sign); + ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' '); slsmg_write_nstring(str, width); } @@ -1467,7 +1488,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, perf_hpp__set_user_width(symbol_conf.col_width_list_str); while (1) { - const struct thread *thread = NULL; + struct thread *thread = NULL; const struct dso *dso = NULL; int choice = 0, annotate = -2, zoom_dso = -2, zoom_thread = -2, @@ -1593,28 +1614,30 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (!sort__has_sym) goto add_exit_option; + if (browser->selection == NULL) + goto skip_annotation; + if (sort__mode == SORT_MODE__BRANCH) { bi = browser->he_selection->branch_info; - if (browser->selection != NULL && - bi && - bi->from.sym != NULL && + + if (bi == NULL) + goto skip_annotation; + + if (bi->from.sym != NULL && !bi->from.map->dso->annotate_warned && - asprintf(&options[nr_options], "Annotate %s", - bi->from.sym->name) > 0) + asprintf(&options[nr_options], "Annotate %s", bi->from.sym->name) > 0) { annotate_f = nr_options++; + } - if (browser->selection != NULL && - bi && - bi->to.sym != NULL && + if (bi->to.sym != NULL && !bi->to.map->dso->annotate_warned && (bi->to.sym != bi->from.sym || bi->to.map->dso != bi->from.map->dso) && - asprintf(&options[nr_options], "Annotate %s", - bi->to.sym->name) > 0) + asprintf(&options[nr_options], "Annotate %s", bi->to.sym->name) > 0) { annotate_t = nr_options++; + } } else { - if (browser->selection != NULL && - browser->selection->sym != NULL && + if (browser->selection->sym != NULL && !browser->selection->map->dso->annotate_warned) { struct annotation *notes; @@ -1622,11 +1645,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (notes->src && asprintf(&options[nr_options], "Annotate %s", - browser->selection->sym->name) > 0) + browser->selection->sym->name) > 0) { annotate = nr_options++; + } } } - +skip_annotation: if (thread != NULL && asprintf(&options[nr_options], "Zoom %s %s(%d) thread", (browser->hists->thread_filter ? "out of" : "into"), @@ -1682,6 +1706,7 @@ retry_popup_menu: if (choice == annotate || choice == annotate_t || choice == annotate_f) { struct hist_entry *he; struct annotation *notes; + struct map_symbol ms; int err; do_annotate: if (!objdump_path && perf_session_env__lookup_objdump(env)) @@ -1691,30 +1716,21 @@ do_annotate: if (he == NULL) continue; - /* - * we stash the branch_info symbol + map into the - * the ms so we don't have to rewrite all the annotation - * code to use branch_info. - * in branch mode, the ms struct is not used - */ if (choice == annotate_f) { - he->ms.sym = he->branch_info->from.sym; - he->ms.map = he->branch_info->from.map; - } else if (choice == annotate_t) { - he->ms.sym = he->branch_info->to.sym; - he->ms.map = he->branch_info->to.map; + ms.map = he->branch_info->from.map; + ms.sym = he->branch_info->from.sym; + } else if (choice == annotate_t) { + ms.map = he->branch_info->to.map; + ms.sym = he->branch_info->to.sym; + } else { + ms = *browser->selection; } - notes = symbol__annotation(he->ms.sym); + notes = symbol__annotation(ms.sym); if (!notes->src) continue; - /* - * Don't let this be freed, say, by hists__decay_entry. - */ - he->used = true; - err = hist_entry__tui_annotate(he, evsel, hbt); - he->used = false; + err = map_symbol__tui_annotate(&ms, evsel, hbt); /* * offer option to annotate the other branch source or target * (if they exists) when returning from annotate @@ -1754,13 +1770,13 @@ zoom_thread: pstack__remove(fstack, &browser->hists->thread_filter); zoom_out_thread: ui_helpline__pop(); - browser->hists->thread_filter = NULL; + thread__zput(browser->hists->thread_filter); perf_hpp__set_elide(HISTC_THREAD, false); } else { ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", thread->comm_set ? thread__comm_str(thread) : "", thread->tid); - browser->hists->thread_filter = thread; + browser->hists->thread_filter = thread__get(thread); perf_hpp__set_elide(HISTC_THREAD, false); pstack__push(fstack, &browser->hists->thread_filter); } diff --git a/tools/perf/ui/gtk/Build b/tools/perf/ui/gtk/Build new file mode 100644 index 000000000000..ec22e899a224 --- /dev/null +++ b/tools/perf/ui/gtk/Build @@ -0,0 +1,9 @@ +CFLAGS_gtk += -fPIC $(GTK_CFLAGS) + +gtk-y += browser.o +gtk-y += hists.o +gtk-y += setup.o +gtk-y += util.o +gtk-y += helpline.o +gtk-y += progress.o +gtk-y += annotate.o diff --git a/tools/perf/ui/tui/Build b/tools/perf/ui/tui/Build new file mode 100644 index 000000000000..9e4c6ca41a9f --- /dev/null +++ b/tools/perf/ui/tui/Build @@ -0,0 +1,4 @@ +libperf-y += setup.o +libperf-y += util.o +libperf-y += helpline.o +libperf-y += progress.o diff --git a/tools/perf/util/Build b/tools/perf/util/Build new file mode 100644 index 000000000000..797490a40075 --- /dev/null +++ b/tools/perf/util/Build @@ -0,0 +1,145 @@ +libperf-y += abspath.o +libperf-y += alias.o +libperf-y += annotate.o +libperf-y += build-id.o +libperf-y += config.o +libperf-y += ctype.o +libperf-y += db-export.o +libperf-y += environment.o +libperf-y += event.o +libperf-y += evlist.o +libperf-y += evsel.o +libperf-y += exec_cmd.o +libperf-y += find_next_bit.o +libperf-y += help.o +libperf-y += kallsyms.o +libperf-y += levenshtein.o +libperf-y += parse-options.o +libperf-y += parse-events.o +libperf-y += path.o +libperf-y += rbtree.o +libperf-y += bitmap.o +libperf-y += hweight.o +libperf-y += run-command.o +libperf-y += quote.o +libperf-y += strbuf.o +libperf-y += string.o +libperf-y += strlist.o +libperf-y += strfilter.o +libperf-y += top.o +libperf-y += usage.o +libperf-y += wrapper.o +libperf-y += sigchain.o +libperf-y += dso.o +libperf-y += symbol.o +libperf-y += color.o +libperf-y += pager.o +libperf-y += header.o +libperf-y += callchain.o +libperf-y += values.o +libperf-y += debug.o +libperf-y += machine.o +libperf-y += map.o +libperf-y += pstack.o +libperf-y += session.o +libperf-y += ordered-events.o +libperf-y += comm.o +libperf-y += thread.o +libperf-y += thread_map.o +libperf-y += trace-event-parse.o +libperf-y += parse-events-flex.o +libperf-y += parse-events-bison.o +libperf-y += pmu.o +libperf-y += pmu-flex.o +libperf-y += pmu-bison.o +libperf-y += trace-event-read.o +libperf-y += trace-event-info.o +libperf-y += trace-event-scripting.o +libperf-y += trace-event.o +libperf-y += svghelper.o +libperf-y += sort.o +libperf-y += hist.o +libperf-y += util.o +libperf-y += xyarray.o +libperf-y += cpumap.o +libperf-y += cgroup.o +libperf-y += target.o +libperf-y += rblist.o +libperf-y += intlist.o +libperf-y += vdso.o +libperf-y += stat.o +libperf-y += record.o +libperf-y += srcline.o +libperf-y += data.o +libperf-$(CONFIG_X86) += tsc.o +libperf-y += cloexec.o +libperf-y += thread-stack.o + +libperf-$(CONFIG_LIBELF) += symbol-elf.o +libperf-$(CONFIG_LIBELF) += probe-event.o + +ifndef CONFIG_LIBELF +libperf-y += symbol-minimal.o +endif + +libperf-$(CONFIG_DWARF) += probe-finder.o +libperf-$(CONFIG_DWARF) += dwarf-aux.o + +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o + +libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o + +libperf-y += scripting-engines/ + +libperf-$(CONFIG_PERF_REGS) += perf_regs.o +libperf-$(CONFIG_ZLIB) += zlib.o +libperf-$(CONFIG_LZMA) += lzma.o + +CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" + +$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c + $(call rule_mkdir) + @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l + +$(OUTPUT)util/parse-events-bison.c: util/parse-events.y + $(call rule_mkdir) + @$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ + +$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c + $(call rule_mkdir) + @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l + +$(OUTPUT)util/pmu-bison.c: util/pmu.y + $(call rule_mkdir) + @$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_ + +CFLAGS_parse-events-flex.o += -w +CFLAGS_pmu-flex.o += -w +CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w +CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w + +$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c +$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c + +CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_parse-events.o += -Wno-redundant-decls + +$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/hweight.o: ../../lib/hweight.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 9d9db3b296dd..7f5bdfc9bc87 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1010,6 +1010,32 @@ fallback: } filename = symfs_filename; } + } else if (dso__needs_decompress(dso)) { + char tmp[PATH_MAX]; + struct kmod_path m; + int fd; + bool ret; + + if (kmod_path__parse_ext(&m, symfs_filename)) + goto out_free_filename; + + snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX"); + + fd = mkstemp(tmp); + if (fd < 0) { + free(m.ext); + goto out_free_filename; + } + + ret = decompress_to_file(m.ext, symfs_filename, fd); + + free(m.ext); + close(fd); + + if (!ret) + goto out_free_filename; + + strcpy(symfs_filename, tmp); } snprintf(command, sizeof(command), @@ -1029,7 +1055,7 @@ fallback: file = popen(command, "r"); if (!file) - goto out_free_filename; + goto out_remove_tmp; while (!feof(file)) if (symbol__parse_objdump_line(sym, map, file, privsize, @@ -1044,6 +1070,10 @@ fallback: delete_last_nop(sym); pclose(file); + +out_remove_tmp: + if (dso__needs_decompress(dso)) + unlink(symfs_filename); out_free_filename: if (delete_extract) kcore_extract__delete(&kce); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 0c72680a977f..61867dff5d5a 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -59,11 +59,8 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused, dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, event->fork.ppid, event->fork.ptid); - if (thread) { - rb_erase(&thread->rb_node, &machine->threads); - machine->last_match = NULL; - thread__delete(thread); - } + if (thread) + machine__remove_thread(machine, thread); return 0; } @@ -93,6 +90,35 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) return raw - build_id; } +/* asnprintf consolidates asprintf and snprintf */ +static int asnprintf(char **strp, size_t size, const char *fmt, ...) +{ + va_list ap; + int ret; + + if (!strp) + return -EINVAL; + + va_start(ap, fmt); + if (*strp) + ret = vsnprintf(*strp, size, fmt, ap); + else + ret = vasprintf(strp, fmt, ap); + va_end(ap); + + return ret; +} + +static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) +{ + char *tmp = bf; + int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir, + sbuild_id, sbuild_id + 2); + if (ret < 0 || (tmp && size < (unsigned int)ret)) + return NULL; + return bf; +} + char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) { char build_id_hex[BUILD_ID_SIZE * 2 + 1]; @@ -101,14 +127,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) return NULL; build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex); - if (bf == NULL) { - if (asprintf(&bf, "%s/.build-id/%.2s/%s", buildid_dir, - build_id_hex, build_id_hex + 2) < 0) - return NULL; - } else - snprintf(bf, size, "%s/.build-id/%.2s/%s", buildid_dir, - build_id_hex, build_id_hex + 2); - return bf; + return build_id__filename(build_id_hex, bf, size); } #define dsos__for_each_with_build_id(pos, head) \ @@ -259,52 +278,113 @@ void disable_buildid_cache(void) no_buildid_cache = true; } -int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, - const char *name, bool is_kallsyms, bool is_vdso) +static char *build_id_cache__dirname_from_path(const char *name, + bool is_kallsyms, bool is_vdso) { - const size_t size = PATH_MAX; - char *realname, *filename = zalloc(size), - *linkname = zalloc(size), *targetname; - int len, err = -1; + char *realname = (char *)name, *filename; bool slash = is_kallsyms || is_vdso; - if (is_kallsyms) { - if (symbol_conf.kptr_restrict) { - pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); - err = 0; - goto out_free; - } - realname = (char *) name; - } else + if (!slash) { realname = realpath(name, NULL); + if (!realname) + return NULL; + } - if (realname == NULL || filename == NULL || linkname == NULL) + if (asprintf(&filename, "%s%s%s", buildid_dir, slash ? "/" : "", + is_vdso ? DSO__NAME_VDSO : realname) < 0) + filename = NULL; + + if (!slash) + free(realname); + + return filename; +} + +int build_id_cache__list_build_ids(const char *pathname, + struct strlist **result) +{ + struct strlist *list; + char *dir_name; + DIR *dir; + struct dirent *d; + int ret = 0; + + list = strlist__new(true, NULL); + dir_name = build_id_cache__dirname_from_path(pathname, false, false); + if (!list || !dir_name) { + ret = -ENOMEM; + goto out; + } + + /* List up all dirents */ + dir = opendir(dir_name); + if (!dir) { + ret = -errno; + goto out; + } + + while ((d = readdir(dir)) != NULL) { + if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) + continue; + strlist__add(list, d->d_name); + } + closedir(dir); + +out: + free(dir_name); + if (ret) + strlist__delete(list); + else + *result = list; + + return ret; +} + +int build_id_cache__add_s(const char *sbuild_id, const char *name, + bool is_kallsyms, bool is_vdso) +{ + const size_t size = PATH_MAX; + char *realname = NULL, *filename = NULL, *dir_name = NULL, + *linkname = zalloc(size), *targetname, *tmp; + int err = -1; + + if (!is_kallsyms) { + realname = realpath(name, NULL); + if (!realname) + goto out_free; + } + + dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, is_vdso); + if (!dir_name) goto out_free; - len = scnprintf(filename, size, "%s%s%s", - debugdir, slash ? "/" : "", - is_vdso ? DSO__NAME_VDSO : realname); - if (mkdir_p(filename, 0755)) + if (mkdir_p(dir_name, 0755)) goto out_free; - snprintf(filename + len, size - len, "/%s", sbuild_id); + if (asprintf(&filename, "%s/%s", dir_name, sbuild_id) < 0) { + filename = NULL; + goto out_free; + } if (access(filename, F_OK)) { if (is_kallsyms) { if (copyfile("/proc/kallsyms", filename)) goto out_free; - } else if (link(realname, filename) && copyfile(name, filename)) + } else if (link(realname, filename) && errno != EEXIST && + copyfile(name, filename)) goto out_free; } - len = scnprintf(linkname, size, "%s/.build-id/%.2s", - debugdir, sbuild_id); + if (!build_id__filename(sbuild_id, linkname, size)) + goto out_free; + tmp = strrchr(linkname, '/'); + *tmp = '\0'; if (access(linkname, X_OK) && mkdir_p(linkname, 0755)) goto out_free; - snprintf(linkname + len, size - len, "/%s", sbuild_id + 2); - targetname = filename + strlen(debugdir) - 5; + *tmp = '/'; + targetname = filename + strlen(buildid_dir) - 5; memcpy(targetname, "../..", 5); if (symlink(targetname, linkname) == 0) @@ -313,34 +393,46 @@ out_free: if (!is_kallsyms) free(realname); free(filename); + free(dir_name); free(linkname); return err; } static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, - const char *name, const char *debugdir, - bool is_kallsyms, bool is_vdso) + const char *name, bool is_kallsyms, + bool is_vdso) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; build_id__sprintf(build_id, build_id_size, sbuild_id); - return build_id_cache__add_s(sbuild_id, debugdir, name, - is_kallsyms, is_vdso); + return build_id_cache__add_s(sbuild_id, name, is_kallsyms, is_vdso); +} + +bool build_id_cache__cached(const char *sbuild_id) +{ + bool ret = false; + char *filename = build_id__filename(sbuild_id, NULL, 0); + + if (filename && !access(filename, F_OK)) + ret = true; + free(filename); + + return ret; } -int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) +int build_id_cache__remove_s(const char *sbuild_id) { const size_t size = PATH_MAX; char *filename = zalloc(size), - *linkname = zalloc(size); + *linkname = zalloc(size), *tmp; int err = -1; if (filename == NULL || linkname == NULL) goto out_free; - snprintf(linkname, size, "%s/.build-id/%.2s/%s", - debugdir, sbuild_id, sbuild_id + 2); + if (!build_id__filename(sbuild_id, linkname, size)) + goto out_free; if (access(linkname, F_OK)) goto out_free; @@ -354,8 +446,8 @@ int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) /* * Since the link is relative, we must make it absolute: */ - snprintf(linkname, size, "%s/.build-id/%.2s/%s", - debugdir, sbuild_id, filename); + tmp = strrchr(linkname, '/') + 1; + snprintf(tmp, size - (tmp - linkname), "%s", filename); if (unlink(linkname)) goto out_free; @@ -367,8 +459,7 @@ out_free: return err; } -static int dso__cache_build_id(struct dso *dso, struct machine *machine, - const char *debugdir) +static int dso__cache_build_id(struct dso *dso, struct machine *machine) { bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; bool is_vdso = dso__is_vdso(dso); @@ -381,28 +472,26 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, name = nm; } return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, - debugdir, is_kallsyms, is_vdso); + is_kallsyms, is_vdso); } static int __dsos__cache_build_ids(struct list_head *head, - struct machine *machine, const char *debugdir) + struct machine *machine) { struct dso *pos; int err = 0; dsos__for_each_with_build_id(pos, head) - if (dso__cache_build_id(pos, machine, debugdir)) + if (dso__cache_build_id(pos, machine)) err = -1; return err; } -static int machine__cache_build_ids(struct machine *machine, const char *debugdir) +static int machine__cache_build_ids(struct machine *machine) { - int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine, - debugdir); - ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine, - debugdir); + int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine); + ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine); return ret; } @@ -417,11 +506,11 @@ int perf_session__cache_build_ids(struct perf_session *session) if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST) return -1; - ret = machine__cache_build_ids(&session->machines.host, buildid_dir); + ret = machine__cache_build_ids(&session->machines.host); for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret |= machine__cache_build_ids(pos, buildid_dir); + ret |= machine__cache_build_ids(pos); } return ret ? -1 : 0; } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 8236319514d5..85011222cc14 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -4,6 +4,7 @@ #define BUILD_ID_SIZE 20 #include "tool.h" +#include "strlist.h" #include <linux/types.h> extern struct perf_tool build_id__mark_dso_hit_ops; @@ -22,9 +23,12 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, int fd); int perf_session__cache_build_ids(struct perf_session *session); -int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, +int build_id_cache__list_build_ids(const char *pathname, + struct strlist **result); +bool build_id_cache__cached(const char *sbuild_id); +int build_id_cache__add_s(const char *sbuild_id, const char *name, bool is_kallsyms, bool is_vdso); -int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); +int build_id_cache__remove_s(const char *sbuild_id); void disable_buildid_cache(void); #endif diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index d04d770d90f6..fbcca21d66ab 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -17,6 +17,7 @@ #define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" +#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" typedef int (*config_fn_t)(const char *, const char *, void *); extern int perf_default_config(const char *, const char *, void *); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 14e7a123d43b..9f643ee77001 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -97,6 +97,14 @@ int parse_callchain_record_opt(const char *arg) callchain_param.dump_size = size; } #endif /* HAVE_DWARF_UNWIND_SUPPORT */ + } else if (!strncmp(name, "lbr", sizeof("lbr"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + callchain_param.record_mode = CALLCHAIN_LBR; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph lbr\n"); + break; } else { pr_err("callchain: Unknown --call-graph option " "value: %s\n", arg); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index c0ec1acc38e4..6033a0a212ca 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -11,6 +11,7 @@ enum perf_call_graph_mode { CALLCHAIN_NONE, CALLCHAIN_FP, CALLCHAIN_DWARF, + CALLCHAIN_LBR, CALLCHAIN_MAX }; diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index 6da965bdbc2c..85b523885f9d 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -7,6 +7,12 @@ static unsigned long flag = PERF_FLAG_FD_CLOEXEC; +int __weak sched_getcpu(void) +{ + errno = ENOSYS; + return -1; +} + static int perf_flag_probe(void) { /* use 'safest' configuration as used in perf_evsel__fallback() */ diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index 94a5a7d829d5..68888c29b04a 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -3,4 +3,10 @@ unsigned long perf_event_open_cloexec_flag(void); +#ifdef __GLIBC_PREREQ +#if !__GLIBC_PREREQ(2, 6) +extern int sched_getcpu(void) __THROW; +#endif +#endif + #endif /* __PERF_CLOEXEC_H */ diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c new file mode 100644 index 000000000000..dd17c9a32fbc --- /dev/null +++ b/tools/perf/util/data-convert-bt.c @@ -0,0 +1,857 @@ +/* + * CTF writing support via babeltrace. + * + * Copyright (C) 2014, Jiri Olsa <jolsa@redhat.com> + * Copyright (C) 2014, Sebastian Andrzej Siewior <bigeasy@linutronix.de> + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include <linux/compiler.h> +#include <babeltrace/ctf-writer/writer.h> +#include <babeltrace/ctf-writer/clock.h> +#include <babeltrace/ctf-writer/stream.h> +#include <babeltrace/ctf-writer/event.h> +#include <babeltrace/ctf-writer/event-types.h> +#include <babeltrace/ctf-writer/event-fields.h> +#include <babeltrace/ctf/events.h> +#include <traceevent/event-parse.h> +#include "asm/bug.h" +#include "data-convert-bt.h" +#include "session.h" +#include "util.h" +#include "debug.h" +#include "tool.h" +#include "evlist.h" +#include "evsel.h" +#include "machine.h" + +#define pr_N(n, fmt, ...) \ + eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) + +#define pr(fmt, ...) pr_N(1, pr_fmt(fmt), ##__VA_ARGS__) +#define pr2(fmt, ...) pr_N(2, pr_fmt(fmt), ##__VA_ARGS__) + +#define pr_time2(t, fmt, ...) pr_time_N(2, debug_data_convert, t, pr_fmt(fmt), ##__VA_ARGS__) + +struct evsel_priv { + struct bt_ctf_event_class *event_class; +}; + +struct ctf_writer { + /* writer primitives */ + struct bt_ctf_writer *writer; + struct bt_ctf_stream *stream; + struct bt_ctf_stream_class *stream_class; + struct bt_ctf_clock *clock; + + /* data types */ + union { + struct { + struct bt_ctf_field_type *s64; + struct bt_ctf_field_type *u64; + struct bt_ctf_field_type *s32; + struct bt_ctf_field_type *u32; + struct bt_ctf_field_type *string; + struct bt_ctf_field_type *u64_hex; + }; + struct bt_ctf_field_type *array[6]; + } data; +}; + +struct convert { + struct perf_tool tool; + struct ctf_writer writer; + + u64 events_size; + u64 events_count; +}; + +static int value_set(struct bt_ctf_field_type *type, + struct bt_ctf_event *event, + const char *name, u64 val) +{ + struct bt_ctf_field *field; + bool sign = bt_ctf_field_type_integer_get_signed(type); + int ret; + + field = bt_ctf_field_create(type); + if (!field) { + pr_err("failed to create a field %s\n", name); + return -1; + } + + if (sign) { + ret = bt_ctf_field_signed_integer_set_value(field, val); + if (ret) { + pr_err("failed to set field value %s\n", name); + goto err; + } + } else { + ret = bt_ctf_field_unsigned_integer_set_value(field, val); + if (ret) { + pr_err("failed to set field value %s\n", name); + goto err; + } + } + + ret = bt_ctf_event_set_payload(event, name, field); + if (ret) { + pr_err("failed to set payload %s\n", name); + goto err; + } + + pr2(" SET [%s = %" PRIu64 "]\n", name, val); + +err: + bt_ctf_field_put(field); + return ret; +} + +#define __FUNC_VALUE_SET(_name, _val_type) \ +static __maybe_unused int value_set_##_name(struct ctf_writer *cw, \ + struct bt_ctf_event *event, \ + const char *name, \ + _val_type val) \ +{ \ + struct bt_ctf_field_type *type = cw->data._name; \ + return value_set(type, event, name, (u64) val); \ +} + +#define FUNC_VALUE_SET(_name) __FUNC_VALUE_SET(_name, _name) + +FUNC_VALUE_SET(s32) +FUNC_VALUE_SET(u32) +FUNC_VALUE_SET(s64) +FUNC_VALUE_SET(u64) +__FUNC_VALUE_SET(u64_hex, u64) + +static struct bt_ctf_field_type* +get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field) +{ + unsigned long flags = field->flags; + + if (flags & FIELD_IS_STRING) + return cw->data.string; + + if (!(flags & FIELD_IS_SIGNED)) { + /* unsigned long are mostly pointers */ + if (flags & FIELD_IS_LONG || flags & FIELD_IS_POINTER) + return cw->data.u64_hex; + } + + if (flags & FIELD_IS_SIGNED) { + if (field->size == 8) + return cw->data.s64; + else + return cw->data.s32; + } + + if (field->size == 8) + return cw->data.u64; + else + return cw->data.u32; +} + +static int add_tracepoint_field_value(struct ctf_writer *cw, + struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_sample *sample, + struct format_field *fmtf) +{ + struct bt_ctf_field_type *type; + struct bt_ctf_field *array_field; + struct bt_ctf_field *field; + const char *name = fmtf->name; + void *data = sample->raw_data; + unsigned long long value_int; + unsigned long flags = fmtf->flags; + unsigned int n_items; + unsigned int i; + unsigned int offset; + unsigned int len; + int ret; + + offset = fmtf->offset; + len = fmtf->size; + if (flags & FIELD_IS_STRING) + flags &= ~FIELD_IS_ARRAY; + + if (flags & FIELD_IS_DYNAMIC) { + unsigned long long tmp_val; + + tmp_val = pevent_read_number(fmtf->event->pevent, + data + offset, len); + offset = tmp_val; + len = offset >> 16; + offset &= 0xffff; + } + + if (flags & FIELD_IS_ARRAY) { + + type = bt_ctf_event_class_get_field_by_name( + event_class, name); + array_field = bt_ctf_field_create(type); + bt_ctf_field_type_put(type); + if (!array_field) { + pr_err("Failed to create array type %s\n", name); + return -1; + } + + len = fmtf->size / fmtf->arraylen; + n_items = fmtf->arraylen; + } else { + n_items = 1; + array_field = NULL; + } + + type = get_tracepoint_field_type(cw, fmtf); + + for (i = 0; i < n_items; i++) { + if (!(flags & FIELD_IS_STRING)) + value_int = pevent_read_number( + fmtf->event->pevent, + data + offset + i * len, len); + + if (flags & FIELD_IS_ARRAY) + field = bt_ctf_field_array_get_field(array_field, i); + else + field = bt_ctf_field_create(type); + + if (!field) { + pr_err("failed to create a field %s\n", name); + return -1; + } + + if (flags & FIELD_IS_STRING) + ret = bt_ctf_field_string_set_value(field, + data + offset + i * len); + else if (!(flags & FIELD_IS_SIGNED)) + ret = bt_ctf_field_unsigned_integer_set_value( + field, value_int); + else + ret = bt_ctf_field_signed_integer_set_value( + field, value_int); + if (ret) { + pr_err("failed to set file value %s\n", name); + goto err_put_field; + } + if (!(flags & FIELD_IS_ARRAY)) { + ret = bt_ctf_event_set_payload(event, name, field); + if (ret) { + pr_err("failed to set payload %s\n", name); + goto err_put_field; + } + } + bt_ctf_field_put(field); + } + if (flags & FIELD_IS_ARRAY) { + ret = bt_ctf_event_set_payload(event, name, array_field); + if (ret) { + pr_err("Failed add payload array %s\n", name); + return -1; + } + bt_ctf_field_put(array_field); + } + return 0; + +err_put_field: + bt_ctf_field_put(field); + return -1; +} + +static int add_tracepoint_fields_values(struct ctf_writer *cw, + struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct format_field *fields, + struct perf_sample *sample) +{ + struct format_field *field; + int ret; + + for (field = fields; field; field = field->next) { + ret = add_tracepoint_field_value(cw, event_class, event, sample, + field); + if (ret) + return -1; + } + return 0; +} + +static int add_tracepoint_values(struct ctf_writer *cw, + struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + struct format_field *common_fields = evsel->tp_format->format.common_fields; + struct format_field *fields = evsel->tp_format->format.fields; + int ret; + + ret = add_tracepoint_fields_values(cw, event_class, event, + common_fields, sample); + if (!ret) + ret = add_tracepoint_fields_values(cw, event_class, event, + fields, sample); + + return ret; +} + +static int add_generic_values(struct ctf_writer *cw, + struct bt_ctf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + u64 type = evsel->attr.sample_type; + int ret; + + /* + * missing: + * PERF_SAMPLE_TIME - not needed as we have it in + * ctf event header + * PERF_SAMPLE_READ - TODO + * PERF_SAMPLE_CALLCHAIN - TODO + * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_BRANCH_STACK - TODO + * PERF_SAMPLE_REGS_USER - TODO + * PERF_SAMPLE_STACK_USER - TODO + */ + + if (type & PERF_SAMPLE_IP) { + ret = value_set_u64_hex(cw, event, "perf_ip", sample->ip); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_TID) { + ret = value_set_s32(cw, event, "perf_tid", sample->tid); + if (ret) + return -1; + + ret = value_set_s32(cw, event, "perf_pid", sample->pid); + if (ret) + return -1; + } + + if ((type & PERF_SAMPLE_ID) || + (type & PERF_SAMPLE_IDENTIFIER)) { + ret = value_set_u64(cw, event, "perf_id", sample->id); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_STREAM_ID) { + ret = value_set_u64(cw, event, "perf_stream_id", sample->stream_id); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_CPU) { + ret = value_set_u32(cw, event, "perf_cpu", sample->cpu); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_PERIOD) { + ret = value_set_u64(cw, event, "perf_period", sample->period); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_WEIGHT) { + ret = value_set_u64(cw, event, "perf_weight", sample->weight); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_DATA_SRC) { + ret = value_set_u64(cw, event, "perf_data_src", + sample->data_src); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_TRANSACTION) { + ret = value_set_u64(cw, event, "perf_transaction", + sample->transaction); + if (ret) + return -1; + } + + return 0; +} + +static int process_sample_event(struct perf_tool *tool, + union perf_event *_event __maybe_unused, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine __maybe_unused) +{ + struct convert *c = container_of(tool, struct convert, tool); + struct evsel_priv *priv = evsel->priv; + struct ctf_writer *cw = &c->writer; + struct bt_ctf_event_class *event_class; + struct bt_ctf_event *event; + int ret; + + if (WARN_ONCE(!priv, "Failed to setup all events.\n")) + return 0; + + event_class = priv->event_class; + + /* update stats */ + c->events_count++; + c->events_size += _event->header.size; + + pr_time2(sample->time, "sample %" PRIu64 "\n", c->events_count); + + event = bt_ctf_event_create(event_class); + if (!event) { + pr_err("Failed to create an CTF event\n"); + return -1; + } + + bt_ctf_clock_set_time(cw->clock, sample->time); + + ret = add_generic_values(cw, event, evsel, sample); + if (ret) + return -1; + + if (evsel->attr.type == PERF_TYPE_TRACEPOINT) { + ret = add_tracepoint_values(cw, event_class, event, + evsel, sample); + if (ret) + return -1; + } + + bt_ctf_stream_append_event(cw->stream, event); + bt_ctf_event_put(event); + return 0; +} + +static int add_tracepoint_fields_types(struct ctf_writer *cw, + struct format_field *fields, + struct bt_ctf_event_class *event_class) +{ + struct format_field *field; + int ret; + + for (field = fields; field; field = field->next) { + struct bt_ctf_field_type *type; + unsigned long flags = field->flags; + + pr2(" field '%s'\n", field->name); + + type = get_tracepoint_field_type(cw, field); + if (!type) + return -1; + + /* + * A string is an array of chars. For this we use the string + * type and don't care that it is an array. What we don't + * support is an array of strings. + */ + if (flags & FIELD_IS_STRING) + flags &= ~FIELD_IS_ARRAY; + + if (flags & FIELD_IS_ARRAY) + type = bt_ctf_field_type_array_create(type, field->arraylen); + + ret = bt_ctf_event_class_add_field(event_class, type, + field->name); + + if (flags & FIELD_IS_ARRAY) + bt_ctf_field_type_put(type); + + if (ret) { + pr_err("Failed to add field '%s\n", field->name); + return -1; + } + } + + return 0; +} + +static int add_tracepoint_types(struct ctf_writer *cw, + struct perf_evsel *evsel, + struct bt_ctf_event_class *class) +{ + struct format_field *common_fields = evsel->tp_format->format.common_fields; + struct format_field *fields = evsel->tp_format->format.fields; + int ret; + + ret = add_tracepoint_fields_types(cw, common_fields, class); + if (!ret) + ret = add_tracepoint_fields_types(cw, fields, class); + + return ret; +} + +static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, + struct bt_ctf_event_class *event_class) +{ + u64 type = evsel->attr.sample_type; + + /* + * missing: + * PERF_SAMPLE_TIME - not needed as we have it in + * ctf event header + * PERF_SAMPLE_READ - TODO + * PERF_SAMPLE_CALLCHAIN - TODO + * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_BRANCH_STACK - TODO + * PERF_SAMPLE_REGS_USER - TODO + * PERF_SAMPLE_STACK_USER - TODO + */ + +#define ADD_FIELD(cl, t, n) \ + do { \ + pr2(" field '%s'\n", n); \ + if (bt_ctf_event_class_add_field(cl, t, n)) { \ + pr_err("Failed to add field '%s;\n", n); \ + return -1; \ + } \ + } while (0) + + if (type & PERF_SAMPLE_IP) + ADD_FIELD(event_class, cw->data.u64_hex, "perf_ip"); + + if (type & PERF_SAMPLE_TID) { + ADD_FIELD(event_class, cw->data.s32, "perf_tid"); + ADD_FIELD(event_class, cw->data.s32, "perf_pid"); + } + + if ((type & PERF_SAMPLE_ID) || + (type & PERF_SAMPLE_IDENTIFIER)) + ADD_FIELD(event_class, cw->data.u64, "perf_id"); + + if (type & PERF_SAMPLE_STREAM_ID) + ADD_FIELD(event_class, cw->data.u64, "perf_stream_id"); + + if (type & PERF_SAMPLE_CPU) + ADD_FIELD(event_class, cw->data.u32, "perf_cpu"); + + if (type & PERF_SAMPLE_PERIOD) + ADD_FIELD(event_class, cw->data.u64, "perf_period"); + + if (type & PERF_SAMPLE_WEIGHT) + ADD_FIELD(event_class, cw->data.u64, "perf_weight"); + + if (type & PERF_SAMPLE_DATA_SRC) + ADD_FIELD(event_class, cw->data.u64, "perf_data_src"); + + if (type & PERF_SAMPLE_TRANSACTION) + ADD_FIELD(event_class, cw->data.u64, "perf_transaction"); + +#undef ADD_FIELD + return 0; +} + +static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel) +{ + struct bt_ctf_event_class *event_class; + struct evsel_priv *priv; + const char *name = perf_evsel__name(evsel); + int ret; + + pr("Adding event '%s' (type %d)\n", name, evsel->attr.type); + + event_class = bt_ctf_event_class_create(name); + if (!event_class) + return -1; + + ret = add_generic_types(cw, evsel, event_class); + if (ret) + goto err; + + if (evsel->attr.type == PERF_TYPE_TRACEPOINT) { + ret = add_tracepoint_types(cw, evsel, event_class); + if (ret) + goto err; + } + + ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class); + if (ret) { + pr("Failed to add event class into stream.\n"); + goto err; + } + + priv = malloc(sizeof(*priv)); + if (!priv) + goto err; + + priv->event_class = event_class; + evsel->priv = priv; + return 0; + +err: + bt_ctf_event_class_put(event_class); + pr_err("Failed to add event '%s'.\n", name); + return -1; +} + +static int setup_events(struct ctf_writer *cw, struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + int ret; + + evlist__for_each(evlist, evsel) { + ret = add_event(cw, evsel); + if (ret) + return ret; + } + return 0; +} + +static int ctf_writer__setup_env(struct ctf_writer *cw, + struct perf_session *session) +{ + struct perf_header *header = &session->header; + struct bt_ctf_writer *writer = cw->writer; + +#define ADD(__n, __v) \ +do { \ + if (bt_ctf_writer_add_environment_field(writer, __n, __v)) \ + return -1; \ +} while (0) + + ADD("host", header->env.hostname); + ADD("sysname", "Linux"); + ADD("release", header->env.os_release); + ADD("version", header->env.version); + ADD("machine", header->env.arch); + ADD("domain", "kernel"); + ADD("tracer_name", "perf"); + +#undef ADD + return 0; +} + +static int ctf_writer__setup_clock(struct ctf_writer *cw) +{ + struct bt_ctf_clock *clock = cw->clock; + + bt_ctf_clock_set_description(clock, "perf clock"); + +#define SET(__n, __v) \ +do { \ + if (bt_ctf_clock_set_##__n(clock, __v)) \ + return -1; \ +} while (0) + + SET(frequency, 1000000000); + SET(offset_s, 0); + SET(offset, 0); + SET(precision, 10); + SET(is_absolute, 0); + +#undef SET + return 0; +} + +static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) +{ + struct bt_ctf_field_type *type; + + type = bt_ctf_field_type_integer_create(size); + if (!type) + return NULL; + + if (sign && + bt_ctf_field_type_integer_set_signed(type, 1)) + goto err; + + if (hex && + bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) + goto err; + + pr2("Created type: INTEGER %d-bit %ssigned %s\n", + size, sign ? "un" : "", hex ? "hex" : ""); + return type; + +err: + bt_ctf_field_type_put(type); + return NULL; +} + +static void ctf_writer__cleanup_data(struct ctf_writer *cw) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(cw->data.array); i++) + bt_ctf_field_type_put(cw->data.array[i]); +} + +static int ctf_writer__init_data(struct ctf_writer *cw) +{ +#define CREATE_INT_TYPE(type, size, sign, hex) \ +do { \ + (type) = create_int_type(size, sign, hex); \ + if (!(type)) \ + goto err; \ +} while (0) + + CREATE_INT_TYPE(cw->data.s64, 64, true, false); + CREATE_INT_TYPE(cw->data.u64, 64, false, false); + CREATE_INT_TYPE(cw->data.s32, 32, true, false); + CREATE_INT_TYPE(cw->data.u32, 32, false, false); + CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true); + + cw->data.string = bt_ctf_field_type_string_create(); + if (cw->data.string) + return 0; + +err: + ctf_writer__cleanup_data(cw); + pr_err("Failed to create data types.\n"); + return -1; +} + +static void ctf_writer__cleanup(struct ctf_writer *cw) +{ + ctf_writer__cleanup_data(cw); + + bt_ctf_clock_put(cw->clock); + bt_ctf_stream_put(cw->stream); + bt_ctf_stream_class_put(cw->stream_class); + bt_ctf_writer_put(cw->writer); + + /* and NULL all the pointers */ + memset(cw, 0, sizeof(*cw)); +} + +static int ctf_writer__init(struct ctf_writer *cw, const char *path) +{ + struct bt_ctf_writer *writer; + struct bt_ctf_stream_class *stream_class; + struct bt_ctf_stream *stream; + struct bt_ctf_clock *clock; + + /* CTF writer */ + writer = bt_ctf_writer_create(path); + if (!writer) + goto err; + + cw->writer = writer; + + /* CTF clock */ + clock = bt_ctf_clock_create("perf_clock"); + if (!clock) { + pr("Failed to create CTF clock.\n"); + goto err_cleanup; + } + + cw->clock = clock; + + if (ctf_writer__setup_clock(cw)) { + pr("Failed to setup CTF clock.\n"); + goto err_cleanup; + } + + /* CTF stream class */ + stream_class = bt_ctf_stream_class_create("perf_stream"); + if (!stream_class) { + pr("Failed to create CTF stream class.\n"); + goto err_cleanup; + } + + cw->stream_class = stream_class; + + /* CTF clock stream setup */ + if (bt_ctf_stream_class_set_clock(stream_class, clock)) { + pr("Failed to assign CTF clock to stream class.\n"); + goto err_cleanup; + } + + if (ctf_writer__init_data(cw)) + goto err_cleanup; + + /* CTF stream instance */ + stream = bt_ctf_writer_create_stream(writer, stream_class); + if (!stream) { + pr("Failed to create CTF stream.\n"); + goto err_cleanup; + } + + cw->stream = stream; + + /* CTF clock writer setup */ + if (bt_ctf_writer_add_clock(writer, clock)) { + pr("Failed to assign CTF clock to writer.\n"); + goto err_cleanup; + } + + return 0; + +err_cleanup: + ctf_writer__cleanup(cw); +err: + pr_err("Failed to setup CTF writer.\n"); + return -1; +} + +int bt_convert__perf2ctf(const char *input, const char *path, bool force) +{ + struct perf_session *session; + struct perf_data_file file = { + .path = input, + .mode = PERF_DATA_MODE_READ, + .force = force, + }; + struct convert c = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + .comm = perf_event__process_comm, + .exit = perf_event__process_exit, + .fork = perf_event__process_fork, + .lost = perf_event__process_lost, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .ordered_events = true, + .ordering_requires_timestamps = true, + }, + }; + struct ctf_writer *cw = &c.writer; + int err = -1; + + /* CTF writer */ + if (ctf_writer__init(cw, path)) + return -1; + + /* perf.data session */ + session = perf_session__new(&file, 0, &c.tool); + if (!session) + goto free_writer; + + /* CTF writer env/clock setup */ + if (ctf_writer__setup_env(cw, session)) + goto free_session; + + /* CTF events setup */ + if (setup_events(cw, session)) + goto free_session; + + err = perf_session__process_events(session); + if (!err) + err = bt_ctf_stream_flush(cw->stream); + + fprintf(stderr, + "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", + file.path, path); + + fprintf(stderr, + "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n", + (double) c.events_size / 1024.0 / 1024.0, + c.events_count); + + /* its all good */ +free_session: + perf_session__delete(session); + +free_writer: + ctf_writer__cleanup(cw); + return err; +} diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h new file mode 100644 index 000000000000..4c204342a9d8 --- /dev/null +++ b/tools/perf/util/data-convert-bt.h @@ -0,0 +1,8 @@ +#ifndef __DATA_CONVERT_BT_H +#define __DATA_CONVERT_BT_H +#ifdef HAVE_LIBBABELTRACE_SUPPORT + +int bt_convert__perf2ctf(const char *input_name, const char *to_ctf, bool force); + +#endif /* HAVE_LIBBABELTRACE_SUPPORT */ +#endif /* __DATA_CONVERT_BT_H */ diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index c81dae399763..bb39a3ffc70b 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -282,13 +282,13 @@ int db_export__branch_type(struct db_export *dbe, u32 branch_type, int db_export__sample(struct db_export *dbe, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al) + struct addr_location *al) { + struct thread* thread = al->thread; struct export_sample es = { .event = event, .sample = sample, .evsel = evsel, - .thread = thread, .al = al, }; struct thread *main_thread; diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index adbd22d66798..25e22fd76aca 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -34,7 +34,6 @@ struct export_sample { union perf_event *event; struct perf_sample *sample; struct perf_evsel *evsel; - struct thread *thread; struct addr_location *al; u64 db_id; u64 comm_db_id; @@ -97,7 +96,7 @@ int db_export__branch_type(struct db_export *dbe, u32 branch_type, const char *name); int db_export__sample(struct db_export *dbe, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al); + struct addr_location *al); int db_export__branch_types(struct db_export *dbe); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index ad60b2f20258..2da5581ec74d 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -20,6 +20,7 @@ int verbose; bool dump_trace = false, quiet = false; int debug_ordered_events; static int redirect_to_stderr; +int debug_data_convert; static int _eprintf(int level, int var, const char *fmt, va_list args) { @@ -147,6 +148,7 @@ static struct debug_variable { { .name = "verbose", .ptr = &verbose }, { .name = "ordered-events", .ptr = &debug_ordered_events}, { .name = "stderr", .ptr = &redirect_to_stderr}, + { .name = "data-convert", .ptr = &debug_data_convert }, { .name = NULL, } }; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index be264d6f3b30..caac2fdc6105 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -12,6 +12,7 @@ extern int verbose; extern bool quiet, dump_trace; extern int debug_ordered_events; +extern int debug_data_convert; #ifndef pr_fmt #define pr_fmt(fmt) fmt diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index c2f7d3b90966..fc0ddd5792a9 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -45,13 +45,13 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__DEBUGLINK: { char *debuglink; - strncpy(filename, dso->long_name, size); - debuglink = filename + dso->long_name_len; + len = __symbol__join_symfs(filename, size, dso->long_name); + debuglink = filename + len; while (debuglink != filename && *debuglink != '/') debuglink--; if (*debuglink == '/') debuglink++; - ret = filename__read_debuglink(dso->long_name, debuglink, + ret = filename__read_debuglink(filename, debuglink, size - (debuglink - filename)); } break; @@ -148,6 +148,9 @@ static const struct { #ifdef HAVE_ZLIB_SUPPORT { "gz", gzip_decompress_to_file }, #endif +#ifdef HAVE_LZMA_SUPPORT + { "xz", lzma_decompress_to_file }, +#endif { NULL, NULL }, }; @@ -162,32 +165,14 @@ bool is_supported_compression(const char *ext) return false; } -bool is_kmodule_extension(const char *ext) -{ - if (strncmp(ext, "ko", 2)) - return false; - - if (ext[2] == '\0' || (ext[2] == '.' && is_supported_compression(ext+3))) - return true; - - return false; -} - -bool is_kernel_module(const char *pathname, bool *compressed) +bool is_kernel_module(const char *pathname) { - const char *ext = strrchr(pathname, '.'); + struct kmod_path m; - if (ext == NULL) - return false; - - if (is_supported_compression(ext + 1)) { - if (compressed) - *compressed = true; - ext -= 3; - } else if (compressed) - *compressed = false; + if (kmod_path__parse(&m, pathname)) + return NULL; - return is_kmodule_extension(ext + 1); + return m.kmod; } bool decompress_to_file(const char *ext, const char *filename, int output_fd) @@ -209,6 +194,72 @@ bool dso__needs_decompress(struct dso *dso) } /* + * Parses kernel module specified in @path and updates + * @m argument like: + * + * @comp - true if @path contains supported compression suffix, + * false otherwise + * @kmod - true if @path contains '.ko' suffix in right position, + * false otherwise + * @name - if (@alloc_name && @kmod) is true, it contains strdup-ed base name + * of the kernel module without suffixes, otherwise strudup-ed + * base name of @path + * @ext - if (@alloc_ext && @comp) is true, it contains strdup-ed string + * the compression suffix + * + * Returns 0 if there's no strdup error, -ENOMEM otherwise. + */ +int __kmod_path__parse(struct kmod_path *m, const char *path, + bool alloc_name, bool alloc_ext) +{ + const char *name = strrchr(path, '/'); + const char *ext = strrchr(path, '.'); + + memset(m, 0x0, sizeof(*m)); + name = name ? name + 1 : path; + + /* No extension, just return name. */ + if (ext == NULL) { + if (alloc_name) { + m->name = strdup(name); + return m->name ? 0 : -ENOMEM; + } + return 0; + } + + if (is_supported_compression(ext + 1)) { + m->comp = true; + ext -= 3; + } + + /* Check .ko extension only if there's enough name left. */ + if (ext > name) + m->kmod = !strncmp(ext, ".ko", 3); + + if (alloc_name) { + if (m->kmod) { + if (asprintf(&m->name, "[%.*s]", (int) (ext - name), name) == -1) + return -ENOMEM; + } else { + if (asprintf(&m->name, "%s", name) == -1) + return -ENOMEM; + } + + strxfrchar(m->name, '-', '_'); + } + + if (alloc_ext && m->comp) { + m->ext = strdup(ext + 4); + if (!m->ext) { + free((void *) m->name); + return -ENOMEM; + } + } + + return 0; +} + +/* * Global list of open DSOs and the counter. */ static LIST_HEAD(dso__data_open); @@ -240,7 +291,7 @@ static int do_open(char *name) if (fd >= 0) return fd; - pr_debug("dso open failed, mmap: %s\n", + pr_debug("dso open failed: %s\n", strerror_r(errno, sbuf, sizeof(sbuf))); if (!dso__data_open_cnt || errno != EMFILE) break; @@ -1002,21 +1053,24 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name, return dso__find_by_longname(&dsos->root, name); } -struct dso *__dsos__findnew(struct dsos *dsos, const char *name) +struct dso *dsos__addnew(struct dsos *dsos, const char *name) { - struct dso *dso = dsos__find(dsos, name, false); + struct dso *dso = dso__new(name); - if (!dso) { - dso = dso__new(name); - if (dso != NULL) { - dsos__add(dsos, dso); - dso__set_basename(dso); - } + if (dso != NULL) { + dsos__add(dsos, dso); + dso__set_basename(dso); } - return dso; } +struct dso *__dsos__findnew(struct dsos *dsos, const char *name) +{ + struct dso *dso = dsos__find(dsos, name, false); + + return dso ? dso : dsos__addnew(dsos, name); +} + size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm) { @@ -1083,3 +1137,36 @@ enum dso_type dso__type(struct dso *dso, struct machine *machine) return dso__type_fd(fd); } + +int dso__strerror_load(struct dso *dso, char *buf, size_t buflen) +{ + int idx, errnum = dso->load_errno; + /* + * This must have a same ordering as the enum dso_load_errno. + */ + static const char *dso_load__error_str[] = { + "Internal tools/perf/ library error", + "Invalid ELF file", + "Can not read build id", + "Mismatching build id", + "Decompression failure", + }; + + BUG_ON(buflen == 0); + + if (errnum >= 0) { + const char *err = strerror_r(errnum, buf, buflen); + + if (err != buf) + scnprintf(buf, buflen, "%s", err); + + return 0; + } + + if (errnum < __DSO_LOAD_ERRNO__START || errnum >= __DSO_LOAD_ERRNO__END) + return -1; + + idx = errnum - __DSO_LOAD_ERRNO__START; + scnprintf(buf, buflen, "%s", dso_load__error_str[idx]); + return 0; +} diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index ced92841ff97..e0901b4ed8de 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -60,6 +60,31 @@ enum dso_type { DSO__TYPE_X32BIT, }; +enum dso_load_errno { + DSO_LOAD_ERRNO__SUCCESS = 0, + + /* + * Choose an arbitrary negative big number not to clash with standard + * errno since SUS requires the errno has distinct positive values. + * See 'Issue 6' in the link below. + * + * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html + */ + __DSO_LOAD_ERRNO__START = -10000, + + DSO_LOAD_ERRNO__INTERNAL_ERROR = __DSO_LOAD_ERRNO__START, + + /* for symsrc__init() */ + DSO_LOAD_ERRNO__INVALID_ELF, + DSO_LOAD_ERRNO__CANNOT_READ_BUILDID, + DSO_LOAD_ERRNO__MISMATCHING_BUILDID, + + /* for decompress_kmodule */ + DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE, + + __DSO_LOAD_ERRNO__END, +}; + #define DSO__SWAP(dso, type, val) \ ({ \ type ____r = val; \ @@ -113,6 +138,7 @@ struct dso { enum dso_swap_type needs_swap; enum dso_binary_type symtab_type; enum dso_binary_type binary_type; + enum dso_load_errno load_errno; u8 adjust_symbols:1; u8 has_build_id:1; u8 has_srcline:1; @@ -139,7 +165,8 @@ struct dso { u32 status_seen; size_t file_size; struct list_head open_entry; - u64 frame_offset; + u64 debug_frame_offset; + u64 eh_frame_hdr_offset; } data; union { /* Tool specific area */ @@ -189,11 +216,24 @@ char dso__symtab_origin(const struct dso *dso); int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type, char *root_dir, char *filename, size_t size); bool is_supported_compression(const char *ext); -bool is_kmodule_extension(const char *ext); -bool is_kernel_module(const char *pathname, bool *compressed); +bool is_kernel_module(const char *pathname); bool decompress_to_file(const char *ext, const char *filename, int output_fd); bool dso__needs_decompress(struct dso *dso); +struct kmod_path { + char *name; + char *ext; + bool comp; + bool kmod; +}; + +int __kmod_path__parse(struct kmod_path *m, const char *path, + bool alloc_name, bool alloc_ext); + +#define kmod_path__parse(__m, __p) __kmod_path__parse(__m, __p, false, false) +#define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false) +#define kmod_path__parse_ext(__m, __p) __kmod_path__parse(__m, __p, false, true) + /* * The dso__data_* external interface provides following functions: * dso__data_fd @@ -249,6 +289,7 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name, const char *short_name, int dso_type); void dsos__add(struct dsos *dsos, struct dso *dso); +struct dso *dsos__addnew(struct dsos *dsos, const char *name); struct dso *dsos__find(const struct dsos *dsos, const char *name, bool cmp_short); struct dso *__dsos__findnew(struct dsos *dsos, const char *name); @@ -279,4 +320,6 @@ void dso__free_a2l(struct dso *dso); enum dso_type dso__type(struct dso *dso, struct machine *machine); +int dso__strerror_load(struct dso *dso, char *buf, size_t buflen); + #endif /* __PERF_DSO */ diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index cc66c4049e09..c34e024020c7 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -278,6 +278,21 @@ bool die_is_func_def(Dwarf_Die *dw_die) } /** + * die_is_func_instance - Ensure that this DIE is an instance of a subprogram + * @dw_die: a DIE + * + * Ensure that this DIE is an instance (which has an entry address). + * This returns true if @dw_die is a function instance. If not, you need to + * call die_walk_instances() to find actual instances. + **/ +bool die_is_func_instance(Dwarf_Die *dw_die) +{ + Dwarf_Addr tmp; + + /* Actually gcc optimizes non-inline as like as inlined */ + return !dwarf_func_inline(dw_die) && dwarf_entrypc(dw_die, &tmp) == 0; +} +/** * die_get_data_member_location - Get the data-member offset * @mb_die: a DIE of a member of a data structure * @offs: The offset of the member in the data structure @@ -786,10 +801,16 @@ static int __die_find_member_cb(Dwarf_Die *die_mem, void *data) { const char *name = data; - if ((dwarf_tag(die_mem) == DW_TAG_member) && - die_compare_name(die_mem, name)) - return DIE_FIND_CB_END; - + if (dwarf_tag(die_mem) == DW_TAG_member) { + if (die_compare_name(die_mem, name)) + return DIE_FIND_CB_END; + else if (!dwarf_diename(die_mem)) { /* Unnamed structure */ + Dwarf_Die type_die, tmp_die; + if (die_get_type(die_mem, &type_die) && + die_find_member(&type_die, name, &tmp_die)) + return DIE_FIND_CB_END; + } + } return DIE_FIND_CB_SIBLING; } diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index b4fe90c6cb2d..af7dbcd5f929 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -41,6 +41,9 @@ extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, /* Ensure that this DIE is a subprogram and definition (not declaration) */ extern bool die_is_func_def(Dwarf_Die *dw_die); +/* Ensure that this DIE is an instance of a subprogram */ +extern bool die_is_func_instance(Dwarf_Die *dw_die); + /* Compare diename and tname */ extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 6c6d044e959a..ff866c4d2e2f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -49,72 +49,103 @@ static struct perf_sample synth_sample = { .period = 1, }; -static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len) +/* + * Assumes that the first 4095 bytes of /proc/pid/stat contains + * the comm, tgid and ppid. + */ +static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, + pid_t *tgid, pid_t *ppid) { char filename[PATH_MAX]; - char bf[BUFSIZ]; - FILE *fp; - size_t size = 0; - pid_t tgid = -1; + char bf[4096]; + int fd; + size_t size = 0, n; + char *nl, *name, *tgids, *ppids; + + *tgid = -1; + *ppid = -1; snprintf(filename, sizeof(filename), "/proc/%d/status", pid); - fp = fopen(filename, "r"); - if (fp == NULL) { + fd = open(filename, O_RDONLY); + if (fd < 0) { pr_debug("couldn't open %s\n", filename); - return 0; + return -1; } - while (!comm[0] || (tgid < 0)) { - if (fgets(bf, sizeof(bf), fp) == NULL) { - pr_warning("couldn't get COMM and pgid, malformed %s\n", - filename); - break; - } + n = read(fd, bf, sizeof(bf) - 1); + close(fd); + if (n <= 0) { + pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n", + pid); + return -1; + } + bf[n] = '\0'; - if (memcmp(bf, "Name:", 5) == 0) { - char *name = bf + 5; - while (*name && isspace(*name)) - ++name; - size = strlen(name) - 1; - if (size >= len) - size = len - 1; - memcpy(comm, name, size); - comm[size] = '\0'; - - } else if (memcmp(bf, "Tgid:", 5) == 0) { - char *tgids = bf + 5; - while (*tgids && isspace(*tgids)) - ++tgids; - tgid = atoi(tgids); - } + name = strstr(bf, "Name:"); + tgids = strstr(bf, "Tgid:"); + ppids = strstr(bf, "PPid:"); + + if (name) { + name += 5; /* strlen("Name:") */ + + while (*name && isspace(*name)) + ++name; + + nl = strchr(name, '\n'); + if (nl) + *nl = '\0'; + + size = strlen(name); + if (size >= len) + size = len - 1; + memcpy(comm, name, size); + comm[size] = '\0'; + } else { + pr_debug("Name: string not found for pid %d\n", pid); } - fclose(fp); + if (tgids) { + tgids += 5; /* strlen("Tgid:") */ + *tgid = atoi(tgids); + } else { + pr_debug("Tgid: string not found for pid %d\n", pid); + } - return tgid; + if (ppids) { + ppids += 5; /* strlen("PPid:") */ + *ppid = atoi(ppids); + } else { + pr_debug("PPid: string not found for pid %d\n", pid); + } + + return 0; } -static pid_t perf_event__synthesize_comm(struct perf_tool *tool, - union perf_event *event, pid_t pid, - perf_event__handler_t process, - struct machine *machine) +static int perf_event__prepare_comm(union perf_event *event, pid_t pid, + struct machine *machine, + pid_t *tgid, pid_t *ppid) { size_t size; - pid_t tgid; + + *ppid = -1; memset(&event->comm, 0, sizeof(event->comm)); - if (machine__is_host(machine)) - tgid = perf_event__get_comm_tgid(pid, event->comm.comm, - sizeof(event->comm.comm)); - else - tgid = machine->pid; + if (machine__is_host(machine)) { + if (perf_event__get_comm_ids(pid, event->comm.comm, + sizeof(event->comm.comm), + tgid, ppid) != 0) { + return -1; + } + } else { + *tgid = machine->pid; + } - if (tgid < 0) - goto out; + if (*tgid < 0) + return -1; - event->comm.pid = tgid; + event->comm.pid = *tgid; event->comm.header.type = PERF_RECORD_COMM; size = strlen(event->comm.comm) + 1; @@ -125,23 +156,45 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, machine->id_hdr_size); event->comm.tid = pid; + return 0; +} + +static pid_t perf_event__synthesize_comm(struct perf_tool *tool, + union perf_event *event, pid_t pid, + perf_event__handler_t process, + struct machine *machine) +{ + pid_t tgid, ppid; + + if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0) + return -1; + if (process(tool, event, &synth_sample, machine) != 0) return -1; -out: return tgid; } static int perf_event__synthesize_fork(struct perf_tool *tool, - union perf_event *event, pid_t pid, - pid_t tgid, perf_event__handler_t process, + union perf_event *event, + pid_t pid, pid_t tgid, pid_t ppid, + perf_event__handler_t process, struct machine *machine) { memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size); - /* this is really a clone event but we use fork to synthesize it */ - event->fork.ppid = tgid; - event->fork.ptid = tgid; + /* + * for main thread set parent to ppid from status file. For other + * threads set parent pid to main thread. ie., assume main thread + * spawns all threads in a process + */ + if (tgid == pid) { + event->fork.ppid = ppid; + event->fork.ptid = ppid; + } else { + event->fork.ppid = tgid; + event->fork.ptid = tgid; + } event->fork.pid = tgid; event->fork.tid = pid; event->fork.header.type = PERF_RECORD_FORK; @@ -333,7 +386,8 @@ static int __event__synthesize_thread(union perf_event *comm_event, char filename[PATH_MAX]; DIR *tasks; struct dirent dirent, *next; - pid_t tgid; + pid_t tgid, ppid; + int rc = 0; /* special case: only send one comm event using passed in pid */ if (!full) { @@ -361,34 +415,38 @@ static int __event__synthesize_thread(union perf_event *comm_event, while (!readdir_r(tasks, &dirent, &next) && next) { char *end; - int rc = 0; pid_t _pid; _pid = strtol(dirent.d_name, &end, 10); if (*end) continue; - tgid = perf_event__synthesize_comm(tool, comm_event, _pid, - process, machine); - if (tgid == -1) - return -1; + rc = -1; + if (perf_event__prepare_comm(comm_event, _pid, machine, + &tgid, &ppid) != 0) + break; + if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, + ppid, process, machine) < 0) + break; + /* + * Send the prepared comm event + */ + if (process(tool, comm_event, &synth_sample, machine) != 0) + break; + + rc = 0; if (_pid == pid) { /* process the parent's maps too */ rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data); - } else { - /* only fork the tid's map, to save time */ - rc = perf_event__synthesize_fork(tool, fork_event, _pid, tgid, - process, machine); + if (rc) + break; } - - if (rc) - return rc; } closedir(tasks); - return 0; + return rc; } int perf_event__synthesize_thread_map(struct perf_tool *tool, @@ -615,7 +673,7 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) else s = ""; - return fprintf(fp, "%s: %s:%d\n", s, event->comm.comm, event->comm.tid); + return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid); } int perf_event__process_comm(struct perf_tool *tool __maybe_unused, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c4ffe2bd0738..09b9e8d3fcf7 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -242,7 +242,6 @@ struct events_stats { u32 nr_invalid_chains; u32 nr_unknown_id; u32 nr_unprocessable_samples; - u32 nr_unordered_events; }; struct attr_event { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 28b8ce86bf12..080be93eea96 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -7,7 +7,6 @@ * Released under the GPL v2. (and only v2, not any later version) */ #include "util.h" -#include <api/fs/debugfs.h> #include <api/fs/fs.h> #include <poll.h> #include "cpumap.h" @@ -635,8 +634,8 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) { struct perf_mmap *md = &evlist->mmap[idx]; - unsigned int head = perf_mmap__read_head(md); - unsigned int old = md->prev; + u64 head = perf_mmap__read_head(md); + u64 old = md->prev; unsigned char *data = md->base + page_size; union perf_event *event = NULL; @@ -696,7 +695,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) static bool perf_mmap__empty(struct perf_mmap *md) { - return perf_mmap__read_head(md) != md->prev; + return perf_mmap__read_head(md) == md->prev; } static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx) @@ -717,7 +716,7 @@ void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) struct perf_mmap *md = &evlist->mmap[idx]; if (!evlist->overwrite) { - unsigned int old = md->prev; + u64 old = md->prev; perf_mmap__write_tail(md, old); } @@ -1051,7 +1050,7 @@ out_delete_threads: return -1; } -int perf_evlist__apply_filters(struct perf_evlist *evlist) +int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) { struct perf_evsel *evsel; int err = 0; @@ -1063,8 +1062,10 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist) continue; err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter); - if (err) + if (err) { + *err_evsel = evsel; break; + } } return err; @@ -1086,6 +1087,38 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) return err; } +int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) +{ + char *filter; + int ret = -1; + size_t i; + + for (i = 0; i < npids; ++i) { + if (i == 0) { + if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) + return -1; + } else { + char *tmp; + + if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0) + goto out_free; + + free(filter); + filter = tmp; + } + } + + ret = perf_evlist__set_filter(evlist, filter); +out_free: + free(filter); + return ret; +} + +int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid) +{ + return perf_evlist__set_filter_pids(evlist, 1, &pid); +} + bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) { struct perf_evsel *pos; @@ -1329,7 +1362,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *tar * writing exactly one byte, in workload.cork_fd, usually via * perf_evlist__start_workload(). * - * For cancelling the workload without actuallin running it, + * For cancelling the workload without actually running it, * the parent will just close workload.cork_fd, without writing * anything, i.e. read will return zero and we just exit() * here. diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e99a67632831..b5cce95d644e 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -27,7 +27,7 @@ struct perf_mmap { void *base; int mask; int refcnt; - unsigned int prev; + u64 prev; char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8))); }; @@ -51,6 +51,7 @@ struct perf_evlist { struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; + struct events_stats stats; }; struct perf_evsel_str_handler { @@ -77,6 +78,8 @@ int perf_evlist__add_newtp(struct perf_evlist *evlist, const char *sys, const char *name, void *handler); int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter); +int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid); +int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids); struct perf_evsel * perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); @@ -149,7 +152,7 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); -int perf_evlist__apply_filters(struct perf_evlist *evlist); +int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); void __perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct perf_evlist *evlist); @@ -186,16 +189,15 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size); -static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) +static inline u64 perf_mmap__read_head(struct perf_mmap *mm) { struct perf_event_mmap_page *pc = mm->base; - int head = ACCESS_ONCE(pc->data_head); + u64 head = ACCESS_ONCE(pc->data_head); rmb(); return head; } -static inline void perf_mmap__write_tail(struct perf_mmap *md, - unsigned long tail) +static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) { struct perf_event_mmap_page *pc = md->base; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ea51a90e20a0..33e3fd8c2e68 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -32,8 +32,12 @@ static struct { bool exclude_guest; bool mmap2; bool cloexec; + bool clockid; + bool clockid_wrong; } perf_missing_features; +static clockid_t clockid; + static int perf_evsel__no_extra_init(struct perf_evsel *evsel __maybe_unused) { return 0; @@ -537,13 +541,30 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) } static void -perf_evsel__config_callgraph(struct perf_evsel *evsel) +perf_evsel__config_callgraph(struct perf_evsel *evsel, + struct record_opts *opts) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; perf_evsel__set_sample_bit(evsel, CALLCHAIN); + if (callchain_param.record_mode == CALLCHAIN_LBR) { + if (!opts->branch_stack) { + if (attr->exclude_user) { + pr_warning("LBR callstack option is only available " + "to get user callchain information. " + "Falling back to framepointers.\n"); + } else { + perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_CALL_STACK; + } + } else + pr_warning("Cannot use LBR callstack with branch stack. " + "Falling back to framepointers.\n"); + } + if (callchain_param.record_mode == CALLCHAIN_DWARF) { if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); @@ -667,7 +688,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) evsel->attr.exclude_callchain_user = 1; if (callchain_param.enabled && !evsel->no_aux_samples) - perf_evsel__config_callgraph(evsel); + perf_evsel__config_callgraph(evsel, opts); if (opts->sample_intr_regs) { attr->sample_regs_intr = PERF_REGS_MASK; @@ -717,6 +738,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (opts->sample_transaction) perf_evsel__set_sample_bit(evsel, TRANSACTION); + if (opts->running_time) { + evsel->attr.read_format |= + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + } + /* * XXX see the function comment above * @@ -738,6 +765,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->disabled = 0; attr->enable_on_exec = 0; } + + clockid = opts->clockid; + if (opts->use_clockid) { + attr->use_clockid = 1; + attr->clockid = opts->clockid; + } } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -978,67 +1011,126 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) return fd; } -#define __PRINT_ATTR(fmt, cast, field) \ - fprintf(fp, " %-19s "fmt"\n", #field, cast attr->field) - -#define PRINT_ATTR_U32(field) __PRINT_ATTR("%u" , , field) -#define PRINT_ATTR_X32(field) __PRINT_ATTR("%#x", , field) -#define PRINT_ATTR_U64(field) __PRINT_ATTR("%" PRIu64, (uint64_t), field) -#define PRINT_ATTR_X64(field) __PRINT_ATTR("%#"PRIx64, (uint64_t), field) - -#define PRINT_ATTR2N(name1, field1, name2, field2) \ - fprintf(fp, " %-19s %u %-19s %u\n", \ - name1, attr->field1, name2, attr->field2) - -#define PRINT_ATTR2(field1, field2) \ - PRINT_ATTR2N(#field1, field1, #field2, field2) - -static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp) -{ - size_t ret = 0; - - ret += fprintf(fp, "%.60s\n", graph_dotted_line); - ret += fprintf(fp, "perf_event_attr:\n"); - - ret += PRINT_ATTR_U32(type); - ret += PRINT_ATTR_U32(size); - ret += PRINT_ATTR_X64(config); - ret += PRINT_ATTR_U64(sample_period); - ret += PRINT_ATTR_U64(sample_freq); - ret += PRINT_ATTR_X64(sample_type); - ret += PRINT_ATTR_X64(read_format); - - ret += PRINT_ATTR2(disabled, inherit); - ret += PRINT_ATTR2(pinned, exclusive); - ret += PRINT_ATTR2(exclude_user, exclude_kernel); - ret += PRINT_ATTR2(exclude_hv, exclude_idle); - ret += PRINT_ATTR2(mmap, comm); - ret += PRINT_ATTR2(mmap2, comm_exec); - ret += PRINT_ATTR2(freq, inherit_stat); - ret += PRINT_ATTR2(enable_on_exec, task); - ret += PRINT_ATTR2(watermark, precise_ip); - ret += PRINT_ATTR2(mmap_data, sample_id_all); - ret += PRINT_ATTR2(exclude_host, exclude_guest); - ret += PRINT_ATTR2N("excl.callchain_kern", exclude_callchain_kernel, - "excl.callchain_user", exclude_callchain_user); - - ret += PRINT_ATTR_U32(wakeup_events); - ret += PRINT_ATTR_U32(wakeup_watermark); - ret += PRINT_ATTR_X32(bp_type); - ret += PRINT_ATTR_X64(bp_addr); - ret += PRINT_ATTR_X64(config1); - ret += PRINT_ATTR_U64(bp_len); - ret += PRINT_ATTR_X64(config2); - ret += PRINT_ATTR_X64(branch_sample_type); - ret += PRINT_ATTR_X64(sample_regs_user); - ret += PRINT_ATTR_U32(sample_stack_user); - ret += PRINT_ATTR_X64(sample_regs_intr); - - ret += fprintf(fp, "%.60s\n", graph_dotted_line); +struct bit_names { + int bit; + const char *name; +}; + +static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits) +{ + bool first_bit = true; + int i = 0; + + do { + if (value & bits[i].bit) { + buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name); + first_bit = false; + } + } while (bits[++i].name != NULL); +} + +static void __p_sample_type(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_SAMPLE_##n, #n } + struct bit_names bits[] = { + bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR), + bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), + bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), + bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), + bit_name(IDENTIFIER), bit_name(REGS_INTR), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + +static void __p_read_format(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_FORMAT_##n, #n } + struct bit_names bits[] = { + bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING), + bit_name(ID), bit_name(GROUP), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + +#define BUF_SIZE 1024 + +#define p_hex(val) snprintf(buf, BUF_SIZE, "%"PRIx64, (uint64_t)(val)) +#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val)) +#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val)) +#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) +#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) + +#define PRINT_ATTRn(_n, _f, _p) \ +do { \ + if (attr->_f) { \ + _p(attr->_f); \ + ret += attr__fprintf(fp, _n, buf, priv);\ + } \ +} while (0) + +#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p) + +int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, + attr__fprintf_f attr__fprintf, void *priv) +{ + char buf[BUF_SIZE]; + int ret = 0; + + PRINT_ATTRf(type, p_unsigned); + PRINT_ATTRf(size, p_unsigned); + PRINT_ATTRf(config, p_hex); + PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned); + PRINT_ATTRf(sample_type, p_sample_type); + PRINT_ATTRf(read_format, p_read_format); + + PRINT_ATTRf(disabled, p_unsigned); + PRINT_ATTRf(inherit, p_unsigned); + PRINT_ATTRf(pinned, p_unsigned); + PRINT_ATTRf(exclusive, p_unsigned); + PRINT_ATTRf(exclude_user, p_unsigned); + PRINT_ATTRf(exclude_kernel, p_unsigned); + PRINT_ATTRf(exclude_hv, p_unsigned); + PRINT_ATTRf(exclude_idle, p_unsigned); + PRINT_ATTRf(mmap, p_unsigned); + PRINT_ATTRf(comm, p_unsigned); + PRINT_ATTRf(freq, p_unsigned); + PRINT_ATTRf(inherit_stat, p_unsigned); + PRINT_ATTRf(enable_on_exec, p_unsigned); + PRINT_ATTRf(task, p_unsigned); + PRINT_ATTRf(watermark, p_unsigned); + PRINT_ATTRf(precise_ip, p_unsigned); + PRINT_ATTRf(mmap_data, p_unsigned); + PRINT_ATTRf(sample_id_all, p_unsigned); + PRINT_ATTRf(exclude_host, p_unsigned); + PRINT_ATTRf(exclude_guest, p_unsigned); + PRINT_ATTRf(exclude_callchain_kernel, p_unsigned); + PRINT_ATTRf(exclude_callchain_user, p_unsigned); + PRINT_ATTRf(mmap2, p_unsigned); + PRINT_ATTRf(comm_exec, p_unsigned); + PRINT_ATTRf(use_clockid, p_unsigned); + + PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); + PRINT_ATTRf(bp_type, p_unsigned); + PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex); + PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex); + PRINT_ATTRf(sample_regs_user, p_hex); + PRINT_ATTRf(sample_stack_user, p_unsigned); + PRINT_ATTRf(clockid, p_signed); + PRINT_ATTRf(sample_regs_intr, p_hex); return ret; } +static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, + void *priv __attribute__((unused))) +{ + return fprintf(fp, " %-32s %s\n", name, val); +} + static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { @@ -1062,6 +1154,12 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, } fallback_missing_features: + if (perf_missing_features.clockid_wrong) + evsel->attr.clockid = CLOCK_MONOTONIC; /* should always work */ + if (perf_missing_features.clockid) { + evsel->attr.use_clockid = 0; + evsel->attr.clockid = 0; + } if (perf_missing_features.cloexec) flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC; if (perf_missing_features.mmap2) @@ -1072,8 +1170,12 @@ retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; - if (verbose >= 2) - perf_event_attr__fprintf(&evsel->attr, stderr); + if (verbose >= 2) { + fprintf(stderr, "%.60s\n", graph_dotted_line); + fprintf(stderr, "perf_event_attr:\n"); + perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL); + fprintf(stderr, "%.60s\n", graph_dotted_line); + } for (cpu = 0; cpu < cpus->nr; cpu++) { @@ -1099,6 +1201,17 @@ retry_open: goto try_fallback; } set_rlimit = NO_CHANGE; + + /* + * If we succeeded but had to kill clockid, fail and + * have perf_evsel__open_strerror() print us a nice + * error. + */ + if (perf_missing_features.clockid || + perf_missing_features.clockid_wrong) { + err = -EINVAL; + goto out_close; + } } } @@ -1132,7 +1245,17 @@ try_fallback: if (err != -EINVAL || cpu > 0 || thread > 0) goto out_close; - if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) { + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. + */ + if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) { + perf_missing_features.clockid_wrong = true; + goto fallback_missing_features; + } else if (!perf_missing_features.clockid && evsel->attr.use_clockid) { + perf_missing_features.clockid = true; + goto fallback_missing_features; + } else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) { perf_missing_features.cloexec = true; goto fallback_missing_features; } else if (!perf_missing_features.mmap2 && evsel->attr.mmap2) { @@ -1892,7 +2015,7 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, value = *(u32 *)ptr; break; case 8: - value = *(u64 *)ptr; + memcpy(&value, ptr, sizeof(u64)); break; default: return 0; @@ -1933,62 +2056,9 @@ static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) return ret; } -static int __if_fprintf(FILE *fp, bool *first, const char *field, u64 value) -{ - if (value == 0) - return 0; - - return comma_fprintf(fp, first, " %s: %" PRIu64, field, value); -} - -#define if_print(field) printed += __if_fprintf(fp, &first, #field, evsel->attr.field) - -struct bit_names { - int bit; - const char *name; -}; - -static int bits__fprintf(FILE *fp, const char *field, u64 value, - struct bit_names *bits, bool *first) -{ - int i = 0, printed = comma_fprintf(fp, first, " %s: ", field); - bool first_bit = true; - - do { - if (value & bits[i].bit) { - printed += fprintf(fp, "%s%s", first_bit ? "" : "|", bits[i].name); - first_bit = false; - } - } while (bits[++i].name != NULL); - - return printed; -} - -static int sample_type__fprintf(FILE *fp, bool *first, u64 value) -{ -#define bit_name(n) { PERF_SAMPLE_##n, #n } - struct bit_names bits[] = { - bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR), - bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), - bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), - bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), - bit_name(IDENTIFIER), bit_name(REGS_INTR), - { .name = NULL, } - }; -#undef bit_name - return bits__fprintf(fp, "sample_type", value, bits, first); -} - -static int read_format__fprintf(FILE *fp, bool *first, u64 value) +static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv) { -#define bit_name(n) { PERF_FORMAT_##n, #n } - struct bit_names bits[] = { - bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING), - bit_name(ID), bit_name(GROUP), - { .name = NULL, } - }; -#undef bit_name - return bits__fprintf(fp, "read_format", value, bits, first); + return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val); } int perf_evsel__fprintf(struct perf_evsel *evsel, @@ -2017,47 +2087,13 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, printed += fprintf(fp, "%s", perf_evsel__name(evsel)); - if (details->verbose || details->freq) { + if (details->verbose) { + printed += perf_event_attr__fprintf(fp, &evsel->attr, + __print_attr__fprintf, &first); + } else if (details->freq) { printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64, (u64)evsel->attr.sample_freq); } - - if (details->verbose) { - if_print(type); - if_print(config); - if_print(config1); - if_print(config2); - if_print(size); - printed += sample_type__fprintf(fp, &first, evsel->attr.sample_type); - if (evsel->attr.read_format) - printed += read_format__fprintf(fp, &first, evsel->attr.read_format); - if_print(disabled); - if_print(inherit); - if_print(pinned); - if_print(exclusive); - if_print(exclude_user); - if_print(exclude_kernel); - if_print(exclude_hv); - if_print(exclude_idle); - if_print(mmap); - if_print(mmap2); - if_print(comm); - if_print(comm_exec); - if_print(freq); - if_print(inherit_stat); - if_print(enable_on_exec); - if_print(task); - if_print(watermark); - if_print(precise_ip); - if_print(mmap_data); - if_print(sample_id_all); - if_print(exclude_host); - if_print(exclude_guest); - if_print(__reserved_1); - if_print(wakeup_events); - if_print(bp_type); - if_print(branch_sample_type); - } out: fputc('\n', fp); return ++printed; @@ -2135,6 +2171,12 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "The PMU counters are busy/taken by another profiler.\n" "We found oprofile daemon running, please stop it and try again."); break; + case EINVAL: + if (perf_missing_features.clockid) + return scnprintf(msg, size, "clockid feature not supported."); + if (perf_missing_features.clockid_wrong) + return scnprintf(msg, size, "wrong clockid (%d).", clockid); + break; default: break; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 38622747d130..e486151b0308 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -335,6 +335,7 @@ struct perf_attr_details { bool freq; bool verbose; bool event_group; + bool force; }; int perf_evsel__fprintf(struct perf_evsel *evsel, @@ -355,4 +356,14 @@ for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); \ (_evsel) && (_evsel)->leader == (_leader); \ (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node)) +static inline bool has_branch_callstack(struct perf_evsel *evsel) +{ + return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; +} + +typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); + +int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, + attr__fprintf_f attr__fprintf, void *priv); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1f407f7352a7..918fd8ae2d80 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1055,6 +1055,12 @@ error: goto out; } +static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val, + void *priv __attribute__((unused))) +{ + return fprintf(fp, ", %s = %s", name, val); +} + static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) { struct perf_evsel *evsel, *events = read_event_desc(ph, fd); @@ -1069,26 +1075,6 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) for (evsel = events; evsel->attr.size; evsel++) { fprintf(fp, "# event : name = %s, ", evsel->name); - fprintf(fp, "type = %d, config = 0x%"PRIx64 - ", config1 = 0x%"PRIx64", config2 = 0x%"PRIx64, - evsel->attr.type, - (u64)evsel->attr.config, - (u64)evsel->attr.config1, - (u64)evsel->attr.config2); - - fprintf(fp, ", excl_usr = %d, excl_kern = %d", - evsel->attr.exclude_user, - evsel->attr.exclude_kernel); - - fprintf(fp, ", excl_host = %d, excl_guest = %d", - evsel->attr.exclude_host, - evsel->attr.exclude_guest); - - fprintf(fp, ", precise_ip = %d", evsel->attr.precise_ip); - - fprintf(fp, ", attr_mmap2 = %d", evsel->attr.mmap2); - fprintf(fp, ", attr_mmap = %d", evsel->attr.mmap); - fprintf(fp, ", attr_mmap_data = %d", evsel->attr.mmap_data); if (evsel->ids) { fprintf(fp, ", id = {"); for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) { @@ -1099,6 +1085,8 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) fprintf(fp, " }"); } + perf_event_attr__fprintf(fp, &evsel->attr, __desc_attr__fprintf, NULL); + fputc('\n', fp); } @@ -1266,7 +1254,7 @@ static int __event_process_build_id(struct build_id_event *bev, dso__set_build_id(dso, &bev->build_id); - if (!is_kernel_module(filename, NULL)) + if (!is_kernel_module(filename)) dso->kernel = dso_type; build_id__sprintf(dso->build_id, sizeof(dso->build_id), @@ -2516,8 +2504,11 @@ int perf_session__read_header(struct perf_session *session) if (read_attr(fd, header, &f_attr) < 0) goto out_errno; - if (header->needs_swap) + if (header->needs_swap) { + f_attr.ids.size = bswap_64(f_attr.ids.size); + f_attr.ids.offset = bswap_64(f_attr.ids.offset); perf_event__attr_swap(&f_attr.attr); + } tmp = lseek(fd, 0, SEEK_CUR); evsel = perf_evsel__new(&f_attr.attr); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 70b48a65064c..cc22b9158b93 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -263,15 +263,9 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - /* - * We may be annotating this, for instance, so keep it here in - * case some it gets new samples, we'll eventually free it when - * the user stops browsing and it agains gets fully decayed. - */ if (((zap_user && n->level == '.') || (zap_kernel && n->level != '.') || - hists__decay_entry(hists, n)) && - !n->used) { + hists__decay_entry(hists, n))) { hists__delete_entry(hists, n); } } @@ -355,6 +349,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, callchain_init(he->callchain); INIT_LIST_HEAD(&he->pairs.node); + thread__get(he->thread); } return he; @@ -941,6 +936,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) void hist_entry__delete(struct hist_entry *he) { + thread__zput(he->thread); zfree(&he->branch_info); zfree(&he->mem_info); zfree(&he->stat_acc); @@ -1169,6 +1165,7 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h /* force fold unfiltered entry for simplicity */ h->ms.unfolded = false; h->row_offset = 0; + h->nr_rows = 0; hists->stats.nr_non_filtered_samples += h->stat.nr_events; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2b690d028907..9f31b89a527a 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -60,7 +60,7 @@ struct hists { struct rb_root entries_collapsed; u64 nr_entries; u64 nr_non_filtered_entries; - const struct thread *thread_filter; + struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; const char *symbol_filter_str; @@ -303,6 +303,9 @@ struct hist_browser_timer { #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" +int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, + struct hist_browser_timer *hbt); + int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, struct hist_browser_timer *hbt); @@ -321,6 +324,12 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, { return 0; } +static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct hist_browser_timer *hbt __maybe_unused) +{ + return 0; +} static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct perf_evsel *evsel __maybe_unused, diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index cf1d7913783b..ae825d4ec110 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -99,6 +99,7 @@ struct perf_kvm_stat { int timerfd; unsigned int display_time; bool live; + bool force; }; struct kvm_reg_events_ops { diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c new file mode 100644 index 000000000000..95a1acb61245 --- /dev/null +++ b/tools/perf/util/lzma.c @@ -0,0 +1,95 @@ +#include <lzma.h> +#include <stdio.h> +#include <linux/compiler.h> +#include "util.h" +#include "debug.h" + +#define BUFSIZE 8192 + +static const char *lzma_strerror(lzma_ret ret) +{ + switch ((int) ret) { + case LZMA_MEM_ERROR: + return "Memory allocation failed"; + case LZMA_OPTIONS_ERROR: + return "Unsupported decompressor flags"; + case LZMA_FORMAT_ERROR: + return "The input is not in the .xz format"; + case LZMA_DATA_ERROR: + return "Compressed file is corrupt"; + case LZMA_BUF_ERROR: + return "Compressed file is truncated or otherwise corrupt"; + default: + return "Unknown error, possibly a bug"; + } +} + +int lzma_decompress_to_file(const char *input, int output_fd) +{ + lzma_action action = LZMA_RUN; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_ret ret; + + u8 buf_in[BUFSIZE]; + u8 buf_out[BUFSIZE]; + FILE *infile; + + infile = fopen(input, "rb"); + if (!infile) { + pr_err("lzma: fopen failed on %s: '%s'\n", + input, strerror(errno)); + return -1; + } + + ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED); + if (ret != LZMA_OK) { + pr_err("lzma: lzma_stream_decoder failed %s (%d)\n", + lzma_strerror(ret), ret); + return -1; + } + + strm.next_in = NULL; + strm.avail_in = 0; + strm.next_out = buf_out; + strm.avail_out = sizeof(buf_out); + + while (1) { + if (strm.avail_in == 0 && !feof(infile)) { + strm.next_in = buf_in; + strm.avail_in = fread(buf_in, 1, sizeof(buf_in), infile); + + if (ferror(infile)) { + pr_err("lzma: read error: %s\n", strerror(errno)); + return -1; + } + + if (feof(infile)) + action = LZMA_FINISH; + } + + ret = lzma_code(&strm, action); + + if (strm.avail_out == 0 || ret == LZMA_STREAM_END) { + ssize_t write_size = sizeof(buf_out) - strm.avail_out; + + if (writen(output_fd, buf_out, write_size) != write_size) { + pr_err("lzma: write error: %s\n", strerror(errno)); + return -1; + } + + strm.next_out = buf_out; + strm.avail_out = sizeof(buf_out); + } + + if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END) + return 0; + + pr_err("lzma: failed %s\n", lzma_strerror(ret)); + return -1; + } + } + + fclose(infile); + return 0; +} diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 1bca3a9f2b16..527e032e24f6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -89,16 +89,6 @@ static void dsos__delete(struct dsos *dsos) } } -void machine__delete_dead_threads(struct machine *machine) -{ - struct thread *n, *t; - - list_for_each_entry_safe(t, n, &machine->dead_threads, node) { - list_del(&t->node); - thread__delete(t); - } -} - void machine__delete_threads(struct machine *machine) { struct rb_node *nd = rb_first(&machine->threads); @@ -106,9 +96,8 @@ void machine__delete_threads(struct machine *machine) while (nd) { struct thread *t = rb_entry(nd, struct thread, rb_node); - rb_erase(&t->rb_node, &machine->threads); nd = rb_next(nd); - thread__delete(t); + machine__remove_thread(machine, t); } } @@ -361,9 +350,13 @@ static struct thread *__machine__findnew_thread(struct machine *machine, * the full rbtree: */ th = machine->last_match; - if (th && th->tid == tid) { - machine__update_thread_pid(machine, th, pid); - return th; + if (th != NULL) { + if (th->tid == tid) { + machine__update_thread_pid(machine, th, pid); + return th; + } + + thread__zput(machine->last_match); } while (*p != NULL) { @@ -371,7 +364,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine, th = rb_entry(parent, struct thread, rb_node); if (th->tid == tid) { - machine->last_match = th; + machine->last_match = thread__get(th); machine__update_thread_pid(machine, th, pid); return th; } @@ -403,8 +396,11 @@ static struct thread *__machine__findnew_thread(struct machine *machine, thread__delete(th); return NULL; } - - machine->last_match = th; + /* + * It is now in the rbtree, get a ref + */ + thread__get(th); + machine->last_match = thread__get(th); } return th; @@ -462,30 +458,61 @@ int machine__process_lost_event(struct machine *machine __maybe_unused, return 0; } +static struct dso* +machine__module_dso(struct machine *machine, struct kmod_path *m, + const char *filename) +{ + struct dso *dso; + + dso = dsos__find(&machine->kernel_dsos, m->name, true); + if (!dso) { + dso = dsos__addnew(&machine->kernel_dsos, m->name); + if (dso == NULL) + return NULL; + + if (machine__is_host(machine)) + dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; + else + dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; + + /* _KMODULE_COMP should be next to _KMODULE */ + if (m->kmod && m->comp) + dso->symtab_type++; + + dso__set_short_name(dso, strdup(m->name), true); + dso__set_long_name(dso, strdup(filename), true); + } + + return dso; +} + struct map *machine__new_module(struct machine *machine, u64 start, const char *filename) { - struct map *map; - struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename); - bool compressed; + struct map *map = NULL; + struct dso *dso; + struct kmod_path m; - if (dso == NULL) + if (kmod_path__parse_name(&m, filename)) return NULL; - map = map__new2(start, dso, MAP__FUNCTION); - if (map == NULL) - return NULL; + map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION, + m.name); + if (map) + goto out; - if (machine__is_host(machine)) - dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; - else - dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; + dso = machine__module_dso(machine, &m, filename); + if (dso == NULL) + goto out; - /* _KMODULE_COMP should be next to _KMODULE */ - if (is_kernel_module(filename, &compressed) && compressed) - dso->symtab_type++; + map = map__new2(start, dso, MAP__FUNCTION); + if (map == NULL) + goto out; map_groups__insert(&machine->kmaps, map); + +out: + free(m.name); return map; } @@ -650,6 +677,9 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) machine->vmlinux_maps[type]->unmap_ip = identity__map_ip; kmap = map__kmap(machine->vmlinux_maps[type]); + if (!kmap) + return -1; + kmap->kmaps = &machine->kmaps; map_groups__insert(&machine->kmaps, machine->vmlinux_maps[type]); @@ -671,7 +701,7 @@ void machine__destroy_kernel_maps(struct machine *machine) kmap = map__kmap(machine->vmlinux_maps[type]); map_groups__remove(&machine->kmaps, machine->vmlinux_maps[type]); - if (kmap->ref_reloc_sym) { + if (kmap && kmap->ref_reloc_sym) { /* * ref_reloc_sym is shared among all maps, so free just * on one of them. @@ -827,6 +857,39 @@ static char *get_kernel_version(const char *root_dir) return strdup(name); } +static bool is_kmod_dso(struct dso *dso) +{ + return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || + dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE; +} + +static int map_groups__set_module_path(struct map_groups *mg, const char *path, + struct kmod_path *m) +{ + struct map *map; + char *long_name; + + map = map_groups__find_by_name(mg, MAP__FUNCTION, m->name); + if (map == NULL) + return 0; + + long_name = strdup(path); + if (long_name == NULL) + return -ENOMEM; + + dso__set_long_name(map->dso, long_name, true); + dso__kernel_module_get_build_id(map->dso, ""); + + /* + * Full name could reveal us kmod compression, so + * we need to update the symtab_type if needed. + */ + if (m->comp && is_kmod_dso(map->dso)) + map->dso->symtab_type++; + + return 0; +} + static int map_groups__set_modules_path_dir(struct map_groups *mg, const char *dir_name, int depth) { @@ -865,35 +928,19 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg, if (ret < 0) goto out; } else { - char *dot = strrchr(dent->d_name, '.'), - dso_name[PATH_MAX]; - struct map *map; - char *long_name; + struct kmod_path m; - if (dot == NULL) - continue; - - /* On some system, modules are compressed like .ko.gz */ - if (is_supported_compression(dot + 1) && - is_kmodule_extension(dot - 2)) - dot -= 3; + ret = kmod_path__parse_name(&m, dent->d_name); + if (ret) + goto out; - snprintf(dso_name, sizeof(dso_name), "[%.*s]", - (int)(dot - dent->d_name), dent->d_name); + if (m.kmod) + ret = map_groups__set_module_path(mg, path, &m); - strxfrchar(dso_name, '-', '_'); - map = map_groups__find_by_name(mg, MAP__FUNCTION, - dso_name); - if (map == NULL) - continue; + free(m.name); - long_name = strdup(path); - if (long_name == NULL) { - ret = -1; + if (ret) goto out; - } - dso__set_long_name(map->dso, long_name, true); - dso__kernel_module_get_build_id(map->dso, ""); } } @@ -1046,40 +1093,11 @@ static int machine__process_kernel_mmap_event(struct machine *machine, strlen(kmmap_prefix) - 1) == 0; if (event->mmap.filename[0] == '/' || (!is_kernel_mmap && event->mmap.filename[0] == '[')) { - - char short_module_name[1024]; - char *name, *dot; - - if (event->mmap.filename[0] == '/') { - name = strrchr(event->mmap.filename, '/'); - if (name == NULL) - goto out_problem; - - ++name; /* skip / */ - dot = strrchr(name, '.'); - if (dot == NULL) - goto out_problem; - /* On some system, modules are compressed like .ko.gz */ - if (is_supported_compression(dot + 1)) - dot -= 3; - if (!is_kmodule_extension(dot + 1)) - goto out_problem; - snprintf(short_module_name, sizeof(short_module_name), - "[%.*s]", (int)(dot - name), name); - strxfrchar(short_module_name, '-', '_'); - } else - strcpy(short_module_name, event->mmap.filename); - map = machine__new_module(machine, event->mmap.start, event->mmap.filename); if (map == NULL) goto out_problem; - name = strdup(short_module_name); - if (name == NULL) - goto out_problem; - - dso__set_short_name(map->dso, name, true); map->end = map->start + event->mmap.len; } else if (is_kernel_mmap) { const char *symbol_name = (event->mmap.filename + @@ -1092,7 +1110,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, struct dso *dso; list_for_each_entry(dso, &machine->kernel_dsos.head, node) { - if (is_kernel_module(dso->long_name, NULL)) + if (is_kernel_module(dso->long_name)) continue; kernel = dso; @@ -1236,15 +1254,19 @@ out_problem: return 0; } -static void machine__remove_thread(struct machine *machine, struct thread *th) +void machine__remove_thread(struct machine *machine, struct thread *th) { - machine->last_match = NULL; + if (machine->last_match == th) + thread__zput(machine->last_match); + rb_erase(&th->rb_node, &machine->threads); /* - * We may have references to this thread, for instance in some hist_entry - * instances, so just move them to a separate list. + * Move it first to the dead_threads list, then drop the reference, + * if this is the last reference, then the thread__delete destructor + * will be called and we will remove it from the dead_threads list. */ list_add_tail(&th->node, &machine->dead_threads); + thread__put(th); } int machine__process_fork_event(struct machine *machine, union perf_event *event, @@ -1387,29 +1409,27 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, static int add_callchain_ip(struct thread *thread, struct symbol **parent, struct addr_location *root_al, - bool branch_history, + u8 *cpumode, u64 ip) { struct addr_location al; al.filtered = 0; al.sym = NULL; - if (branch_history) + if (!cpumode) { thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al); - else { - u8 cpumode = PERF_RECORD_MISC_USER; - + } else { if (ip >= PERF_CONTEXT_MAX) { switch (ip) { case PERF_CONTEXT_HV: - cpumode = PERF_RECORD_MISC_HYPERVISOR; + *cpumode = PERF_RECORD_MISC_HYPERVISOR; break; case PERF_CONTEXT_KERNEL: - cpumode = PERF_RECORD_MISC_KERNEL; + *cpumode = PERF_RECORD_MISC_KERNEL; break; case PERF_CONTEXT_USER: - cpumode = PERF_RECORD_MISC_USER; + *cpumode = PERF_RECORD_MISC_USER; break; default: pr_debug("invalid callchain context: " @@ -1423,8 +1443,8 @@ static int add_callchain_ip(struct thread *thread, } return 0; } - thread__find_addr_location(thread, cpumode, MAP__FUNCTION, - ip, &al); + thread__find_addr_location(thread, *cpumode, MAP__FUNCTION, + ip, &al); } if (al.sym != NULL) { @@ -1502,18 +1522,102 @@ static int remove_loops(struct branch_entry *l, int nr) return nr; } -static int thread__resolve_callchain_sample(struct thread *thread, - struct ip_callchain *chain, - struct branch_stack *branch, - struct symbol **parent, - struct addr_location *root_al, - int max_stack) +/* + * Recolve LBR callstack chain sample + * Return: + * 1 on success get LBR callchain information + * 0 no available LBR callchain information, should try fp + * negative error code on other errors. + */ +static int resolve_lbr_callchain_sample(struct thread *thread, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) { + struct ip_callchain *chain = sample->callchain; int chain_nr = min(max_stack, (int)chain->nr); + u8 cpumode = PERF_RECORD_MISC_USER; + int i, j, err; + u64 ip; + + for (i = 0; i < chain_nr; i++) { + if (chain->ips[i] == PERF_CONTEXT_USER) + break; + } + + /* LBR only affects the user callchain */ + if (i != chain_nr) { + struct branch_stack *lbr_stack = sample->branch_stack; + int lbr_nr = lbr_stack->nr; + /* + * LBR callstack can only get user call chain. + * The mix_chain_nr is kernel call chain + * number plus LBR user call chain number. + * i is kernel call chain number, + * 1 is PERF_CONTEXT_USER, + * lbr_nr + 1 is the user call chain number. + * For details, please refer to the comments + * in callchain__printf + */ + int mix_chain_nr = i + 1 + lbr_nr + 1; + + if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) { + pr_warning("corrupted callchain. skipping...\n"); + return 0; + } + + for (j = 0; j < mix_chain_nr; j++) { + if (callchain_param.order == ORDER_CALLEE) { + if (j < i + 1) + ip = chain->ips[j]; + else if (j > i + 1) + ip = lbr_stack->entries[j - i - 2].from; + else + ip = lbr_stack->entries[0].to; + } else { + if (j < lbr_nr) + ip = lbr_stack->entries[lbr_nr - j - 1].from; + else if (j > lbr_nr) + ip = chain->ips[i + 1 - (j - lbr_nr)]; + else + ip = lbr_stack->entries[0].to; + } + + err = add_callchain_ip(thread, parent, root_al, &cpumode, ip); + if (err) + return (err < 0) ? err : 0; + } + return 1; + } + + return 0; +} + +static int thread__resolve_callchain_sample(struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) +{ + struct branch_stack *branch = sample->branch_stack; + struct ip_callchain *chain = sample->callchain; + int chain_nr = min(max_stack, (int)chain->nr); + u8 cpumode = PERF_RECORD_MISC_USER; int i, j, err; int skip_idx = -1; int first_call = 0; + callchain_cursor_reset(&callchain_cursor); + + if (has_branch_callstack(evsel)) { + err = resolve_lbr_callchain_sample(thread, sample, parent, + root_al, max_stack); + if (err) + return (err < 0) ? err : 0; + } + /* * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. @@ -1521,8 +1625,6 @@ static int thread__resolve_callchain_sample(struct thread *thread, if (chain->nr < PERF_MAX_STACK_DEPTH) skip_idx = arch_skip_callchain_idx(thread, chain); - callchain_cursor_reset(&callchain_cursor); - /* * Add branches to call stack for easier browsing. This gives * more context for a sample than just the callers. @@ -1568,10 +1670,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, for (i = 0; i < nr; i++) { err = add_callchain_ip(thread, parent, root_al, - true, be[i].to); + NULL, be[i].to); if (!err) err = add_callchain_ip(thread, parent, root_al, - true, be[i].from); + NULL, be[i].from); if (err == -EINVAL) break; if (err) @@ -1600,7 +1702,7 @@ check_calls: #endif ip = chain->ips[j]; - err = add_callchain_ip(thread, parent, root_al, false, ip); + err = add_callchain_ip(thread, parent, root_al, &cpumode, ip); if (err) return (err < 0) ? err : 0; @@ -1623,9 +1725,9 @@ int thread__resolve_callchain(struct thread *thread, struct addr_location *root_al, int max_stack) { - int ret = thread__resolve_callchain_sample(thread, sample->callchain, - sample->branch_stack, - parent, root_al, max_stack); + int ret = thread__resolve_callchain_sample(thread, evsel, + sample, parent, + root_al, max_stack); if (ret) return ret; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index e8b7779a0a3f..6d64cedb9d1e 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -118,9 +118,9 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); -void machine__delete_dead_threads(struct machine *machine); void machine__delete_threads(struct machine *machine); void machine__delete(struct machine *machine); +void machine__remove_thread(struct machine *machine, struct thread *th); struct branch_info *sample__resolve_bstack(struct perf_sample *sample, struct addr_location *al); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 62ca9f2607d5..a14f08f41686 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -778,3 +778,23 @@ struct map *maps__next(struct map *map) return rb_entry(next, struct map, rb_node); return NULL; } + +struct kmap *map__kmap(struct map *map) +{ + if (!map->dso || !map->dso->kernel) { + pr_err("Internal error: map__kmap with a non-kernel map\n"); + return NULL; + } + return (struct kmap *)(map + 1); +} + +struct map_groups *map__kmaps(struct map *map) +{ + struct kmap *kmap = map__kmap(map); + + if (!kmap || !kmap->kmaps) { + pr_err("Internal error: map__kmaps with a non-kernel map\n"); + return NULL; + } + return kmap->kmaps; +} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 0e42438b1e59..ec19c59ca38e 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -76,10 +76,8 @@ static inline struct map_groups *map_groups__get(struct map_groups *mg) void map_groups__put(struct map_groups *mg); -static inline struct kmap *map__kmap(struct map *map) -{ - return (struct kmap *)(map + 1); -} +struct kmap *map__kmap(struct map *map); +struct map_groups *map__kmaps(struct map *map); static inline u64 map__map_ip(struct map *map, u64 ip) { diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index fd4be94125fb..52be201b9b25 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -2,7 +2,6 @@ #include <linux/compiler.h> #include <linux/string.h> #include "ordered-events.h" -#include "evlist.h" #include "session.h" #include "asm/bug.h" #include "debug.h" @@ -131,8 +130,8 @@ static struct ordered_event *alloc_event(struct ordered_events *oe, return new; } -struct ordered_event * -ordered_events__new(struct ordered_events *oe, u64 timestamp, +static struct ordered_event * +ordered_events__new_event(struct ordered_events *oe, u64 timestamp, union perf_event *event) { struct ordered_event *new; @@ -153,20 +152,47 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve free_dup_event(oe, event->event); } -static int __ordered_events__flush(struct perf_session *s, - struct perf_tool *tool) +int ordered_events__queue(struct ordered_events *oe, union perf_event *event, + struct perf_sample *sample, u64 file_offset) +{ + u64 timestamp = sample->time; + struct ordered_event *oevent; + + if (!timestamp || timestamp == ~0ULL) + return -ETIME; + + if (timestamp < oe->last_flush) { + pr_oe_time(timestamp, "out of order event\n"); + pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n", + oe->last_flush_type); + + oe->nr_unordered_events++; + } + + oevent = ordered_events__new_event(oe, timestamp, event); + if (!oevent) { + ordered_events__flush(oe, OE_FLUSH__HALF); + oevent = ordered_events__new_event(oe, timestamp, event); + } + + if (!oevent) + return -ENOMEM; + + oevent->file_offset = file_offset; + return 0; +} + +static int __ordered_events__flush(struct ordered_events *oe) { - struct ordered_events *oe = &s->ordered_events; struct list_head *head = &oe->events; struct ordered_event *tmp, *iter; - struct perf_sample sample; u64 limit = oe->next_flush; u64 last_ts = oe->last ? oe->last->timestamp : 0ULL; bool show_progress = limit == ULLONG_MAX; struct ui_progress prog; int ret; - if (!tool->ordered_events || !limit) + if (!limit) return 0; if (show_progress) @@ -178,16 +204,9 @@ static int __ordered_events__flush(struct perf_session *s, if (iter->timestamp > limit) break; - - ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample); + ret = oe->deliver(oe, iter); if (ret) - pr_err("Can't parse sample, err = %d\n", ret); - else { - ret = perf_session__deliver_event(s, iter->event, &sample, tool, - iter->file_offset); - if (ret) - return ret; - } + return ret; ordered_events__delete(oe, iter); oe->last_flush = iter->timestamp; @@ -204,10 +223,8 @@ static int __ordered_events__flush(struct perf_session *s, return 0; } -int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, - enum oe_flush how) +int ordered_events__flush(struct ordered_events *oe, enum oe_flush how) { - struct ordered_events *oe = &s->ordered_events; static const char * const str[] = { "NONE", "FINAL", @@ -216,6 +233,9 @@ int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, }; int err; + if (oe->nr_events == 0) + return 0; + switch (how) { case OE_FLUSH__FINAL: oe->next_flush = ULLONG_MAX; @@ -248,7 +268,7 @@ int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, str[how], oe->nr_events); pr_oe_time(oe->max_timestamp, "max_timestamp\n"); - err = __ordered_events__flush(s, tool); + err = __ordered_events__flush(oe); if (!err) { if (how == OE_FLUSH__ROUND) @@ -264,13 +284,14 @@ int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, return err; } -void ordered_events__init(struct ordered_events *oe) +void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver) { INIT_LIST_HEAD(&oe->events); INIT_LIST_HEAD(&oe->cache); INIT_LIST_HEAD(&oe->to_free); oe->max_alloc_size = (u64) -1; oe->cur_alloc_size = 0; + oe->deliver = deliver; } void ordered_events__free(struct ordered_events *oe) diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index 7b8f9b011f38..f403991e3bfd 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -2,9 +2,8 @@ #define __ORDERED_EVENTS_H #include <linux/types.h> -#include "tool.h" -struct perf_session; +struct perf_sample; struct ordered_event { u64 timestamp; @@ -20,6 +19,11 @@ enum oe_flush { OE_FLUSH__HALF, }; +struct ordered_events; + +typedef int (*ordered_events__deliver_t)(struct ordered_events *oe, + struct ordered_event *event); + struct ordered_events { u64 last_flush; u64 next_flush; @@ -31,18 +35,19 @@ struct ordered_events { struct list_head to_free; struct ordered_event *buffer; struct ordered_event *last; + ordered_events__deliver_t deliver; int buffer_idx; unsigned int nr_events; enum oe_flush last_flush_type; + u32 nr_unordered_events; bool copy_on_queue; }; -struct ordered_event *ordered_events__new(struct ordered_events *oe, u64 timestamp, - union perf_event *event); +int ordered_events__queue(struct ordered_events *oe, union perf_event *event, + struct perf_sample *sample, u64 file_offset); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); -int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, - enum oe_flush how); -void ordered_events__init(struct ordered_events *oe); +int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); +void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver); void ordered_events__free(struct ordered_events *oe); static inline diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7f8ec6ce2823..be0655388b38 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -20,11 +20,6 @@ #define MAX_NAME_LEN 100 -struct event_symbol { - const char *symbol; - const char *alias; -}; - #ifdef PARSER_DEBUG extern int parse_events_debug; #endif @@ -39,7 +34,7 @@ static struct perf_pmu_event_symbol *perf_pmu_events_list; */ static int perf_pmu_events_list_num; -static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { +struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { .symbol = "cpu-cycles", .alias = "cycles", @@ -82,7 +77,7 @@ static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { }, }; -static struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { +struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { [PERF_COUNT_SW_CPU_CLOCK] = { .symbol = "cpu-clock", .alias = "", @@ -175,9 +170,6 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(tracing_events_path)) - return NULL; - sys_dir = opendir(tracing_events_path); if (!sys_dir) return NULL; @@ -473,12 +465,6 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, int parse_events_add_tracepoint(struct list_head *list, int *idx, char *sys, char *event) { - int ret; - - ret = debugfs_valid_mountpoint(tracing_events_path); - if (ret) - return ret; - if (strpbrk(sys, "*?")) return add_tracepoint_multi_sys(list, idx, sys, event); else @@ -723,6 +709,7 @@ struct event_modifier { int eh; int eH; int eG; + int eI; int precise; int exclude_GH; int sample_read; @@ -737,6 +724,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int eh = evsel ? evsel->attr.exclude_hv : 0; int eH = evsel ? evsel->attr.exclude_host : 0; int eG = evsel ? evsel->attr.exclude_guest : 0; + int eI = evsel ? evsel->attr.exclude_idle : 0; int precise = evsel ? evsel->attr.precise_ip : 0; int sample_read = 0; int pinned = evsel ? evsel->attr.pinned : 0; @@ -767,6 +755,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, if (!exclude_GH) exclude_GH = eG = eH = 1; eH = 0; + } else if (*str == 'I') { + eI = 1; } else if (*str == 'p') { precise++; /* use of precise requires exclude_guest */ @@ -800,6 +790,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->eh = eh; mod->eH = eH; mod->eG = eG; + mod->eI = eI; mod->precise = precise; mod->exclude_GH = exclude_GH; mod->sample_read = sample_read; @@ -817,7 +808,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. */ - if (strlen(str) > (sizeof("ukhGHpppSD") - 1)) + if (strlen(str) > (sizeof("ukhGHpppSDI") - 1)) return -1; while (*p) { @@ -853,6 +844,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->attr.precise_ip = mod.precise; evsel->attr.exclude_host = mod.eH; evsel->attr.exclude_guest = mod.eG; + evsel->attr.exclude_idle = mod.eI; evsel->exclude_GH = mod.exclude_GH; evsel->sample_read = mod.sample_read; @@ -1098,6 +1090,14 @@ static const char * const event_type_descriptors[] = { "Hardware breakpoint", }; +static int cmp_string(const void *a, const void *b) +{ + const char * const *as = a; + const char * const *bs = b; + + return strcmp(*as, *bs); +} + /* * Print the events from <debugfs_mount_point>/tracing/events */ @@ -1109,18 +1109,21 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - char sbuf[STRERR_BUFSIZE]; - - if (debugfs_valid_mountpoint(tracing_events_path)) { - printf(" [ Tracepoints not available: %s ]\n", - strerror_r(errno, sbuf, sizeof(sbuf))); - return; - } + char **evt_list = NULL; + unsigned int evt_i = 0, evt_num = 0; + bool evt_num_known = false; +restart: sys_dir = opendir(tracing_events_path); if (!sys_dir) return; + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_close_sys_dir; + } + for_each_subsystem(sys_dir, sys_dirent, sys_next) { if (subsys_glob != NULL && !strglobmatch(sys_dirent.d_name, subsys_glob)) @@ -1137,19 +1140,56 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, !strglobmatch(evt_dirent.d_name, event_glob)) continue; - if (name_only) { - printf("%s:%s ", sys_dirent.d_name, evt_dirent.d_name); + if (!evt_num_known) { + evt_num++; continue; } snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); - printf(" %-50s [%s]\n", evt_path, - event_type_descriptors[PERF_TYPE_TRACEPOINT]); + + evt_list[evt_i] = strdup(evt_path); + if (evt_list[evt_i] == NULL) + goto out_close_evt_dir; + evt_i++; } closedir(evt_dir); } closedir(sys_dir); + + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], + event_type_descriptors[PERF_TYPE_TRACEPOINT]); + } + if (evt_num) + printf("\n"); + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return; + +out_close_evt_dir: + closedir(evt_dir); +out_close_sys_dir: + closedir(sys_dir); + + printf("FATAL: not enough memory to print %s\n", + event_type_descriptors[PERF_TYPE_TRACEPOINT]); + if (evt_list) + goto out_free; } /* @@ -1163,9 +1203,6 @@ int is_valid_tracepoint(const char *event_string) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(tracing_events_path)) - return 0; - sys_dir = opendir(tracing_events_path); if (!sys_dir) return 0; @@ -1233,38 +1270,19 @@ static bool is_event_supported(u8 type, unsigned config) return ret; } -static void __print_events_type(u8 type, struct event_symbol *syms, - unsigned max) -{ - char name[64]; - unsigned i; - - for (i = 0; i < max ; i++, syms++) { - if (!is_event_supported(type, i)) - continue; - - if (strlen(syms->alias)) - snprintf(name, sizeof(name), "%s OR %s", - syms->symbol, syms->alias); - else - snprintf(name, sizeof(name), "%s", syms->symbol); - - printf(" %-50s [%s]\n", name, event_type_descriptors[type]); - } -} - -void print_events_type(u8 type) -{ - if (type == PERF_TYPE_SOFTWARE) - __print_events_type(type, event_symbols_sw, PERF_COUNT_SW_MAX); - else - __print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX); -} - int print_hwcache_events(const char *event_glob, bool name_only) { - unsigned int type, op, i, printed = 0; + unsigned int type, op, i, evt_i = 0, evt_num = 0; char name[64]; + char **evt_list = NULL; + bool evt_num_known = false; + +restart: + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_enomem; + } for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { @@ -1282,27 +1300,66 @@ int print_hwcache_events(const char *event_glob, bool name_only) type | (op << 8) | (i << 16))) continue; - if (name_only) - printf("%s ", name); - else - printf(" %-50s [%s]\n", name, - event_type_descriptors[PERF_TYPE_HW_CACHE]); - ++printed; + if (!evt_num_known) { + evt_num++; + continue; + } + + evt_list[evt_i] = strdup(name); + if (evt_list[evt_i] == NULL) + goto out_enomem; + evt_i++; } } } - if (printed) + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], + event_type_descriptors[PERF_TYPE_HW_CACHE]); + } + if (evt_num) printf("\n"); - return printed; + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return evt_num; + +out_enomem: + printf("FATAL: not enough memory to print %s\n", event_type_descriptors[PERF_TYPE_HW_CACHE]); + if (evt_list) + goto out_free; + return evt_num; } -static void print_symbol_events(const char *event_glob, unsigned type, +void print_symbol_events(const char *event_glob, unsigned type, struct event_symbol *syms, unsigned max, bool name_only) { - unsigned i, printed = 0; + unsigned int i, evt_i = 0, evt_num = 0; char name[MAX_NAME_LEN]; + char **evt_list = NULL; + bool evt_num_known = false; + +restart: + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_enomem; + syms -= max; + } for (i = 0; i < max; i++, syms++) { @@ -1314,23 +1371,49 @@ static void print_symbol_events(const char *event_glob, unsigned type, if (!is_event_supported(type, i)) continue; - if (name_only) { - printf("%s ", syms->symbol); + if (!evt_num_known) { + evt_num++; continue; } - if (strlen(syms->alias)) + if (!name_only && strlen(syms->alias)) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else strncpy(name, syms->symbol, MAX_NAME_LEN); - printf(" %-50s [%s]\n", name, event_type_descriptors[type]); - - printed++; + evt_list[evt_i] = strdup(name); + if (evt_list[evt_i] == NULL) + goto out_enomem; + evt_i++; } - if (printed) + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]); + } + if (evt_num) printf("\n"); + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return; + +out_enomem: + printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]); + if (evt_list) + goto out_free; } /* @@ -1338,11 +1421,6 @@ static void print_symbol_events(const char *event_glob, unsigned type, */ void print_events(const char *event_glob, bool name_only) { - if (!name_only) { - printf("\n"); - printf("List of pre-defined events (to be used in -e):\n"); - } - print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ff6e1fa4111e..52a2dda4f954 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -116,12 +116,21 @@ void parse_events_update_lists(struct list_head *list_event, void parse_events_error(void *data, void *scanner, char const *msg); void print_events(const char *event_glob, bool name_only); -void print_events_type(u8 type); + +struct event_symbol { + const char *symbol; + const char *alias; +}; +extern struct event_symbol event_symbols_hw[]; +extern struct event_symbol event_symbols_sw[]; +void print_symbol_events(const char *event_glob, unsigned type, + struct event_symbol *syms, unsigned max, + bool name_only); void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only); int print_hwcache_events(const char *event_glob, bool name_only); extern int is_valid_tracepoint(const char *event_string); -extern int valid_debugfs_mount(const char *debugfs); +int valid_event_mount(const char *eventfs); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 94eacb6c1ef7..8895cf3132ab 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -101,7 +101,7 @@ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?][a-zA-Z0-9_*?]* name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?]* /* If you add a modifier you need to update check_modifier() */ -modifier_event [ukhpGHSD]+ +modifier_event [ukhpGHSDI]+ modifier_bp [rwx]{1,3} %% diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 4a015f77e2b5..01626be2a8eb 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -37,6 +37,7 @@ static int get_value(struct parse_opt_ctx_t *p, { const char *s, *arg = NULL; const int unset = flags & OPT_UNSET; + int err; if (unset && p->opt) return opterror(opt, "takes no value", flags); @@ -114,13 +115,29 @@ static int get_value(struct parse_opt_ctx_t *p, return 0; case OPTION_STRING: + err = 0; if (unset) *(const char **)opt->value = NULL; else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) *(const char **)opt->value = (const char *)opt->defval; else - return get_arg(p, opt, flags, (const char **)opt->value); - return 0; + err = get_arg(p, opt, flags, (const char **)opt->value); + + /* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */ + if (opt->flags & PARSE_OPT_NOEMPTY) { + const char *val = *(const char **)opt->value; + + if (!val) + return err; + + /* Similar to unset if we are given an empty string. */ + if (val[0] == '\0') { + *(const char **)opt->value = NULL; + return 0; + } + } + + return err; case OPTION_CALLBACK: if (unset) @@ -505,13 +522,18 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o break; case PARSE_OPT_LIST_OPTS: while (options->type != OPTION_END) { - printf("--%s ", options->long_name); + if (options->long_name) + printf("--%s ", options->long_name); options++; } + putchar('\n'); exit(130); case PARSE_OPT_LIST_SUBCMDS: - for (int i = 0; subcommands[i]; i++) - printf("%s ", subcommands[i]); + if (subcommands) { + for (int i = 0; subcommands[i]; i++) + printf("%s ", subcommands[i]); + } + putchar('\n'); exit(130); default: /* PARSE_OPT_UNKNOWN */ if (ctx.argv[0][1] == '-') { diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index 97b153fb4999..59561fd86278 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -40,6 +40,7 @@ enum parse_opt_option_flags { PARSE_OPT_LASTARG_DEFAULT = 16, PARSE_OPT_DISABLED = 32, PARSE_OPT_EXCLUSIVE = 64, + PARSE_OPT_NOEMPTY = 128, }; struct option; @@ -122,6 +123,7 @@ struct option { #define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) } #define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) } #define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h) } +#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY} #define OPT_DATE(s, l, v, h) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } #define OPT_CALLBACK(s, l, v, a, h, f) \ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 919937eb0be2..30545ce2c712 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -41,6 +41,7 @@ #include "symbol.h" #include "thread.h" #include <api/fs/debugfs.h> +#include <api/fs/tracefs.h> #include "trace-event.h" /* For __maybe_unused */ #include "probe-event.h" #include "probe-finder.h" @@ -79,6 +80,7 @@ static int init_symbol_maps(bool user_only) int ret; symbol_conf.sort_by_name = true; + symbol_conf.allow_aliases = true; ret = symbol__init(NULL); if (ret < 0) { pr_debug("Failed to init symbol map.\n"); @@ -133,6 +135,8 @@ static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) return NULL; kmap = map__kmap(host_machine->vmlinux_maps[MAP__FUNCTION]); + if (!kmap) + return NULL; return kmap->ref_reloc_sym; } @@ -150,7 +154,7 @@ static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc) sym = __find_kernel_function_by_name(name, &map); if (sym) return map->unmap_ip(map, sym->start) - - (reloc) ? 0 : map->reloc; + ((reloc) ? 0 : map->reloc); } return 0; } @@ -177,6 +181,25 @@ static struct map *kernel_get_module_map(const char *module) return NULL; } +static struct map *get_target_map(const char *target, bool user) +{ + /* Init maps of given executable or kernel */ + if (user) + return dso__new_map(target); + else + return kernel_get_module_map(target); +} + +static void put_target_map(struct map *map, bool user) +{ + if (map && user) { + /* Only the user map needs to be released */ + dso__delete(map->dso); + map__delete(map); + } +} + + static struct dso *kernel_get_module_dso(const char *module) { struct dso *dso; @@ -248,6 +271,13 @@ out: return ret; } +static void clear_perf_probe_point(struct perf_probe_point *pp) +{ + free(pp->file); + free(pp->function); + free(pp->lazy_line); +} + static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) { int i; @@ -257,6 +287,103 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) } #ifdef HAVE_DWARF_SUPPORT +/* + * Some binaries like glibc have special symbols which are on the symbol + * table, but not in the debuginfo. If we can find the address of the + * symbol from map, we can translate the address back to the probe point. + */ +static int find_alternative_probe_point(struct debuginfo *dinfo, + struct perf_probe_point *pp, + struct perf_probe_point *result, + const char *target, bool uprobes) +{ + struct map *map = NULL; + struct symbol *sym; + u64 address = 0; + int ret = -ENOENT; + + /* This can work only for function-name based one */ + if (!pp->function || pp->file) + return -ENOTSUP; + + map = get_target_map(target, uprobes); + if (!map) + return -EINVAL; + + /* Find the address of given function */ + map__for_each_symbol_by_name(map, pp->function, sym) { + if (uprobes) + address = sym->start; + else + address = map->unmap_ip(map, sym->start); + break; + } + if (!address) { + ret = -ENOENT; + goto out; + } + pr_debug("Symbol %s address found : %" PRIx64 "\n", + pp->function, address); + + ret = debuginfo__find_probe_point(dinfo, (unsigned long)address, + result); + if (ret <= 0) + ret = (!ret) ? -ENOENT : ret; + else { + result->offset += pp->offset; + result->line += pp->line; + ret = 0; + } + +out: + put_target_map(map, uprobes); + return ret; + +} + +static int get_alternative_probe_event(struct debuginfo *dinfo, + struct perf_probe_event *pev, + struct perf_probe_point *tmp, + const char *target) +{ + int ret; + + memcpy(tmp, &pev->point, sizeof(*tmp)); + memset(&pev->point, 0, sizeof(pev->point)); + ret = find_alternative_probe_point(dinfo, tmp, &pev->point, + target, pev->uprobes); + if (ret < 0) + memcpy(&pev->point, tmp, sizeof(*tmp)); + + return ret; +} + +static int get_alternative_line_range(struct debuginfo *dinfo, + struct line_range *lr, + const char *target, bool user) +{ + struct perf_probe_point pp = { .function = lr->function, + .file = lr->file, + .line = lr->start }; + struct perf_probe_point result; + int ret, len = 0; + + memset(&result, 0, sizeof(result)); + + if (lr->end != INT_MAX) + len = lr->end - lr->start; + ret = find_alternative_probe_point(dinfo, &pp, &result, + target, user); + if (!ret) { + lr->function = result.function; + lr->file = result.file; + lr->start = result.line; + if (lr->end != INT_MAX) + lr->end = lr->start + len; + clear_perf_probe_point(&pp); + } + return ret; +} /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module, bool silent) @@ -465,6 +592,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, int max_tevs, const char *target) { bool need_dwarf = perf_probe_event_need_dwarf(pev); + struct perf_probe_point tmp; struct debuginfo *dinfo; int ntevs, ret = 0; @@ -481,6 +609,20 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, /* Searching trace events corresponding to a probe event */ ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs); + if (ntevs == 0) { /* Not found, retry with an alternative */ + ret = get_alternative_probe_event(dinfo, pev, &tmp, target); + if (!ret) { + ntevs = debuginfo__find_trace_events(dinfo, pev, + tevs, max_tevs); + /* + * Write back to the original probe_event for + * setting appropriate (user given) event name + */ + clear_perf_probe_point(&pev->point); + memcpy(&pev->point, &tmp, sizeof(tmp)); + } + } + debuginfo__delete(dinfo); if (ntevs > 0) { /* Succeeded to find trace events */ @@ -495,11 +637,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, } if (ntevs == 0) { /* No error but failed to find probe point. */ - pr_warning("Probe point '%s' not found in debuginfo.\n", + pr_warning("Probe point '%s' not found.\n", synthesize_perf_probe_point(&pev->point)); - if (need_dwarf) - return -ENOENT; - return 0; + return -ENOENT; } /* Error path : ntevs < 0 */ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); @@ -532,7 +672,7 @@ static int get_real_path(const char *raw_path, const char *comp_dir, else { if (access(raw_path, R_OK) == 0) { *new_path = strdup(raw_path); - return 0; + return *new_path ? 0 : -ENOMEM; } else return -errno; } @@ -548,9 +688,11 @@ static int get_real_path(const char *raw_path, const char *comp_dir, if (access(*new_path, R_OK) == 0) return 0; - if (!symbol_conf.source_prefix) + if (!symbol_conf.source_prefix) { /* In case of searching comp_dir, don't retry */ + zfree(new_path); return -errno; + } switch (errno) { case ENAMETOOLONG: @@ -622,7 +764,8 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) * Show line-range always requires debuginfo to find source file and * line number. */ -static int __show_line_range(struct line_range *lr, const char *module) +static int __show_line_range(struct line_range *lr, const char *module, + bool user) { int l = 1; struct int_node *ln; @@ -638,6 +781,11 @@ static int __show_line_range(struct line_range *lr, const char *module) return -ENOENT; ret = debuginfo__find_line_range(dinfo, lr); + if (!ret) { /* Not found, retry with an alternative */ + ret = get_alternative_line_range(dinfo, lr, module, user); + if (!ret) + ret = debuginfo__find_line_range(dinfo, lr); + } debuginfo__delete(dinfo); if (ret == 0 || ret == -ENOENT) { pr_warning("Specified source line is not found.\n"); @@ -650,7 +798,11 @@ static int __show_line_range(struct line_range *lr, const char *module) /* Convert source file path */ tmp = lr->path; ret = get_real_path(tmp, lr->comp_dir, &lr->path); - free(tmp); /* Free old path */ + + /* Free old path when new path is assigned */ + if (tmp != lr->path) + free(tmp); + if (ret < 0) { pr_warning("Failed to find source file path.\n"); return ret; @@ -707,7 +859,7 @@ int show_line_range(struct line_range *lr, const char *module, bool user) ret = init_symbol_maps(user); if (ret < 0) return ret; - ret = __show_line_range(lr, module); + ret = __show_line_range(lr, module, user); exit_symbol_maps(); return ret; @@ -716,12 +868,13 @@ int show_line_range(struct line_range *lr, const char *module, bool user) static int show_available_vars_at(struct debuginfo *dinfo, struct perf_probe_event *pev, int max_vls, struct strfilter *_filter, - bool externs) + bool externs, const char *target) { char *buf; int ret, i, nvars; struct str_node *node; struct variable_list *vls = NULL, *vl; + struct perf_probe_point tmp; const char *var; buf = synthesize_perf_probe_point(&pev->point); @@ -731,6 +884,15 @@ static int show_available_vars_at(struct debuginfo *dinfo, ret = debuginfo__find_available_vars_at(dinfo, pev, &vls, max_vls, externs); + if (!ret) { /* Not found, retry with an alternative */ + ret = get_alternative_probe_event(dinfo, pev, &tmp, target); + if (!ret) { + ret = debuginfo__find_available_vars_at(dinfo, pev, + &vls, max_vls, externs); + /* Release the old probe_point */ + clear_perf_probe_point(&tmp); + } + } if (ret <= 0) { if (ret == 0 || ret == -ENOENT) { pr_err("Failed to find the address of %s\n", buf); @@ -793,7 +955,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, for (i = 0; i < npevs && ret >= 0; i++) ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter, - externs); + externs, module); debuginfo__delete(dinfo); out: @@ -1739,15 +1901,13 @@ static int convert_to_perf_probe_event(struct probe_trace_event *tev, void clear_perf_probe_event(struct perf_probe_event *pev) { - struct perf_probe_point *pp = &pev->point; struct perf_probe_arg_field *field, *next; int i; free(pev->event); free(pev->group); - free(pp->file); - free(pp->function); - free(pp->lazy_line); + free(pev->target); + clear_perf_probe_point(&pev->point); for (i = 0; i < pev->nargs; i++) { free(pev->args[i].name); @@ -1805,7 +1965,7 @@ static void print_open_warning(int err, bool is_kprobe) " - please rebuild kernel with %s.\n", is_kprobe ? 'k' : 'u', config); } else if (err == -ENOTSUP) - pr_warning("Debugfs is not mounted.\n"); + pr_warning("Tracefs or debugfs is not mounted.\n"); else pr_warning("Failed to open %cprobe_events: %s\n", is_kprobe ? 'k' : 'u', @@ -1816,7 +1976,7 @@ static void print_both_open_warning(int kerr, int uerr) { /* Both kprobes and uprobes are disabled, warn it. */ if (kerr == -ENOTSUP && uerr == -ENOTSUP) - pr_warning("Debugfs is not mounted.\n"); + pr_warning("Tracefs or debugfs is not mounted.\n"); else if (kerr == -ENOENT && uerr == -ENOENT) pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS " "or/and CONFIG_UPROBE_EVENTS.\n"); @@ -1833,13 +1993,20 @@ static int open_probe_events(const char *trace_file, bool readwrite) { char buf[PATH_MAX]; const char *__debugfs; + const char *tracing_dir = ""; int ret; - __debugfs = debugfs_find_mountpoint(); - if (__debugfs == NULL) - return -ENOTSUP; + __debugfs = tracefs_find_mountpoint(); + if (__debugfs == NULL) { + tracing_dir = "tracing/"; + + __debugfs = debugfs_find_mountpoint(); + if (__debugfs == NULL) + return -ENOTSUP; + } - ret = e_snprintf(buf, PATH_MAX, "%s/%s", __debugfs, trace_file); + ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", + __debugfs, tracing_dir, trace_file); if (ret >= 0) { pr_debug("Opening %s write=%d\n", buf, readwrite); if (readwrite && !probe_event_dry_run) @@ -1855,12 +2022,12 @@ static int open_probe_events(const char *trace_file, bool readwrite) static int open_kprobe_events(bool readwrite) { - return open_probe_events("tracing/kprobe_events", readwrite); + return open_probe_events("kprobe_events", readwrite); } static int open_uprobe_events(bool readwrite) { - return open_probe_events("tracing/uprobe_events", readwrite); + return open_probe_events("uprobe_events", readwrite); } /* Get raw string list of current kprobe_events or uprobe_events */ @@ -1895,6 +2062,95 @@ static struct strlist *get_probe_trace_command_rawlist(int fd) return sl; } +struct kprobe_blacklist_node { + struct list_head list; + unsigned long start; + unsigned long end; + char *symbol; +}; + +static void kprobe_blacklist__delete(struct list_head *blacklist) +{ + struct kprobe_blacklist_node *node; + + while (!list_empty(blacklist)) { + node = list_first_entry(blacklist, + struct kprobe_blacklist_node, list); + list_del(&node->list); + free(node->symbol); + free(node); + } +} + +static int kprobe_blacklist__load(struct list_head *blacklist) +{ + struct kprobe_blacklist_node *node; + const char *__debugfs = debugfs_find_mountpoint(); + char buf[PATH_MAX], *p; + FILE *fp; + int ret; + + if (__debugfs == NULL) + return -ENOTSUP; + + ret = e_snprintf(buf, PATH_MAX, "%s/kprobes/blacklist", __debugfs); + if (ret < 0) + return ret; + + fp = fopen(buf, "r"); + if (!fp) + return -errno; + + ret = 0; + while (fgets(buf, PATH_MAX, fp)) { + node = zalloc(sizeof(*node)); + if (!node) { + ret = -ENOMEM; + break; + } + INIT_LIST_HEAD(&node->list); + list_add_tail(&node->list, blacklist); + if (sscanf(buf, "0x%lx-0x%lx", &node->start, &node->end) != 2) { + ret = -EINVAL; + break; + } + p = strchr(buf, '\t'); + if (p) { + p++; + if (p[strlen(p) - 1] == '\n') + p[strlen(p) - 1] = '\0'; + } else + p = (char *)"unknown"; + node->symbol = strdup(p); + if (!node->symbol) { + ret = -ENOMEM; + break; + } + pr_debug2("Blacklist: 0x%lx-0x%lx, %s\n", + node->start, node->end, node->symbol); + ret++; + } + if (ret < 0) + kprobe_blacklist__delete(blacklist); + fclose(fp); + + return ret; +} + +static struct kprobe_blacklist_node * +kprobe_blacklist__find_by_address(struct list_head *blacklist, + unsigned long address) +{ + struct kprobe_blacklist_node *node; + + list_for_each_entry(node, blacklist, list) { + if (node->start <= address && address <= node->end) + return node; + } + + return NULL; +} + /* Show an event */ static int show_perf_probe_event(struct perf_probe_event *pev, const char *module) @@ -2100,6 +2356,27 @@ static int get_new_event_name(char *buf, size_t len, const char *base, return ret; } +/* Warn if the current kernel's uprobe implementation is old */ +static void warn_uprobe_event_compat(struct probe_trace_event *tev) +{ + int i; + char *buf = synthesize_probe_trace_command(tev); + + /* Old uprobe event doesn't support memory dereference */ + if (!tev->uprobes || tev->nargs == 0 || !buf) + goto out; + + for (i = 0; i < tev->nargs; i++) + if (strglobmatch(tev->args[i].value, "[$@+-]*")) { + pr_warning("Please upgrade your kernel to at least " + "3.14 to have access to feature %s\n", + tev->args[i].value); + break; + } +out: + free(buf); +} + static int __add_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event *tevs, int ntevs, bool allow_suffix) @@ -2109,6 +2386,8 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, char buf[64]; const char *event, *group; struct strlist *namelist; + LIST_HEAD(blacklist); + struct kprobe_blacklist_node *node; if (pev->uprobes) fd = open_uprobe_events(true); @@ -2126,11 +2405,25 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, pr_debug("Failed to get current event list.\n"); return -EIO; } + /* Get kprobe blacklist if exists */ + if (!pev->uprobes) { + ret = kprobe_blacklist__load(&blacklist); + if (ret < 0) + pr_debug("No kprobe blacklist support, ignored\n"); + } ret = 0; pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); for (i = 0; i < ntevs; i++) { tev = &tevs[i]; + /* Ensure that the address is NOT blacklisted */ + node = kprobe_blacklist__find_by_address(&blacklist, + tev->point.address); + if (node) { + pr_warning("Warning: Skipped probing on blacklisted function: %s\n", node->symbol); + continue; + } + if (pev->event) event = pev->event; else @@ -2180,14 +2473,18 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, */ allow_suffix = true; } + if (ret == -EINVAL && pev->uprobes) + warn_uprobe_event_compat(tev); - if (ret >= 0) { + /* Note that it is possible to skip all events because of blacklist */ + if (ret >= 0 && tev->event) { /* Show how to use the event. */ pr_info("\nYou can now use it in all perf tools, such as:\n\n"); pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, tev->event); } + kprobe_blacklist__delete(&blacklist); strlist__delete(namelist); close(fd); return ret; @@ -2199,8 +2496,7 @@ static int find_probe_functions(struct map *map, char *name) struct symbol *sym; map__for_each_symbol_by_name(map, name, sym) { - if (sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) - found++; + found++; } return found; @@ -2218,7 +2514,6 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, int max_tevs, const char *target) { struct map *map = NULL; - struct kmap *kmap = NULL; struct ref_reloc_sym *reloc_sym = NULL; struct symbol *sym; struct probe_trace_event *tev; @@ -2227,11 +2522,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, int num_matched_functions; int ret, i; - /* Init maps of given executable or kernel */ - if (pev->uprobes) - map = dso__new_map(target); - else - map = kernel_get_module_map(target); + map = get_target_map(target, pev->uprobes); if (!map) { ret = -EINVAL; goto out; @@ -2255,8 +2546,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } if (!pev->uprobes && !pp->retprobe) { - kmap = map__kmap(map); - reloc_sym = kmap->ref_reloc_sym; + reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); ret = -EINVAL; @@ -2324,11 +2614,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } out: - if (map && pev->uprobes) { - /* Only when using uprobe(exec) map needs to be released */ - dso__delete(map->dso); - map__delete(map); - } + put_target_map(map, pev->uprobes); return ret; nomem_out: @@ -2369,7 +2655,7 @@ struct __event_package { }; int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, - int max_tevs, const char *target, bool force_add) + int max_tevs, bool force_add) { int i, j, ret; struct __event_package *pkgs; @@ -2393,7 +2679,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, ret = convert_to_probe_trace_events(pkgs[i].pev, &pkgs[i].tevs, max_tevs, - target); + pkgs[i].pev->target); if (ret < 0) goto end; pkgs[i].ntevs = ret; @@ -2568,8 +2854,7 @@ static struct strfilter *available_func_filter; static int filter_available_functions(struct map *map __maybe_unused, struct symbol *sym) { - if ((sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) && - strfilter__compare(available_func_filter, sym->name)) + if (strfilter__compare(available_func_filter, sym->name)) return 0; return 1; } diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index e01e9943139f..d6b783447be9 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -73,7 +73,8 @@ struct perf_probe_event { char *group; /* Group name */ struct perf_probe_point point; /* Probe point */ int nargs; /* Number of arguments */ - bool uprobes; + bool uprobes; /* Uprobe event flag */ + char *target; /* Target binary */ struct perf_probe_arg *args; /* Arguments */ }; @@ -124,8 +125,7 @@ extern int line_range__init(struct line_range *lr); extern const char *kernel_get_module_path(const char *module); extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, - int max_probe_points, const char *module, - bool force_add); + int max_probe_points, bool force_add); extern int del_perf_probe_events(struct strlist *dellist); extern int show_perf_probe_events(void); extern int show_line_range(struct line_range *lr, const char *module, diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index b5247d777f0e..e3074230f236 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -456,11 +456,12 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, return -EINVAL; } if (field->name[0] == '[') { - pr_err("Semantic error: %s is not a pointor" + pr_err("Semantic error: %s is not a pointer" " nor array.\n", varname); return -EINVAL; } - if (field->ref) { + /* While prcessing unnamed field, we don't care about this */ + if (field->ref && dwarf_diename(vr_die)) { pr_err("Semantic error: %s must be referred by '.'\n", field->name); return -EINVAL; @@ -491,6 +492,11 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, } ref->offset += (long)offs; + /* If this member is unnamed, we need to reuse this field */ + if (!dwarf_diename(die_mem)) + return convert_variable_fields(die_mem, varname, field, + &ref, die_mem); + next: /* Converting next field */ if (field->next) @@ -915,17 +921,13 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) dwarf_decl_line(sp_die, &pf->lno); pf->lno += pp->line; param->retval = find_probe_point_by_line(pf); - } else if (!dwarf_func_inline(sp_die)) { + } else if (die_is_func_instance(sp_die)) { + /* Instances always have the entry address */ + dwarf_entrypc(sp_die, &pf->addr); /* Real function */ if (pp->lazy_line) param->retval = find_probe_point_lazy(sp_die, pf); else { - if (dwarf_entrypc(sp_die, &pf->addr) != 0) { - pr_warning("Failed to get entry address of " - "%s.\n", dwarf_diename(sp_die)); - param->retval = -ENOENT; - return DWARF_CB_ABORT; - } pf->addr += pp->offset; /* TODO: Check the address in this function */ param->retval = call_probe_finder(sp_die, pf); @@ -1349,11 +1351,8 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp; int baseline = 0, lineno = 0, ret = 0; - /* Adjust address with bias */ - addr += dbg->bias; - /* Find cu die */ - if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr - dbg->bias, &cudie)) { + if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { pr_warning("Failed to find debug information for address %lx\n", addr); ret = -EINVAL; @@ -1536,7 +1535,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e); lr->start = lf->lno_s; lr->end = lf->lno_e; - if (dwarf_func_inline(sp_die)) + if (!die_is_func_instance(sp_die)) param->retval = die_walk_instances(sp_die, line_range_inline_cb, lf); else diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 6c6a6953fa93..4d28624a1eca 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -17,6 +17,5 @@ util/xyarray.c util/cgroup.c util/rblist.c util/strlist.c -../lib/api/fs/fs.c util/trace-event.c ../../lib/rbtree.c diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build new file mode 100644 index 000000000000..6516e220c247 --- /dev/null +++ b/tools/perf/util/scripting-engines/Build @@ -0,0 +1,6 @@ +libperf-$(CONFIG_LIBPERL) += trace-event-perl.o +libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o + +CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default + +CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 22ebc46226e7..430b5d27828e 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -214,6 +214,11 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->hex.field); define_event_symbols(event, ev_name, args->hex.size); break; + case PRINT_INT_ARRAY: + define_event_symbols(event, ev_name, args->int_array.field); + define_event_symbols(event, ev_name, args->int_array.count); + define_event_symbols(event, ev_name, args->int_array.el_size); + break; case PRINT_BSTRING: case PRINT_DYNAMIC_ARRAY: case PRINT_STRING: @@ -355,10 +360,9 @@ static void perl_process_event_generic(union perf_event *event, static void perl_process_event(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, - struct addr_location *al __maybe_unused) + struct addr_location *al) { - perl_process_tracepoint(sample, evsel, thread); + perl_process_tracepoint(sample, evsel, al->thread); perl_process_event_generic(event, sample, evsel); } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 0c815a40a6e8..5544b8cdd1ee 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -231,6 +231,11 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->hex.field); define_event_symbols(event, ev_name, args->hex.size); break; + case PRINT_INT_ARRAY: + define_event_symbols(event, ev_name, args->int_array.field); + define_event_symbols(event, ev_name, args->int_array.count); + define_event_symbols(event, ev_name, args->int_array.el_size); + break; case PRINT_STRING: break; case PRINT_TYPE: @@ -376,7 +381,6 @@ exit: static void python_process_tracepoint(struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al) { struct event_format *event = evsel->tp_format; @@ -390,7 +394,7 @@ static void python_process_tracepoint(struct perf_sample *sample, int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; - const char *comm = thread__comm_str(thread); + const char *comm = thread__comm_str(al->thread); t = PyTuple_New(MAX_FIELDS); if (!t) @@ -675,7 +679,7 @@ static int python_export_sample(struct db_export *dbe, tuple_set_u64(t, 0, es->db_id); tuple_set_u64(t, 1, es->evsel->db_id); tuple_set_u64(t, 2, es->al->machine->db_id); - tuple_set_u64(t, 3, es->thread->db_id); + tuple_set_u64(t, 3, es->al->thread->db_id); tuple_set_u64(t, 4, es->comm_db_id); tuple_set_u64(t, 5, es->dso_db_id); tuple_set_u64(t, 6, es->sym_db_id); @@ -761,7 +765,6 @@ static int python_process_call_return(struct call_return *cr, void *data) static void python_process_general_event(struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al) { PyObject *handler, *t, *dict, *callchain, *dict_sample; @@ -811,7 +814,7 @@ static void python_process_general_event(struct perf_sample *sample, pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize( (const char *)sample->raw_data, sample->raw_size)); pydict_set_item_string_decref(dict, "comm", - PyString_FromString(thread__comm_str(thread))); + PyString_FromString(thread__comm_str(al->thread))); if (al->map) { pydict_set_item_string_decref(dict, "dso", PyString_FromString(al->map->dso->name)); @@ -838,22 +841,20 @@ exit: static void python_process_event(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al) { struct tables *tables = &tables_global; switch (evsel->attr.type) { case PERF_TYPE_TRACEPOINT: - python_process_tracepoint(sample, evsel, thread, al); + python_process_tracepoint(sample, evsel, al); break; /* Reserve for future process_hw/sw/raw APIs */ default: if (tables->db_export_mode) - db_export__sample(&tables->dbe, event, sample, evsel, - thread, al); + db_export__sample(&tables->dbe, event, sample, evsel, al); else - python_process_general_event(sample, evsel, thread, al); + python_process_general_event(sample, evsel, al); } } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0baf75f12b7c..0c74012575ac 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -16,6 +16,12 @@ #include "perf_regs.h" #include "asm/bug.h" +static int machines__deliver_event(struct machines *machines, + struct perf_evlist *evlist, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool, u64 file_offset); + static int perf_session__open(struct perf_session *session) { struct perf_data_file *file = session->file; @@ -86,6 +92,23 @@ static void perf_session__set_comm_exec(struct perf_session *session) machines__set_comm_exec(&session->machines, comm_exec); } +static int ordered_events__deliver_event(struct ordered_events *oe, + struct ordered_event *event) +{ + struct perf_sample sample; + struct perf_session *session = container_of(oe, struct perf_session, + ordered_events); + int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample); + + if (ret) { + pr_err("Can't parse sample, err = %d\n", ret); + return ret; + } + + return machines__deliver_event(&session->machines, session->evlist, event->event, + &sample, session->tool, event->file_offset); +} + struct perf_session *perf_session__new(struct perf_data_file *file, bool repipe, struct perf_tool *tool) { @@ -95,8 +118,9 @@ struct perf_session *perf_session__new(struct perf_data_file *file, goto out; session->repipe = repipe; - ordered_events__init(&session->ordered_events); + session->tool = tool; machines__init(&session->machines); + ordered_events__init(&session->ordered_events, ordered_events__deliver_event); if (file) { if (perf_data_file__open(file)) @@ -138,11 +162,6 @@ struct perf_session *perf_session__new(struct perf_data_file *file, return NULL; } -static void perf_session__delete_dead_threads(struct perf_session *session) -{ - machine__delete_dead_threads(&session->machines.host); -} - static void perf_session__delete_threads(struct perf_session *session) { machine__delete_threads(&session->machines.host); @@ -167,7 +186,6 @@ static void perf_session_env__delete(struct perf_session_env *env) void perf_session__delete(struct perf_session *session) { perf_session__destroy_kernel_maps(session); - perf_session__delete_dead_threads(session); perf_session__delete_threads(session); perf_session_env__delete(&session->header.env); machines__exit(&session->machines); @@ -215,10 +233,17 @@ static int process_event_stub(struct perf_tool *tool __maybe_unused, return 0; } +static int process_build_id_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, - struct perf_session *perf_session - __maybe_unused) + struct ordered_events *oe __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -226,7 +251,7 @@ static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, static int process_finished_round(struct perf_tool *tool, union perf_event *event, - struct perf_session *session); + struct ordered_events *oe); static int process_id_index_stub(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, @@ -264,7 +289,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool) if (tool->tracing_data == NULL) tool->tracing_data = process_event_synth_tracing_data_stub; if (tool->build_id == NULL) - tool->build_id = process_finished_round_stub; + tool->build_id = process_build_id_stub; if (tool->finished_round == NULL) { if (tool->ordered_events) tool->finished_round = process_finished_round; @@ -514,54 +539,80 @@ static perf_event__swap_op perf_event__swap_ops[] = { * Flush every events below timestamp 7 * etc... */ -static int process_finished_round(struct perf_tool *tool, +static int process_finished_round(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, - struct perf_session *session) + struct ordered_events *oe) { - return ordered_events__flush(session, tool, OE_FLUSH__ROUND); + return ordered_events__flush(oe, OE_FLUSH__ROUND); } -int perf_session_queue_event(struct perf_session *s, union perf_event *event, - struct perf_tool *tool, struct perf_sample *sample, - u64 file_offset) +int perf_session__queue_event(struct perf_session *s, union perf_event *event, + struct perf_sample *sample, u64 file_offset) { - struct ordered_events *oe = &s->ordered_events; - u64 timestamp = sample->time; - struct ordered_event *new; - - if (!timestamp || timestamp == ~0ULL) - return -ETIME; + return ordered_events__queue(&s->ordered_events, event, sample, file_offset); +} - if (timestamp < oe->last_flush) { - pr_oe_time(timestamp, "out of order event\n"); - pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n", - oe->last_flush_type); +static void callchain__lbr_callstack_printf(struct perf_sample *sample) +{ + struct ip_callchain *callchain = sample->callchain; + struct branch_stack *lbr_stack = sample->branch_stack; + u64 kernel_callchain_nr = callchain->nr; + unsigned int i; - s->stats.nr_unordered_events++; + for (i = 0; i < kernel_callchain_nr; i++) { + if (callchain->ips[i] == PERF_CONTEXT_USER) + break; } - new = ordered_events__new(oe, timestamp, event); - if (!new) { - ordered_events__flush(s, tool, OE_FLUSH__HALF); - new = ordered_events__new(oe, timestamp, event); - } + if ((i != kernel_callchain_nr) && lbr_stack->nr) { + u64 total_nr; + /* + * LBR callstack can only get user call chain, + * i is kernel call chain number, + * 1 is PERF_CONTEXT_USER. + * + * The user call chain is stored in LBR registers. + * LBR are pair registers. The caller is stored + * in "from" register, while the callee is stored + * in "to" register. + * For example, there is a call stack + * "A"->"B"->"C"->"D". + * The LBR registers will recorde like + * "C"->"D", "B"->"C", "A"->"B". + * So only the first "to" register and all "from" + * registers are needed to construct the whole stack. + */ + total_nr = i + 1 + lbr_stack->nr + 1; + kernel_callchain_nr = i + 1; - if (!new) - return -ENOMEM; + printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr); - new->file_offset = file_offset; - return 0; + for (i = 0; i < kernel_callchain_nr; i++) + printf("..... %2d: %016" PRIx64 "\n", + i, callchain->ips[i]); + + printf("..... %2d: %016" PRIx64 "\n", + (int)(kernel_callchain_nr), lbr_stack->entries[0].to); + for (i = 0; i < lbr_stack->nr; i++) + printf("..... %2d: %016" PRIx64 "\n", + (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from); + } } -static void callchain__printf(struct perf_sample *sample) +static void callchain__printf(struct perf_evsel *evsel, + struct perf_sample *sample) { unsigned int i; + struct ip_callchain *callchain = sample->callchain; + + if (has_branch_callstack(evsel)) + callchain__lbr_callstack_printf(sample); - printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr); + printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr); - for (i = 0; i < sample->callchain->nr; i++) + for (i = 0; i < callchain->nr; i++) printf("..... %2d: %016" PRIx64 "\n", - i, sample->callchain->ips[i]); + i, callchain->ips[i]); } static void branch_stack__printf(struct perf_sample *sample) @@ -636,14 +687,14 @@ static void stack_user__printf(struct stack_dump *dump) dump->size, dump->offset); } -static void perf_session__print_tstamp(struct perf_session *session, +static void perf_evlist__print_tstamp(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample) { - u64 sample_type = __perf_evlist__combined_sample_type(session->evlist); + u64 sample_type = __perf_evlist__combined_sample_type(evlist); if (event->header.type != PERF_RECORD_SAMPLE && - !perf_evlist__sample_id_all(session->evlist)) { + !perf_evlist__sample_id_all(evlist)) { fputs("-1 -1 ", stdout); return; } @@ -685,7 +736,7 @@ static void sample_read__printf(struct perf_sample *sample, u64 read_format) sample->read.one.id, sample->read.one.value); } -static void dump_event(struct perf_session *session, union perf_event *event, +static void dump_event(struct perf_evlist *evlist, union perf_event *event, u64 file_offset, struct perf_sample *sample) { if (!dump_trace) @@ -697,7 +748,7 @@ static void dump_event(struct perf_session *session, union perf_event *event, trace_event(event); if (sample) - perf_session__print_tstamp(session, event, sample); + perf_evlist__print_tstamp(evlist, event, sample); printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset, event->header.size, perf_event__name(event->header.type)); @@ -718,9 +769,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_type = evsel->attr.sample_type; if (sample_type & PERF_SAMPLE_CALLCHAIN) - callchain__printf(sample); + callchain__printf(evsel, sample); - if (sample_type & PERF_SAMPLE_BRANCH_STACK) + if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !has_branch_callstack(evsel)) branch_stack__printf(sample); if (sample_type & PERF_SAMPLE_REGS_USER) @@ -745,8 +796,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_read__printf(sample, evsel->attr.read_format); } -static struct machine * - perf_session__find_machine_for_cpumode(struct perf_session *session, +static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, struct perf_sample *sample) { @@ -764,26 +814,24 @@ static struct machine * else pid = sample->pid; - machine = perf_session__find_machine(session, pid); + machine = machines__find(machines, pid); if (!machine) - machine = perf_session__findnew_machine(session, - DEFAULT_GUEST_KERNEL_ID); + machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID); return machine; } - return &session->machines.host; + return &machines->host; } -static int deliver_sample_value(struct perf_session *session, +static int deliver_sample_value(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct sample_read_value *v, struct machine *machine) { - struct perf_sample_id *sid; + struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id); - sid = perf_evlist__id2sid(session->evlist, v->id); if (sid) { sample->id = v->id; sample->period = v->value - sid->period; @@ -791,14 +839,14 @@ static int deliver_sample_value(struct perf_session *session, } if (!sid || sid->evsel == NULL) { - ++session->stats.nr_unknown_id; + ++evlist->stats.nr_unknown_id; return 0; } return tool->sample(tool, event, sample, sid->evsel, machine); } -static int deliver_sample_group(struct perf_session *session, +static int deliver_sample_group(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -808,7 +856,7 @@ static int deliver_sample_group(struct perf_session *session, u64 i; for (i = 0; i < sample->read.group.nr; i++) { - ret = deliver_sample_value(session, tool, event, sample, + ret = deliver_sample_value(evlist, tool, event, sample, &sample->read.group.values[i], machine); if (ret) @@ -819,7 +867,7 @@ static int deliver_sample_group(struct perf_session *session, } static int -perf_session__deliver_sample(struct perf_session *session, + perf_evlist__deliver_sample(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -836,41 +884,40 @@ perf_session__deliver_sample(struct perf_session *session, /* For PERF_SAMPLE_READ we have either single or group mode. */ if (read_format & PERF_FORMAT_GROUP) - return deliver_sample_group(session, tool, event, sample, + return deliver_sample_group(evlist, tool, event, sample, machine); else - return deliver_sample_value(session, tool, event, sample, + return deliver_sample_value(evlist, tool, event, sample, &sample->read.one, machine); } -int perf_session__deliver_event(struct perf_session *session, - union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool, u64 file_offset) +static int machines__deliver_event(struct machines *machines, + struct perf_evlist *evlist, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool, u64 file_offset) { struct perf_evsel *evsel; struct machine *machine; - dump_event(session, event, file_offset, sample); + dump_event(evlist, event, file_offset, sample); - evsel = perf_evlist__id2evsel(session->evlist, sample->id); + evsel = perf_evlist__id2evsel(evlist, sample->id); - machine = perf_session__find_machine_for_cpumode(session, event, - sample); + machine = machines__find_for_cpumode(machines, event, sample); switch (event->header.type) { case PERF_RECORD_SAMPLE: dump_sample(evsel, event, sample); if (evsel == NULL) { - ++session->stats.nr_unknown_id; + ++evlist->stats.nr_unknown_id; return 0; } if (machine == NULL) { - ++session->stats.nr_unprocessable_samples; + ++evlist->stats.nr_unprocessable_samples; return 0; } - return perf_session__deliver_sample(session, tool, event, - sample, evsel, machine); + return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); case PERF_RECORD_MMAP2: @@ -883,7 +930,7 @@ int perf_session__deliver_event(struct perf_session *session, return tool->exit(tool, event, sample, machine); case PERF_RECORD_LOST: if (tool->lost == perf_event__process_lost) - session->stats.total_lost += event->lost.lost; + evlist->stats.total_lost += event->lost.lost; return tool->lost(tool, event, sample, machine); case PERF_RECORD_READ: return tool->read(tool, event, sample, evsel, machine); @@ -892,20 +939,21 @@ int perf_session__deliver_event(struct perf_session *session, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); default: - ++session->stats.nr_unknown_events; + ++evlist->stats.nr_unknown_events; return -1; } } static s64 perf_session__process_user_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool, u64 file_offset) { + struct ordered_events *oe = &session->ordered_events; + struct perf_tool *tool = session->tool; int fd = perf_data_file__fd(session->file); int err; - dump_event(session, event, file_offset, NULL); + dump_event(session->evlist, event, file_offset, NULL); /* These events are processed right away */ switch (event->header.type) { @@ -929,7 +977,7 @@ static s64 perf_session__process_user_event(struct perf_session *session, case PERF_RECORD_HEADER_BUILD_ID: return tool->build_id(tool, event, session); case PERF_RECORD_FINISHED_ROUND: - return tool->finished_round(tool, event, session); + return tool->finished_round(tool, event, oe); case PERF_RECORD_ID_INDEX: return tool->id_index(tool, event, session); default: @@ -939,15 +987,17 @@ static s64 perf_session__process_user_event(struct perf_session *session, int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool) + struct perf_sample *sample) { - events_stats__inc(&session->stats, event->header.type); + struct perf_evlist *evlist = session->evlist; + struct perf_tool *tool = session->tool; + + events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) - return perf_session__process_user_event(session, event, tool, 0); + return perf_session__process_user_event(session, event, 0); - return perf_session__deliver_event(session, event, sample, tool, 0); + return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0); } static void event_swap(union perf_event *event, bool sample_id_all) @@ -1015,40 +1065,39 @@ out_parse_sample: } static s64 perf_session__process_event(struct perf_session *session, - union perf_event *event, - struct perf_tool *tool, - u64 file_offset) + union perf_event *event, u64 file_offset) { + struct perf_evlist *evlist = session->evlist; + struct perf_tool *tool = session->tool; struct perf_sample sample; int ret; if (session->header.needs_swap) - event_swap(event, perf_evlist__sample_id_all(session->evlist)); + event_swap(event, perf_evlist__sample_id_all(evlist)); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; - events_stats__inc(&session->stats, event->header.type); + events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) - return perf_session__process_user_event(session, event, tool, file_offset); + return perf_session__process_user_event(session, event, file_offset); /* * For all kernel events we get the sample data */ - ret = perf_evlist__parse_sample(session->evlist, event, &sample); + ret = perf_evlist__parse_sample(evlist, event, &sample); if (ret) return ret; if (tool->ordered_events) { - ret = perf_session_queue_event(session, event, tool, &sample, - file_offset); + ret = perf_session__queue_event(session, event, &sample, file_offset); if (ret != -ETIME) return ret; } - return perf_session__deliver_event(session, event, &sample, tool, - file_offset); + return machines__deliver_event(&session->machines, evlist, event, + &sample, tool, file_offset); } void perf_event_header__bswap(struct perf_event_header *hdr) @@ -1076,54 +1125,57 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se return thread; } -static void perf_session__warn_about_errors(const struct perf_session *session, - const struct perf_tool *tool) +static void perf_session__warn_about_errors(const struct perf_session *session) { - if (tool->lost == perf_event__process_lost && - session->stats.nr_events[PERF_RECORD_LOST] != 0) { + const struct events_stats *stats = &session->evlist->stats; + const struct ordered_events *oe = &session->ordered_events; + + if (session->tool->lost == perf_event__process_lost && + stats->nr_events[PERF_RECORD_LOST] != 0) { ui__warning("Processed %d events and lost %d chunks!\n\n" "Check IO/CPU overload!\n\n", - session->stats.nr_events[0], - session->stats.nr_events[PERF_RECORD_LOST]); + stats->nr_events[0], + stats->nr_events[PERF_RECORD_LOST]); } - if (session->stats.nr_unknown_events != 0) { + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " "file generated by a more recent tool?\n\n" "If that is not the case, consider " "reporting to linux-kernel@vger.kernel.org.\n\n", - session->stats.nr_unknown_events); + stats->nr_unknown_events); } - if (session->stats.nr_unknown_id != 0) { + if (stats->nr_unknown_id != 0) { ui__warning("%u samples with id not present in the header\n", - session->stats.nr_unknown_id); + stats->nr_unknown_id); } - if (session->stats.nr_invalid_chains != 0) { - ui__warning("Found invalid callchains!\n\n" - "%u out of %u events were discarded for this reason.\n\n" - "Consider reporting to linux-kernel@vger.kernel.org.\n\n", - session->stats.nr_invalid_chains, - session->stats.nr_events[PERF_RECORD_SAMPLE]); - } + if (stats->nr_invalid_chains != 0) { + ui__warning("Found invalid callchains!\n\n" + "%u out of %u events were discarded for this reason.\n\n" + "Consider reporting to linux-kernel@vger.kernel.org.\n\n", + stats->nr_invalid_chains, + stats->nr_events[PERF_RECORD_SAMPLE]); + } - if (session->stats.nr_unprocessable_samples != 0) { + if (stats->nr_unprocessable_samples != 0) { ui__warning("%u unprocessable samples recorded.\n" "Do you have a KVM guest running and not using 'perf kvm'?\n", - session->stats.nr_unprocessable_samples); + stats->nr_unprocessable_samples); } - if (session->stats.nr_unordered_events != 0) - ui__warning("%u out of order events recorded.\n", session->stats.nr_unordered_events); + if (oe->nr_unordered_events != 0) + ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events); } volatile int session_done; -static int __perf_session__process_pipe_events(struct perf_session *session, - struct perf_tool *tool) +static int __perf_session__process_pipe_events(struct perf_session *session) { + struct ordered_events *oe = &session->ordered_events; + struct perf_tool *tool = session->tool; int fd = perf_data_file__fd(session->file); union perf_event *event; uint32_t size, cur_size = 0; @@ -1187,7 +1239,7 @@ more: } } - if ((skip = perf_session__process_event(session, event, tool, head)) < 0) { + if ((skip = perf_session__process_event(session, event, head)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", head, event->header.size, event->header.type); err = -EINVAL; @@ -1203,10 +1255,10 @@ more: goto more; done: /* do the final flush for ordered samples */ - err = ordered_events__flush(session, tool, OE_FLUSH__FINAL); + err = ordered_events__flush(oe, OE_FLUSH__FINAL); out_err: free(buf); - perf_session__warn_about_errors(session, tool); + perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); return err; } @@ -1253,8 +1305,10 @@ fetch_mmaped_event(struct perf_session *session, static int __perf_session__process_events(struct perf_session *session, u64 data_offset, u64 data_size, - u64 file_size, struct perf_tool *tool) + u64 file_size) { + struct ordered_events *oe = &session->ordered_events; + struct perf_tool *tool = session->tool; int fd = perf_data_file__fd(session->file); u64 head, page_offset, file_offset, file_pos, size; int err, mmap_prot, mmap_flags, map_idx = 0; @@ -1323,8 +1377,7 @@ more: size = event->header.size; if (size < sizeof(struct perf_event_header) || - (skip = perf_session__process_event(session, event, tool, file_pos)) - < 0) { + (skip = perf_session__process_event(session, event, file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", file_offset + head, event->header.size, event->header.type); @@ -1348,17 +1401,16 @@ more: out: /* do the final flush for ordered samples */ - err = ordered_events__flush(session, tool, OE_FLUSH__FINAL); + err = ordered_events__flush(oe, OE_FLUSH__FINAL); out_err: ui_progress__finish(); - perf_session__warn_about_errors(session, tool); + perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); session->one_mmap = false; return err; } -int perf_session__process_events(struct perf_session *session, - struct perf_tool *tool) +int perf_session__process_events(struct perf_session *session) { u64 size = perf_data_file__size(session->file); int err; @@ -1369,10 +1421,9 @@ int perf_session__process_events(struct perf_session *session, if (!perf_data_file__is_pipe(session->file)) err = __perf_session__process_events(session, session->header.data_offset, - session->header.data_size, - size, tool); + session->header.data_size, size); else - err = __perf_session__process_pipe_events(session, tool); + err = __perf_session__process_pipe_events(session); return err; } @@ -1415,6 +1466,9 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps, for (i = 0; i < MAP__NR_TYPES; ++i) { struct kmap *kmap = map__kmap(maps[i]); + + if (!kmap) + continue; kmap->ref_reloc_sym = ref; } @@ -1436,7 +1490,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) { size_t ret = fprintf(fp, "Aggregated stats:\n"); - ret += events_stats__fprintf(&session->stats, fp); + ret += events_stats__fprintf(&session->evlist->stats, fp); return ret; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 6d663dc76404..d5fa7b7916ef 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -20,13 +20,13 @@ struct perf_session { struct machines machines; struct perf_evlist *evlist; struct trace_event tevent; - struct events_stats stats; bool repipe; bool one_mmap; void *one_mmap_addr; u64 one_mmap_offset; struct ordered_events ordered_events; struct perf_data_file *file; + struct perf_tool *tool; }; #define PRINT_IP_OPT_IP (1<<0) @@ -49,20 +49,13 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, union perf_event **event_ptr, struct perf_sample *sample); -int perf_session__process_events(struct perf_session *session, - struct perf_tool *tool); +int perf_session__process_events(struct perf_session *session); -int perf_session_queue_event(struct perf_session *s, union perf_event *event, - struct perf_tool *tool, struct perf_sample *sample, - u64 file_offset); +int perf_session__queue_event(struct perf_session *s, union perf_event *event, + struct perf_sample *sample, u64 file_offset); void perf_tool__fill_defaults(struct perf_tool *tool); -int perf_session__deliver_event(struct perf_session *session, - union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool, u64 file_offset); - int perf_session__resolve_callchain(struct perf_session *session, struct perf_evsel *evsel, struct thread *thread, @@ -126,8 +119,7 @@ extern volatile int session_done; int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool); + struct perf_sample *sample); int perf_event__process_id_index(struct perf_tool *tool, union perf_event *event, diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index d0aee4b9dfd4..1833103768cb 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -25,7 +25,7 @@ cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') -libapikfs = getenv('LIBAPIKFS') +libapikfs = getenv('LIBAPI') ext_sources = [f.strip() for f in file('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7a39c1ed8d37..4593f36ecc4c 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1463,6 +1463,15 @@ int sort_dimension__add(const char *tok) sort__has_parent = 1; } else if (sd->entry == &sort_sym) { sort__has_sym = 1; + /* + * perf diff displays the performance difference amongst + * two or more perf.data files. Those files could come + * from different binaries. So we should not compare + * their ips, but the name of symbol. + */ + if (sort__mode == SORT_MODE__DIFF) + sd->entry->se_collapse = sort__sym_sort; + } else if (sd->entry == &sort_dso) { sort__has_dso = 1; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index c03e4ff8beff..846036a921dc 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -44,6 +44,7 @@ extern struct sort_entry sort_dso_to; extern struct sort_entry sort_sym_from; extern struct sort_entry sort_sym_to; extern enum sort_type sort__first_dimension; +extern const char default_mem_sort_order[]; struct he_stat { u64 period; @@ -102,7 +103,6 @@ struct hist_entry { bool init_have_children; char level; - bool used; u8 filtered; char *srcline; struct symbol *parent; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 33b7a2aef713..a7ab6063e038 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -74,6 +74,10 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); } +#ifndef STT_GNU_IFUNC +#define STT_GNU_IFUNC 10 +#endif + static inline int elf_sym__is_function(const GElf_Sym *sym) { return (elf_sym__type(sym) == STT_FUNC || @@ -575,32 +579,37 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata) static int decompress_kmodule(struct dso *dso, const char *name, enum dso_binary_type type) { - int fd; - const char *ext = strrchr(name, '.'); + int fd = -1; char tmpbuf[] = "/tmp/perf-kmod-XXXXXX"; + struct kmod_path m; if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP && type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP && type != DSO_BINARY_TYPE__BUILD_ID_CACHE) return -1; - if (!ext || !is_supported_compression(ext + 1)) { - ext = strrchr(dso->name, '.'); - if (!ext || !is_supported_compression(ext + 1)) - return -1; - } + if (type == DSO_BINARY_TYPE__BUILD_ID_CACHE) + name = dso->long_name; - fd = mkstemp(tmpbuf); - if (fd < 0) + if (kmod_path__parse_ext(&m, name) || !m.comp) return -1; - if (!decompress_to_file(ext + 1, name, fd)) { + fd = mkstemp(tmpbuf); + if (fd < 0) { + dso->load_errno = errno; + goto out; + } + + if (!decompress_to_file(m.ext, name, fd)) { + dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; close(fd); fd = -1; } unlink(tmpbuf); +out: + free(m.ext); return fd; } @@ -629,37 +638,49 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, Elf *elf; int fd; - if (dso__needs_decompress(dso)) + if (dso__needs_decompress(dso)) { fd = decompress_kmodule(dso, name, type); - else + if (fd < 0) + return -1; + } else { fd = open(name, O_RDONLY); - - if (fd < 0) - return -1; + if (fd < 0) { + dso->load_errno = errno; + return -1; + } + } elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); if (elf == NULL) { pr_debug("%s: cannot read %s ELF file.\n", __func__, name); + dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF; goto out_close; } if (gelf_getehdr(elf, &ehdr) == NULL) { + dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF; pr_debug("%s: cannot get elf header.\n", __func__); goto out_elf_end; } - if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) + if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) { + dso->load_errno = DSO_LOAD_ERRNO__INTERNAL_ERROR; goto out_elf_end; + } /* Always reject images with a mismatched build-id: */ if (dso->has_build_id) { u8 build_id[BUILD_ID_SIZE]; - if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) + if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) { + dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID; goto out_elf_end; + } - if (!dso__build_id_equal(dso, build_id)) + if (!dso__build_id_equal(dso, build_id)) { + dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID; goto out_elf_end; + } } ss->is_64_bit = (gelf_getclass(elf) == ELFCLASS64); @@ -695,8 +716,10 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, } ss->name = strdup(name); - if (!ss->name) + if (!ss->name) { + dso->load_errno = errno; goto out_elf_end; + } ss->elf = elf; ss->fd = fd; @@ -753,6 +776,7 @@ int dso__load_sym(struct dso *dso, struct map *map, symbol_filter_t filter, int kmodule) { struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; + struct map_groups *kmaps = kmap ? map__kmaps(map) : NULL; struct map *curr_map = map; struct dso *curr_dso = dso; Elf_Data *symstrs, *secstrs; @@ -768,6 +792,9 @@ int dso__load_sym(struct dso *dso, struct map *map, int nr = 0; bool remap_kernel = false, adjust_kernel_syms = false; + if (kmap && !kmaps) + return -1; + dso->symtab_type = syms_ss->type; dso->is_64_bit = syms_ss->is_64_bit; dso->rel = syms_ss->ehdr.e_type == ET_REL; @@ -864,10 +891,9 @@ int dso__load_sym(struct dso *dso, struct map *map, /* Reject ARM ELF "mapping symbols": these aren't unique and * don't identify functions, so will confuse the profile * output: */ - if (ehdr.e_machine == EM_ARM) { - if (!strcmp(elf_name, "$a") || - !strcmp(elf_name, "$d") || - !strcmp(elf_name, "$t")) + if (ehdr.e_machine == EM_ARM || ehdr.e_machine == EM_AARCH64) { + if (elf_name[0] == '$' && strchr("adtx", elf_name[1]) + && (elf_name[2] == '\0' || elf_name[2] == '.')) continue; } @@ -936,8 +962,10 @@ int dso__load_sym(struct dso *dso, struct map *map, map->map_ip = map__map_ip; map->unmap_ip = map__unmap_ip; /* Ensure maps are correctly ordered */ - map_groups__remove(kmap->kmaps, map); - map_groups__insert(kmap->kmaps, map); + if (kmaps) { + map_groups__remove(kmaps, map); + map_groups__insert(kmaps, map); + } } /* @@ -961,7 +989,7 @@ int dso__load_sym(struct dso *dso, struct map *map, snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name); - curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name); + curr_map = map_groups__find_by_name(kmaps, map->type, dso_name); if (curr_map == NULL) { u64 start = sym.st_value; @@ -991,7 +1019,7 @@ int dso__load_sym(struct dso *dso, struct map *map, curr_map->unmap_ip = identity__map_ip; } curr_dso->symtab_type = dso->symtab_type; - map_groups__insert(kmap->kmaps, curr_map); + map_groups__insert(kmaps, curr_map); /* * The new DSO should go to the kernel DSOS */ @@ -1045,14 +1073,15 @@ new_symbol: * For misannotated, zeroed, ASM function sizes. */ if (nr > 0) { - symbols__fixup_duplicate(&dso->symbols[map->type]); + if (!symbol_conf.allow_aliases) + symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); if (kmap) { /* * We need to fixup this here too because we create new * maps here, for things like vsyscall sections. */ - __map_groups__fixup_end(kmap->kmaps, map->type); + __map_groups__fixup_end(kmaps, map->type); } } err = nr; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index d7efb03b3f9a..fd8477cacf88 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -246,13 +246,12 @@ out: return ret; } -int symsrc__init(struct symsrc *ss, struct dso *dso __maybe_unused, - const char *name, +int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, enum dso_binary_type type) { int fd = open(name, O_RDONLY); if (fd < 0) - return -1; + goto out_errno; ss->name = strdup(name); if (!ss->name) @@ -264,6 +263,8 @@ int symsrc__init(struct symsrc *ss, struct dso *dso __maybe_unused, return 0; out_close: close(fd); +out_errno: + dso->load_errno = errno; return -1; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a69066865a55..201f6c4ca738 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -15,6 +15,7 @@ #include "machine.h" #include "symbol.h" #include "strlist.h" +#include "intlist.h" #include "header.h" #include <elf.h> @@ -629,13 +630,16 @@ static int dso__load_all_kallsyms(struct dso *dso, const char *filename, static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, symbol_filter_t filter) { - struct map_groups *kmaps = map__kmap(map)->kmaps; + struct map_groups *kmaps = map__kmaps(map); struct map *curr_map; struct symbol *pos; int count = 0, moved = 0; struct rb_root *root = &dso->symbols[map->type]; struct rb_node *next = rb_first(root); + if (!kmaps) + return -1; + while (next) { char *module; @@ -681,8 +685,8 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, symbol_filter_t filter) { - struct map_groups *kmaps = map__kmap(map)->kmaps; - struct machine *machine = kmaps->machine; + struct map_groups *kmaps = map__kmaps(map); + struct machine *machine; struct map *curr_map = map; struct symbol *pos; int count = 0, moved = 0; @@ -690,6 +694,11 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, struct rb_node *next = rb_first(root); int kernel_range = 0; + if (!kmaps) + return -1; + + machine = kmaps->machine; + while (next) { char *module; @@ -1024,9 +1033,12 @@ static bool filename_from_kallsyms_filename(char *filename, static int validate_kcore_modules(const char *kallsyms_filename, struct map *map) { - struct map_groups *kmaps = map__kmap(map)->kmaps; + struct map_groups *kmaps = map__kmaps(map); char modules_filename[PATH_MAX]; + if (!kmaps) + return -EINVAL; + if (!filename_from_kallsyms_filename(modules_filename, "modules", kallsyms_filename)) return -EINVAL; @@ -1042,6 +1054,9 @@ static int validate_kcore_addresses(const char *kallsyms_filename, { struct kmap *kmap = map__kmap(map); + if (!kmap) + return -EINVAL; + if (kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) { u64 start; @@ -1080,8 +1095,8 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) static int dso__load_kcore(struct dso *dso, struct map *map, const char *kallsyms_filename) { - struct map_groups *kmaps = map__kmap(map)->kmaps; - struct machine *machine = kmaps->machine; + struct map_groups *kmaps = map__kmaps(map); + struct machine *machine; struct kcore_mapfn_data md; struct map *old_map, *new_map, *replacement_map = NULL; bool is_64_bit; @@ -1089,6 +1104,11 @@ static int dso__load_kcore(struct dso *dso, struct map *map, char kcore_filename[PATH_MAX]; struct symbol *sym; + if (!kmaps) + return -EINVAL; + + machine = kmaps->machine; + /* This function requires that the map is the kernel map */ if (map != machine->vmlinux_maps[map->type]) return -EINVAL; @@ -1201,6 +1221,9 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) struct kmap *kmap = map__kmap(map); u64 addr; + if (!kmap) + return -1; + if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name) return 0; @@ -1859,6 +1882,20 @@ int setup_list(struct strlist **list, const char *list_str, return 0; } +int setup_intlist(struct intlist **list, const char *list_str, + const char *list_name) +{ + if (list_str == NULL) + return 0; + + *list = intlist__new(list_str); + if (!*list) { + pr_err("problems parsing %s list\n", list_name); + return -1; + } + return 0; +} + static bool symbol__read_kptr_restrict(void) { bool value = false; @@ -1909,9 +1946,17 @@ int symbol__init(struct perf_session_env *env) symbol_conf.comm_list_str, "comm") < 0) goto out_free_dso_list; + if (setup_intlist(&symbol_conf.pid_list, + symbol_conf.pid_list_str, "pid") < 0) + goto out_free_comm_list; + + if (setup_intlist(&symbol_conf.tid_list, + symbol_conf.tid_list_str, "tid") < 0) + goto out_free_pid_list; + if (setup_list(&symbol_conf.sym_list, symbol_conf.sym_list_str, "symbol") < 0) - goto out_free_comm_list; + goto out_free_tid_list; /* * A path to symbols of "/" is identical to "" @@ -1930,6 +1975,10 @@ int symbol__init(struct perf_session_env *env) symbol_conf.initialized = true; return 0; +out_free_tid_list: + intlist__delete(symbol_conf.tid_list); +out_free_pid_list: + intlist__delete(symbol_conf.pid_list); out_free_comm_list: strlist__delete(symbol_conf.comm_list); out_free_dso_list: @@ -1944,6 +1993,8 @@ void symbol__exit(void) strlist__delete(symbol_conf.sym_list); strlist__delete(symbol_conf.dso_list); strlist__delete(symbol_conf.comm_list); + intlist__delete(symbol_conf.tid_list); + intlist__delete(symbol_conf.pid_list); vmlinux_path__exit(); symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL; symbol_conf.initialized = false; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 1650dcb3a67b..09561500164a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -78,6 +78,7 @@ static inline size_t symbol__size(const struct symbol *sym) } struct strlist; +struct intlist; struct symbol_conf { unsigned short priv_size; @@ -87,6 +88,7 @@ struct symbol_conf { ignore_vmlinux_buildid, show_kernel_path, use_modules, + allow_aliases, sort_by_name, show_nr_samples, show_total_period, @@ -114,6 +116,8 @@ struct symbol_conf { const char *guestmount; const char *dso_list_str, *comm_list_str, + *pid_list_str, + *tid_list_str, *sym_list_str, *col_width_list_str; struct strlist *dso_list, @@ -123,6 +127,8 @@ struct symbol_conf { *dso_to_list, *sym_from_list, *sym_to_list; + struct intlist *pid_list, + *tid_list; const char *symfs; }; @@ -294,5 +300,7 @@ int compare_proc_modules(const char *from, const char *to); int setup_list(struct strlist **list, const char *list_str, const char *list_name); +int setup_intlist(struct intlist **list, const char *list_str, + const char *list_name); #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index e74c5963dc7a..a53603b27e52 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -123,11 +123,8 @@ int target__strerror(struct target *target, int errnum, if (errnum >= 0) { const char *err = strerror_r(errnum, buf, buflen); - if (err != buf) { - size_t len = strlen(err); - memcpy(buf, err, min(buflen - 1, len)); - *(buf + min(buflen - 1, len)) = '\0'; - } + if (err != buf) + scnprintf(buf, buflen, "%s", err); return 0; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 9ebc8b1f9be5..1c8fbc9588c5 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -82,6 +82,20 @@ void thread__delete(struct thread *thread) free(thread); } +struct thread *thread__get(struct thread *thread) +{ + ++thread->refcnt; + return thread; +} + +void thread__put(struct thread *thread) +{ + if (thread && --thread->refcnt == 0) { + list_del_init(&thread->node); + thread__delete(thread); + } +} + struct comm *thread__comm(const struct thread *thread) { if (list_empty(&thread->comm_list)) @@ -192,7 +206,6 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) err = thread__set_comm(thread, comm, timestamp); if (err) return err; - thread->comm_set = true; } thread->ppid = parent->tid; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 160fd066a7d1..9b8a54dc34a8 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -7,6 +7,7 @@ #include <sys/types.h> #include "symbol.h" #include <strlist.h> +#include <intlist.h> struct thread_stack; @@ -20,6 +21,7 @@ struct thread { pid_t tid; pid_t ppid; int cpu; + int refcnt; char shortname[3]; bool comm_set; bool dead; /* if set thread has exited */ @@ -37,6 +39,18 @@ struct comm; struct thread *thread__new(pid_t pid, pid_t tid); int thread__init_map_groups(struct thread *thread, struct machine *machine); void thread__delete(struct thread *thread); + +struct thread *thread__get(struct thread *thread); +void thread__put(struct thread *thread); + +static inline void __thread__zput(struct thread **thread) +{ + thread__put(*thread); + *thread = NULL; +} + +#define thread__zput(thread) __thread__zput(&thread) + static inline void thread__exited(struct thread *thread) { thread->dead = true; @@ -87,6 +101,16 @@ static inline bool thread__is_filtered(struct thread *thread) return true; } + if (symbol_conf.pid_list && + !intlist__has_entry(symbol_conf.pid_list, thread->pid_)) { + return true; + } + + if (symbol_conf.tid_list && + !intlist__has_entry(symbol_conf.tid_list, thread->tid)) { + return true; + } + return false; } diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index bb2708bbfaca..51d9e56c0f84 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -10,6 +10,7 @@ struct perf_evsel; struct perf_sample; struct perf_tool; struct machine; +struct ordered_events; typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -25,6 +26,9 @@ typedef int (*event_attr_op)(struct perf_tool *tool, typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event, struct perf_session *session); +typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, + struct ordered_events *oe); + struct perf_tool { event_sample sample, read; @@ -38,8 +42,8 @@ struct perf_tool { unthrottle; event_attr_op attr; event_op2 tracing_data; - event_op2 finished_round, - build_id, + event_oe finished_round; + event_op2 build_id, id_index; bool ordered_events; bool ordering_requires_timestamps; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index c36636fd825b..25d6c737be3e 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -112,8 +112,8 @@ unsigned long long read_size(struct event_format *event, void *ptr, int size) return pevent_read_number(event->pevent, ptr, size); } -void event_format__print(struct event_format *event, - int cpu, void *data, int size) +void event_format__fprintf(struct event_format *event, + int cpu, void *data, int size, FILE *fp) { struct pevent_record record; struct trace_seq s; @@ -125,10 +125,16 @@ void event_format__print(struct event_format *event, trace_seq_init(&s); pevent_event_info(&s, event, &record); - trace_seq_do_printf(&s); + trace_seq_do_fprintf(&s, fp); trace_seq_destroy(&s); } +void event_format__print(struct event_format *event, + int cpu, void *data, int size) +{ + return event_format__fprintf(event, cpu, data, size, stdout); +} + void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size __maybe_unused) { diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 5c9bdd1591a9..9df61059a85d 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -43,7 +43,6 @@ static int stop_script_unsupported(void) static void process_event_unsupported(union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, struct perf_evsel *evsel __maybe_unused, - struct thread *thread __maybe_unused, struct addr_location *al __maybe_unused) { } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 52aaa19e1eb1..d5168f0be4ec 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -23,6 +23,9 @@ trace_event__tp_format(const char *sys, const char *name); int bigendian(void); +void event_format__fprintf(struct event_format *event, + int cpu, void *data, int size, FILE *fp); + void event_format__print(struct event_format *event, int cpu, void *data, int size); @@ -69,8 +72,7 @@ struct scripting_ops { void (*process_event) (union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, - struct addr_location *al); + struct addr_location *al); int (*generate_script) (struct pevent *pevent, const char *outfile); }; diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index e3c40a520a25..7b09a443a280 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -266,7 +266,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, u64 *fde_count) { int ret = -EINVAL, fd; - u64 offset = dso->data.frame_offset; + u64 offset = dso->data.eh_frame_hdr_offset; if (offset == 0) { fd = dso__data_fd(dso, machine); @@ -275,7 +275,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, /* Check the .eh_frame section for unwinding info */ offset = elf_section_offset(fd, ".eh_frame_hdr"); - dso->data.frame_offset = offset; + dso->data.eh_frame_hdr_offset = offset; } if (offset) @@ -291,7 +291,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso, struct machine *machine, u64 *offset) { int fd; - u64 ofs = dso->data.frame_offset; + u64 ofs = dso->data.debug_frame_offset; if (ofs == 0) { fd = dso__data_fd(dso, machine); @@ -300,7 +300,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso, /* Check the .debug_frame section for unwinding info */ ofs = elf_section_offset(fd, ".debug_frame"); - dso->data.frame_offset = ofs; + dso->data.debug_frame_offset = ofs; } *offset = ofs; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index b86744f29eef..4ee6d0d4c993 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -269,6 +269,13 @@ void dump_stack(void) void dump_stack(void) {} #endif +void sighandler_dump_stack(int sig) +{ + psignal(sig, "perf"); + dump_stack(); + exit(sig); +} + void get_term_dimensions(struct winsize *ws) { char *s = getenv("LINES"); @@ -303,13 +310,26 @@ void set_term_quiet_input(struct termios *old) tcsetattr(0, TCSANOW, &tc); } -static void set_tracing_events_path(const char *mountpoint) +static void set_tracing_events_path(const char *tracing, const char *mountpoint) { - snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", - mountpoint, "tracing/events"); + snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", + mountpoint, tracing, "events"); } -const char *perf_debugfs_mount(const char *mountpoint) +static const char *__perf_tracefs_mount(const char *mountpoint) +{ + const char *mnt; + + mnt = tracefs_mount(mountpoint); + if (!mnt) + return NULL; + + set_tracing_events_path("", mnt); + + return mnt; +} + +static const char *__perf_debugfs_mount(const char *mountpoint) { const char *mnt; @@ -317,7 +337,20 @@ const char *perf_debugfs_mount(const char *mountpoint) if (!mnt) return NULL; - set_tracing_events_path(mnt); + set_tracing_events_path("tracing/", mnt); + + return mnt; +} + +const char *perf_debugfs_mount(const char *mountpoint) +{ + const char *mnt; + + mnt = __perf_tracefs_mount(mountpoint); + if (mnt) + return mnt; + + mnt = __perf_debugfs_mount(mountpoint); return mnt; } @@ -325,12 +358,19 @@ const char *perf_debugfs_mount(const char *mountpoint) void perf_debugfs_set_path(const char *mntpt) { snprintf(debugfs_mountpoint, strlen(debugfs_mountpoint), "%s", mntpt); - set_tracing_events_path(mntpt); + set_tracing_events_path("tracing/", mntpt); +} + +static const char *find_tracefs(void) +{ + const char *path = __perf_tracefs_mount(NULL); + + return path; } static const char *find_debugfs(void) { - const char *path = perf_debugfs_mount(NULL); + const char *path = __perf_debugfs_mount(NULL); if (!path) fprintf(stderr, "Your kernel does not support the debugfs filesystem"); @@ -344,6 +384,7 @@ static const char *find_debugfs(void) */ const char *find_tracing_dir(void) { + const char *tracing_dir = ""; static char *tracing; static int tracing_found; const char *debugfs; @@ -351,11 +392,15 @@ const char *find_tracing_dir(void) if (tracing_found) return tracing; - debugfs = find_debugfs(); - if (!debugfs) - return NULL; + debugfs = find_tracefs(); + if (!debugfs) { + tracing_dir = "/tracing"; + debugfs = find_debugfs(); + if (!debugfs) + return NULL; + } - if (asprintf(&tracing, "%s/tracing", debugfs) < 0) + if (asprintf(&tracing, "%s%s", debugfs, tracing_dir) < 0) return NULL; tracing_found = 1; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 027a5153495c..1ff23e04ad27 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -75,6 +75,7 @@ #include <linux/types.h> #include <sys/ttydefaults.h> #include <api/fs/debugfs.h> +#include <api/fs/tracefs.h> #include <termios.h> #include <linux/bitops.h> #include <termios.h> @@ -276,6 +277,7 @@ char *ltrim(char *s); char *rtrim(char *s); void dump_stack(void); +void sighandler_dump_stack(int sig); extern unsigned int page_size; extern int cacheline_size; @@ -327,4 +329,8 @@ bool find_process(const char *name); int gzip_decompress_to_file(const char *input, int output_fd); #endif +#ifdef HAVE_LZMA_SUPPORT +int lzma_decompress_to_file(const char *input, int output_fd); +#endif + #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 368d64ac779e..dd2812ceb0ba 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -310,7 +310,7 @@ function dump(first, pastlast) cfr[jn] = cf[j] "." cfrep[cf[j]]; } if (cpusr[jn] > ncpus && ncpus != 0) - ovf = "(!)"; + ovf = "-ovf"; else ovf = ""; print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`"; diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon index d2d2a86139db..49701218dc62 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon @@ -1,2 +1,3 @@ CONFIG_RCU_TORTURE_TEST=y CONFIG_PRINTK_TIME=y +CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y |