diff options
Diffstat (limited to 'tools/perf')
235 files changed, 23936 insertions, 2820 deletions
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 09db62ba5786..3d1bb802dbf4 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -29,3 +29,4 @@ config.mak.autogen *.pyc *.pyo .config-detected +util/intel-pt-decoder/inat-tables.c diff --git a/tools/perf/Build b/tools/perf/Build index b77370ef7005..72237455b400 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -35,6 +35,7 @@ paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))" CFLAGS_builtin-help.o += $(paths) CFLAGS_builtin-timechart.o += $(paths) CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE +CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))" libperf-y += util/ libperf-y += arch/ diff --git a/tools/perf/Documentation/intel-bts.txt b/tools/perf/Documentation/intel-bts.txt new file mode 100644 index 000000000000..8bdc93bd7fdb --- /dev/null +++ b/tools/perf/Documentation/intel-bts.txt @@ -0,0 +1,86 @@ +Intel Branch Trace Store +======================== + +Overview +======== + +Intel BTS could be regarded as a predecessor to Intel PT and has some +similarities because it can also identify every branch a program takes. A +notable difference is that Intel BTS has no timing information and as a +consequence the present implementation is limited to per-thread recording. + +While decoding Intel BTS does not require walking the object code, the object +code is still needed to pair up calls and returns correctly, consequently much +of the Intel PT documentation applies also to Intel BTS. Refer to the Intel PT +documentation and consider that the PMU 'intel_bts' can usually be used in +place of 'intel_pt' in the examples provided, with the proviso that per-thread +recording must also be stipulated i.e. the --per-thread option for +'perf record'. + + +perf record +=========== + +new event +--------- + +The Intel BTS kernel driver creates a new PMU for Intel BTS. 
The perf record +option is: + + -e intel_bts// + +Currently Intel BTS is limited to per-thread tracing so the --per-thread option +is also needed. + + +snapshot option +--------------- + +The snapshot option is the same as Intel PT (refer Intel PT documentation). + + +auxtrace mmap size option +----------------------- + +The mmap size option is the same as Intel PT (refer Intel PT documentation). + + +perf script +=========== + +By default, perf script will decode trace data found in the perf.data file. +This can be further controlled by option --itrace. The --itrace option is +the same as Intel PT (refer Intel PT documentation) except that neither +"instructions" events nor "transactions" events (and consequently call +chains) are supported. + +To disable trace decoding entirely, use the option --no-itrace. + + +dump option +----------- + +perf script has an option (-D) to "dump" the events i.e. display the binary +data. + +When -D is used, Intel BTS packets are displayed. + +To disable the display of Intel BTS packets, combine the -D option with +--no-itrace. + + +perf report +=========== + +By default, perf report will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace exactly the same as +perf script. + + +perf inject +=========== + +perf inject also accepts the --itrace option in which case tracing data is +removed and replaced with the synthesized events. e.g. 
+ + perf inject --itrace -i perf.data -o perf.data.new diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt new file mode 100644 index 000000000000..be764f9ec769 --- /dev/null +++ b/tools/perf/Documentation/intel-pt.txt @@ -0,0 +1,795 @@ +Intel Processor Trace +===================== + +Overview +======== + +Intel Processor Trace (Intel PT) is an extension of Intel Architecture that +collects information about software execution such as control flow, execution +modes and timings and formats it into highly compressed binary packets. +Technical details are documented in the Intel 64 and IA-32 Architectures +Software Developer Manuals, Chapter 36 Intel Processor Trace. + +Intel PT is first supported in Intel Core M and 5th generation Intel Core +processors that are based on the Intel micro-architecture code name Broadwell. + +Trace data is collected by 'perf record' and stored within the perf.data file. +See below for options to 'perf record'. + +Trace data must be 'decoded' which involves walking the object code and matching +the trace data packets. For example a TNT packet only tells whether a +conditional branch was taken or not taken, so to make use of that packet the +decoder must know precisely which instruction was being executed. + +Decoding is done on-the-fly. The decoder outputs samples in the same format as +samples output by perf hardware events, for example as though the "instructions" +or "branches" events had been recorded. Presently 3 tools support this: +'perf script', 'perf report' and 'perf inject'. See below for more information +on using those tools. + +The main distinguishing feature of Intel PT is that the decoder can determine +the exact flow of software execution. Intel PT can be used to understand why +and how did software get to a certain point, or behave a certain way. 
The +software does not have to be recompiled, so Intel PT works with debug or release +builds, however the executed images are needed - which makes use in JIT-compiled +environments, or with self-modified code, a challenge. Also symbols need to be +provided to make sense of addresses. + +A limitation of Intel PT is that it produces huge amounts of trace data +(hundreds of megabytes per second per core) which takes a long time to decode, +for example two or three orders of magnitude longer than it took to collect. +Another limitation is the performance impact of tracing, something that will +vary depending on the use-case and architecture. + + +Quickstart +========== + +It is important to start small. That is because it is easy to capture vastly +more data than can possibly be processed. + +The simplest thing to do with Intel PT is userspace profiling of small programs. +Data is captured with 'perf record' e.g. to trace 'ls' userspace-only: + + perf record -e intel_pt//u ls + +And profiled with 'perf report' e.g. + + perf report + +To also trace kernel space presents a problem, namely kernel self-modifying +code. A fairly good kernel image is available in /proc/kcore but to get an +accurate image a copy of /proc/kcore needs to be made under the same conditions +as the data capture. A script perf-with-kcore can do that, but beware that the +script makes use of 'sudo' to copy /proc/kcore. If you have perf installed +locally from the source tree you can do: + + ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls + +which will create a directory named 'pt_ls' and put the perf.data file and +copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use +'perf report' becomes: + + ~/libexec/perf-core/perf-with-kcore report pt_ls + +Because samples are synthesized after-the-fact, the sampling period can be +selected for reporting. e.g. 
sample every microsecond + + ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge + +See the sections below for more information about the --itrace option. + +Beware the smaller the period, the more samples that are produced, and the +longer it takes to process them. + +Also note that the coarseness of Intel PT timing information will start to +distort the statistical value of the sampling as the sampling period becomes +smaller. + +To represent software control flow, "branches" samples are produced. By default +a branch sample is synthesized for every single branch. To get an idea what +data is available you can use the 'perf script' tool with no parameters, which +will list all the samples. + + perf record -e intel_pt//u ls + perf script + +An interesting field that is not printed by default is 'flags' which can be +displayed as follows: + + perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags + +The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, +system, asynchronous, interrupt, transaction abort, trace begin, trace end, and +in transaction, respectively. + +While it is possible to create scripts to analyze the data, an alternative +approach is available to export the data to a postgresql database. Refer to +script export-to-postgresql.py for more details, and to script +call-graph-from-postgresql.py for an example of using the database. + +As mentioned above, it is easy to capture too much data. One way to limit the +data captured is to use 'snapshot' mode which is explained further below. +Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. + +Another problem that will be experienced is decoder errors. They can be caused +by inability to access the executed image, self-modified or JIT-ed code, or the +inability to match side-band information (such as context switches and mmaps) +which results in the decoder not knowing what code was executed. 
+ +There is also the problem of perf not being able to copy the data fast enough, +resulting in data lost because the buffer was full. See 'Buffer handling' below +for more details. + + +perf record +=========== + +new event +--------- + +The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are +selected by providing the PMU name followed by the "config" separated by slashes. +An enhancement has been made to allow default "config" e.g. the option + + -e intel_pt// + +will use a default config value. Currently that is the same as + + -e intel_pt/tsc,noretcomp=0/ + +which is the same as + + -e intel_pt/tsc=1,noretcomp=0/ + +Note there are now new config terms - see section 'config terms' further below. + +The config terms are listed in /sys/devices/intel_pt/format. They are bit +fields within the config member of the struct perf_event_attr which is +passed to the kernel by the perf_event_open system call. They correspond to bit +fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: + + $ grep -H . /sys/bus/event_source/devices/intel_pt/format/* + /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 + /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 + /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 + /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 + /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 + /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 + /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 + +Note that the default config must be overridden for each term i.e. 
+ + -e intel_pt/noretcomp=0/ + +is the same as: + + -e intel_pt/tsc=1,noretcomp=0/ + +So, to disable TSC packets use: + + -e intel_pt/tsc=0/ + +It is also possible to specify the config value explicitly: + + -e intel_pt/config=0x400/ + +Note that, as with all events, the event is suffixed with event modifiers: + + u userspace + k kernel + h hypervisor + G guest + H host + p precise ip + +'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. +'p' is also not relevant to Intel PT. So only options 'u' and 'k' are +meaningful for Intel PT. + +perf_event_attr is displayed if the -vv option is used e.g. + + ------------------------------------------------------------ + perf_event_attr: + type 6 + size 112 + config 0x400 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + enable_on_exec 1 + sample_id_all 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + + +config terms +------------ + +The June 2015 version of Intel 64 and IA-32 Architectures Software Developer +Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features. +Some of the features are reflected in new config terms. All the config terms are +described below. + +tsc Always supported. Produces TSC timestamp packets to provide + timing information. In some cases it is possible to decode + without timing information, for example a per-thread context + that does not overlap executable memory maps. + + The default config selects tsc (i.e. tsc=1). + +noretcomp Always supported. Disables "return compression" so a TIP packet + is produced when a function returns. 
Causes more packets to be + produced but might make decoding more reliable. + + The default config does not select noretcomp (i.e. noretcomp=0). + +psb_period Allows the frequency of PSB packets to be specified. + + The PSB packet is a synchronization packet that provides a + starting point for decoding or recovery from errors. + + Support for psb_period is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and "0" + otherwise. + + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The psb_period value is converted to the approximate number of + trace bytes between PSB packets as: + + 2 ^ (value + 11) + + e.g. value 3 means 16KiB between PSBs + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/psb_period=15/u uname + Invalid psb_period for intel_pt. Valid values are: 0-5 + + If MTC packets are selected, the default config selects a value + of 3 (i.e. psb_period=3) or the nearest lower value that is + supported (0 is always supported). Otherwise the default is 0. + + If decoding is expected to be reliable and the buffer is large + then a large PSB period can be used. + + Because a TSC packet is produced with PSB, the PSB period can + also affect the granularity of timing information in the absence + of MTC or CYC. + +mtc Produces MTC timing packets. + + MTC packets provide finer grain timestamp information than TSC + packets. MTC packets record time using the hardware crystal + clock (CTC) which is related to TSC packets using a TMA packet. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc + + which contains "1" if the feature is supported and + "0" otherwise. 
+ + The frequency of MTC packets can also be specified - see + mtc_period below. + +mtc_period Specifies how frequently MTC packets are produced - see mtc + above for how to determine if MTC packets are supported. + + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The mtc_period value is converted to the MTC frequency as: + + CTC-frequency / (2 ^ value) + + e.g. value 3 means one eighth of CTC-frequency + + Where CTC is the hardware crystal clock, the frequency of which + can be related to TSC via values provided in cpuid leaf 0x15. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/mtc_period=15/u uname + Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 + + The default value is 3 or the nearest lower value + that is supported (0 is always supported). + +cyc Produces CYC timing packets. + + CYC packets provide even finer grain timestamp information than + MTC and TSC packets. A CYC packet contains the number of CPU + cycles since the last CYC packet. Unlike MTC and TSC packets, + CYC packets are only sent when another packet is also sent. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and + "0" otherwise. + + The number of CYC packets produced can be reduced by specifying + a threshold - see cyc_thresh below. + +cyc_thresh Specifies how frequently CYC packets are produced - see cyc + above for how to determine if CYC packets are supported. + + Valid cyc_thresh values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. 
+ + The cyc_thresh value represents the minimum number of CPU cycles + that must have passed before a CYC packet can be sent. The + number of CPU cycles is: + + 2 ^ (value - 1) + + e.g. value 4 means 8 CPU cycles must pass before a CYC packet + can be sent. Note a CYC packet is still only sent when another + packet is sent, not at, e.g. every 8 CPU cycles. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname + Invalid cyc_thresh for intel_pt. Valid values are: 0-12 + + CYC packets are not requested by default. + + +new snapshot option +------------------- + +The difference between full trace and snapshot from the kernel's perspective is +that in full trace we don't overwrite trace data that the user hasn't collected +yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let +the trace run and overwrite older data in the buffer so that whenever something +interesting happens, we can stop it and grab a snapshot of what was going on +around that interesting moment. + +To select snapshot mode a new option has been added: + + -S + +Optionally it can be followed by the snapshot size e.g. + + -S0x100000 + +The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size +nor snapshot size is specified, then the default is 4MiB for privileged users +(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. +If an unprivileged user does not specify mmap pages, the mmap pages will be +reduced as described in the 'new auxtrace mmap size option' section below. + +The snapshot size is displayed if the option -vv is used e.g. + + Intel PT snapshot size: %zu + + +new auxtrace mmap size option +--------------------------- + +Intel PT buffer size is specified by an addition to the -m option e.g. + + -m,16 + +selects a buffer size of 16 pages i.e. 64KiB. + +Note that the existing functionality of -m is unchanged. 
The auxtrace mmap size +is specified by the optional addition of a comma and the value. + +The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users +(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. +If an unprivileged user does not specify mmap pages, the mmap pages will be +reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the +user is likely to get an error as they exceed their mlock limit (Max locked +memory as shown in /proc/self/limits). Note that perf does not count the first +512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu +against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus +their mlock limit (which defaults to 64KiB but is not multiplied by the number +of cpus). + +In full-trace mode, powers of two are allowed for buffer size, with a minimum +size of 2 pages. In snapshot mode, it is the same but the minimum size is +1 page. + +The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. + + mmap length 528384 + auxtrace mmap length 4198400 + + +Intel PT modes of operation +--------------------------- + +Intel PT can be used in 2 modes: + full-trace mode + snapshot mode + +Full-trace mode traces continuously e.g. + + perf record -e intel_pt//u uname + +Snapshot mode captures the available data when a signal is sent e.g. + + perf record -v -e intel_pt//u -S ./loopy 1000000000 & + [1] 11435 + kill -USR2 11435 + Recording AUX area tracing snapshot + +Note that the signal sent is SIGUSR2. +Note that "Recording AUX area tracing snapshot" is displayed because the -v +option is used. + +The 2 modes cannot be used together. + + +Buffer handling +--------------- + +There may be buffer limitations (i.e. single ToPa entry) which means that actual +buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). 
In order to +provide other sizes, and in particular an arbitrarily large size, multiple +buffers are logically concatenated. However an interrupt must be used to switch +between buffers. That has two potential problems: + a) the interrupt may not be handled in time so that the current buffer + becomes full and some trace data is lost. + b) the interrupts may slow the system and affect the performance + results. + +If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event +which the tools report as an error. + +In full-trace mode, the driver waits for data to be copied out before allowing +the (logical) buffer to wrap-around. If data is not copied out quickly enough, +again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to +wait, the intel_pt event gets disabled. Because it is difficult to know when +that happens, perf tools always re-enable the intel_pt event after copying out +data. + + +Intel PT and build ids +---------------------- + +By default "perf record" post-processes the event stream to find all build ids +for executables for all addresses sampled. Deliberately, Intel PT is not +decoded for that purpose (it would take too long). Instead the build ids for +all executables encountered (due to mmap, comm or task events) are included +in the perf.data file. + +To see buildids included in the perf.data file use the command: + + perf buildid-list + +If the perf.data file contains Intel PT data, that is the same as: + + perf buildid-list --with-hits + + +Snapshot mode and event disabling +--------------------------------- + +In order to make a snapshot, the intel_pt event is disabled using an IOCTL, +namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the +collection of side-band information. In order to prevent that, a dummy +software event has been introduced that permits tracking events (like mmaps) to +continue to be recorded while intel_pt is disabled. 
That is important to ensure +there is complete side-band information to allow the decoding of subsequent +snapshots. + +A test has been created for that. To find the test: + + perf test list + ... + 23: Test using a dummy software event to keep tracking + +To run the test: + + perf test 23 + 23: Test using a dummy software event to keep tracking : Ok + + +perf record modes (nothing new here) +------------------------------------ + +perf record essentially operates in one of three modes: + per thread + per cpu + workload only + +"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a +workload). +"per cpu" is selected by -C or -a. +"workload only" mode is selected by not using the other options but providing a +command to run (i.e. the workload). + +In per-thread mode an exact list of threads is traced. There is no inheritance. +Each thread has its own event buffer. + +In per-cpu mode all processes (or processes from the selected cgroup i.e. -G +option, or processes selected with -p or -u) are traced. Each cpu has its own +buffer. Inheritance is allowed. + +In workload-only mode, the workload is traced but with per-cpu buffers. +Inheritance is allowed. Note that you can now trace a workload in per-thread +mode by using the --per-thread option. + + +Privileged vs non-privileged users +---------------------------------- + +Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users +have memory limits imposed upon them. That affects what buffer sizes they can +have as outlined above. + +Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are +not permitted to use tracepoints which means there is insufficient side-band +information to decode Intel PT in per-cpu mode, and potentially workload-only +mode too if the workload creates new processes. + +Note also, that to use tracepoints, read-access to debugfs is required. 
So if +debugfs is not mounted or the user does not have read-access, it will again not +be possible to decode Intel PT in per-cpu mode. + + +sched_switch tracepoint +----------------------- + +The sched_switch tracepoint is used to provide side-band data for Intel PT +decoding. sched_switch events are automatically added. e.g. the second event +shown below + + $ perf record -vv -e intel_pt//u uname + ------------------------------------------------------------ + perf_event_attr: + type 6 + size 112 + config 0x400 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + enable_on_exec 1 + sample_id_all 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + perf_event_attr: + type 2 + size 112 + config 0x108 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER + read_format ID + inherit 1 + sample_id_all 1 + exclude_guest 1 + ------------------------------------------------------------ + sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + perf_event_attr: + type 1 + size 112 + config 0x9 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + mmap 1 + comm 1 + enable_on_exec 1 + task 1 + sample_id_all 1 + mmap2 1 + comm_exec 1 + ------------------------------------------------------------ + 
sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + mmap size 528384B + AUX area mmap length 4194304 + perf event ring buffer mmapped per cpu + Synthesizing auxtrace information + Linux + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 0.042 MB perf.data ] + +Note, the sched_switch event is only added if the user is permitted to use it +and only in per-cpu mode. + +Note also, the sched_switch event is only added if TSC packets are requested. +That is because, in the absence of timing information, the sched_switch events +cannot be matched against the Intel PT trace. + + +perf script +=========== + +By default, perf script will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace. + + +New --itrace option +------------------- + +Having no option is the same as + + --itrace + +which, in turn, is the same as + + --itrace=ibxe + +The letters are: + + i synthesize "instructions" events + b synthesize "branches" events + x synthesize "transactions" events + c synthesize branches events (calls only) + r synthesize branches events (returns only) + e synthesize tracing error events + d create a debug log + g synthesize a call chain (use with i or x) + l synthesize last branch entries (use with i or x) + +"Instructions" events look like they were recorded by "perf record -e +instructions". + +"Branches" events look like they were recorded by "perf record -e branches". "c" +and "r" can be combined to get calls and returns. + +"Transactions" events correspond to the start or end of transactions. The +'flags' field can be used in perf script to determine whether the event is a +transaction start, commit or abort. + +Error events are new. They show where the decoder lost the trace. 
Error events +are quite important. Users must know if what they are seeing is a complete +picture or not. + +The "d" option will cause the creation of a file "intel_pt.log" containing all +decoded packets and instructions. Note that this option slows down the decoder +and that the resulting file may be very large. + +In addition, the period of the "instructions" event can be specified. e.g. + + --itrace=i10us + +sets the period to 10us i.e. one instruction sample is synthesized for each 10 +microseconds of trace. Alternatives to "us" are "ms" (milliseconds), +"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions). + +"ms", "us" and "ns" are converted to TSC ticks. + +The timing information included with Intel PT does not give the time of every +instruction. Consequently, for the purpose of sampling, the decoder estimates +the time since the last timing packet based on 1 tick per instruction. The time +on the sample is *not* adjusted and reflects the last known value of TSC. + +For Intel PT, the default period is 100us. + +Setting it to a zero period means "as often as possible". + +In the case of Intel PT that is the same as a period of 1 and a unit of +'instructions' (i.e. --itrace=i1i). + +Also the call chain size (default 16, max. 1024) for instructions or +transactions events can be specified. e.g. + + --itrace=ig32 + --itrace=xg32 + +Also the number of last branch entries (default 64, max. 1024) for instructions or +transactions events can be specified. e.g. + + --itrace=il10 + --itrace=xl10 + +Note that last branch entries are cleared for each sample, so there is no overlap +from one sample to the next. + +To disable trace decoding entirely, use the option --no-itrace. + + +dump option +----------- + +perf script has an option (-D) to "dump" the events i.e. display the binary +data. + +When -D is used, Intel PT packets are displayed. 
The packet decoder does not +pay attention to PSB packets, but just decodes the bytes - so the packets seen +by the actual decoder may not be identical in places where the data is corrupt. +One example of that would be when the buffer-switching interrupt has been too +slow, and the buffer has been filled completely. In that case, the last packet +in the buffer might be truncated and immediately followed by a PSB as the trace +continues in the next buffer. + +To disable the display of Intel PT packets, combine the -D option with +--no-itrace. + + +perf report +=========== + +By default, perf report will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace exactly the same as +perf script, with the exception that the default is --itrace=igxe. + + +perf inject +=========== + +perf inject also accepts the --itrace option in which case tracing data is +removed and replaced with the synthesized events. e.g. + + perf inject --itrace -i perf.data -o perf.data.new + +Below is an example of using Intel PT with autofdo. It requires autofdo +(https://github.com/google/autofdo) and gcc version 5. The bubble +sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) +amended to take the number of elements as a parameter. 
+ + $ gcc-5 -O3 sort.c -o sort_optimized + $ ./sort_optimized 30000 + Bubble sorting array of 30000 elements + 2254 ms + + $ cat ~/.perfconfig + [intel-pt] + mispred-all + + $ perf record -e intel_pt//u ./sort 3000 + Bubble sorting array of 3000 elements + 58 ms + [ perf record: Woken up 2 times to write data ] + [ perf record: Captured and wrote 3.939 MB perf.data ] + $ perf inject -i perf.data -o inj --itrace=i100usle --strip + $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 + $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo + $ ./sort_autofdo 30000 + Bubble sorting array of 30000 elements + 2155 ms + +Note there is currently no advantage to using Intel PT instead of LBR, but +that may change in the future if greater use is made of the data. diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt new file mode 100644 index 000000000000..65453f4c7006 --- /dev/null +++ b/tools/perf/Documentation/itrace.txt @@ -0,0 +1,26 @@ + i synthesize instructions events + b synthesize branches events + c synthesize branches events (calls only) + r synthesize branches events (returns only) + x synthesize transactions events + e synthesize error events + d create a debug log + g synthesize a call chain (use with i or x) + l synthesize last branch entries (use with i or x) + + The default is all events i.e. the same as --itrace=ibxe + + In addition, the period (default 100000) for instructions events + can be specified in units of: + + i instructions + t ticks + ms milliseconds + us microseconds + ns nanoseconds (default) + + Also the call chain size (default 16, max. 1024) for instructions or + transactions events can be specified. + + Also the number of last branch entries (default 64, max. 1024) for + instructions or transactions events can be specified. 
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index bf3d0644bf10..34750fc32714 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -82,7 +82,7 @@ Be multi thread instead of multi process Specify number of groups -l:: ---loop=:: +--nr_loops=:: Specify number of loops Example of *messaging* @@ -139,64 +139,48 @@ Suite for evaluating performance of simple memory copy in various ways. Options of *memcpy* ^^^^^^^^^^^^^^^^^^^ -l:: ---length:: -Specify length of memory to copy (default: 1MB). +--size:: +Specify size of memory to copy (default: 1MB). Available units are B, KB, MB, GB and TB (case insensitive). --r:: ---routine:: -Specify routine to copy (default: default). -Available routines are depend on the architecture. +-f:: +--function:: +Specify function to copy (default: default). +Available functions are depend on the architecture. On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported. --i:: ---iterations:: +-l:: +--nr_loops:: Repeat memcpy invocation this number of times. -c:: ---cycle:: +--cycles:: Use perf's cpu-cycles event instead of gettimeofday syscall. --o:: ---only-prefault:: -Show only the result with page faults before memcpy. - --n:: ---no-prefault:: -Show only the result without page faults before memcpy. - *memset*:: Suite for evaluating performance of simple memory set in various ways. Options of *memset* ^^^^^^^^^^^^^^^^^^^ -l:: ---length:: -Specify length of memory to set (default: 1MB). +--size:: +Specify size of memory to set (default: 1MB). Available units are B, KB, MB, GB and TB (case insensitive). --r:: ---routine:: -Specify routine to set (default: default). -Available routines are depend on the architecture. +-f:: +--function:: +Specify function to set (default: default). +Available functions are depend on the architecture. On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported. 
--i:: ---iterations:: +-l:: +--nr_loops:: Repeat memset invocation this number of times. -c:: ---cycle:: +--cycles:: Use perf's cpu-cycles event instead of gettimeofday syscall. --o:: ---only-prefault:: -Show only the result with page faults before memset. - --n:: ---no-prefault:: -Show only the result without page faults before memset. - SUITES FOR 'numa' ~~~~~~~~~~~~~~~~~ *mem*:: @@ -216,6 +200,10 @@ Suite for evaluating parallel wake calls. *requeue*:: Suite for evaluating requeue calls. +*lock-pi*:: +Suite for evaluating futex lock_pi calls. + + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index b876ae312699..0b1cedeef895 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -48,28 +48,10 @@ OPTIONS Decode Instruction Tracing data, replacing it with synthesized events. Options are: - i synthesize instructions events - b synthesize branches events - c synthesize branches events (calls only) - r synthesize branches events (returns only) - x synthesize transactions events - e synthesize error events - d create a debug log - g synthesize a call chain (use with i or x) +include::itrace.txt[] - The default is all events i.e. the same as --itrace=ibxe - - In addition, the period (default 100000) for instructions events - can be specified in units of: - - i instructions - t ticks - ms milliseconds - us microseconds - ns nanoseconds (default) - - Also the call chain size (default 16, max. 1024) for instructions or - transactions events can be specified. +--strip:: + Use with --itrace to strip out non-synthesized events. SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index bada8933fdd4..79483f40e991 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -30,6 +30,7 @@ counted. 
The following modifiers exist: G - guest counting (in KVM guests) H - host counting (not in KVM guests) p - precise level + P - use maximum detected precise level S - read sample value (PERF_SAMPLE_READ) D - pin the event to the PMU @@ -125,6 +126,8 @@ To limit the list use: . If none of the above is matched, it will apply the supplied glob to all events, printing the ones that match. +. As a last resort, it will do a substring search in all event names. + One or more types can be used at the same time, listing the events for the types specified. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 9b9d9d086680..e630a7d2c348 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -45,6 +45,21 @@ OPTIONS param1 and param2 are defined as formats for the PMU in: /sys/bus/event_sources/devices/<pmu>/format/* + There are also some params which are not defined in .../<pmu>/format/*. + These params can be used to overload default config values per event. + Here is a list of the params. + - 'period': Set event sampling period + - 'freq': Set event sampling frequency + - 'time': Disable/enable time stamping. Acceptable values are 1 for + enabling time stamping. 0 for disabling time stamping. + The default is 1. + - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for + FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and + "no" for disable callgraph. + - 'stack-size': user stack size for dwarf mode + Note: If user explicitly sets options which conflict with the params, + the value set by the params will be overridden. + - a hardware breakpoint event in the form of '\mem:addr[/len][:access]' where addr is the address in memory you want to break in. Access is the memory access type (read, write, execute) it can @@ -61,7 +76,16 @@ OPTIONS "perf report" to view group events together. --filter=<filter>:: - Event filter. + Event filter. 
This option should follow an event selector (-e) which + selects tracepoint event(s). Multiple '--filter' options are combined + using '&&'. + +--exclude-perf:: + Don't record events issued by perf itself. This option should follow + an event selector (-e) which selects tracepoint event(s). It adds a + filter expression 'common_pid != $PERFPID' to filters. If other + '--filter' exists, the new filter expression will be combined with + them by '&&'. -a:: --all-cpus:: @@ -120,7 +144,7 @@ OPTIONS --call-graph:: Setup and enable call-graph (stack chain/backtrace) recording, - implies -g. + implies -g. Default is "fp". Allows specifying "fp" (frame pointer) or "dwarf" (DWARF's CFI - Call Frame Information) or "lbr" @@ -130,13 +154,18 @@ OPTIONS In some systems, where binaries are built with gcc --fomit-frame-pointer, using the "fp" method will produce bogus call graphs, using "dwarf", if available (perf tools linked to - the libunwind library) should be used instead. + the libunwind or libdw library) should be used instead. Using the "lbr" method doesn't require any compiler options. It will produce call graphs from the hardware LBR registers. The main limitation is that it is only available on new Intel platforms, such as Haswell. It can only get user call chain. It doesn't work with branch stack sampling at the same time. + When "dwarf" recording is used, perf also records (user) stack dump + when sampled. Default size of the stack dump is 8192 (bytes). + User can change the size by passing the size after comma like + "--call-graph dwarf,4096". + -q:: --quiet:: Don't print any message, useful for scripting. 
@@ -212,6 +241,7 @@ following filters are defined: - any_call: any function call or system call - any_ret: any function return or system call return - ind_call: any indirect branch + - call: direct calls, including far (to/from kernel) calls - u: only when the branch target is at the user level - k: only when the branch target is in the kernel - hv: only when the target is at the hypervisor level @@ -252,7 +282,11 @@ filter out the startup phase of the program, which is often very different. --intr-regs:: Capture machine state (registers) at interrupt, i.e., on counter overflows for each sample. List of captured registers depends on the architecture. This option -is off by default. +is off by default. It is possible to select the registers to sample using their +symbolic names, e.g. on x86, ax, si. To list the available registers use +--intr-regs=\?. To name registers, pass a comma separated list such as +--intr-regs=ax,bx. The list of registers is architecture dependent. + --running-time:: Record running and enabled time for read events (:S) @@ -276,6 +310,16 @@ When processing pre-existing threads /proc/XXX/mmap, it may take a long time, because the file may be huge. A time out is needed in such cases. This option sets the time out limit. The default value is 500 ms. +--switch-events:: +Record context switch events i.e. events of type PERF_RECORD_SWITCH or +PERF_RECORD_SWITCH_CPU_WIDE. + +--clang-path:: +Path to clang binary to use for compiling BPF scriptlets. + +--clang-opt:: +Options passed to clang when compiling BPF scriptlets. 
+ SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index c33b69f3374f..5ce8da1e1256 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -29,7 +29,7 @@ OPTIONS --show-nr-samples:: Show the number of samples for each symbol ---showcpuutilization:: +--show-cpu-utilization:: Show sample percentage for different cpu modes. -T:: @@ -68,7 +68,7 @@ OPTIONS --sort=:: Sort histogram entries by given key(s) - multiple keys can be specified in CSV format. Following sort keys are available: - pid, comm, dso, symbol, parent, cpu, srcline, weight, local_weight. + pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight. Each key has following meaning: @@ -79,8 +79,11 @@ OPTIONS - parent: name of function matched to the parent regex filter. Unmatched entries are displayed as "[other]". - cpu: cpu number the task ran at the time of sample + - socket: processor socket number the task ran at the time of sample - srcline: filename and line number executed at the time of sample. The DWARF debugging info must be provided. + - srcfile: file name of the source file of the sample. Requires dwarf + information. - weight: Event specific weight, e.g. memory latency or transaction abort cost. This is the global weight. - local_weight: Local weight version of the weight above. @@ -109,6 +112,7 @@ OPTIONS - mispredict: "N" for predicted branch, "Y" for mispredicted branch - in_tx: branch in TSX transaction - abort: TSX transaction abort. + - cycles: Cycles in basic block And default sort keys are changed to comm, dso_from, symbol_from, dso_to and symbol_to, see '--branch-stack'. @@ -165,30 +169,40 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. --g [type,min[,limit],order[,key][,branch]]:: ---call-graph:: - Display call chains using type, min percent threshold, optional print - limit and order. 
- type can be either: +-g:: +--call-graph=<print_type,threshold[,print_limit],order,sort_key,branch>:: + Display call chains using type, min percent threshold, print limit, + call order, sort key and branch. Note that ordering of parameters is not + fixed so any parameter can be given in an arbitrary order. One exception + is the print_limit which should be preceded by threshold. + + print_type can be either: - flat: single column, linear exposure of call chains. - graph: use a graph tree, displaying absolute overhead rates. + - graph: use a graph tree, displaying absolute overhead rates. (default) - fractal: like graph, but displays relative rates. Each branch of - the tree is considered as a new profiled object. + + the tree is considered as a new profiled object. + - none: disable call chain display. + + threshold is a percentage value which specifies a minimum percent to be + included in the output call graph. Default is 0.5 (%). + + print_limit is only applied when stdio interface is used. It's to limit + number of call graph entries in a single hist entry. Note that it needs + to be given after threshold (but not necessarily consecutive). + Default is 0 (unlimited). order can be either: - callee: callee based call graph. - caller: inverted caller based call graph. + Default is 'caller' when --children is used, otherwise 'callee'. - key can be: - - function: compare on functions + sort_key can be: + - function: compare on functions (default) - address: compare on individual code addresses branch can be: - - branch: include last branch information in callgraph - when available. Usually more convenient to use --branch-history - for this. - - Default: fractal,0.5,callee,function. + - branch: include last branch information in callgraph when available. + Usually more convenient to use --branch-history for this. --children:: Accumulate callchain of children to parent entry so that then can @@ -201,6 +215,8 @@ OPTIONS beyond the specified depth will be ignored. 
This is a trade-off between information loss and faster processing especially for workloads that can have a very long callchain stack. + Note that when using the --itrace option the synthesized callchain size + will override this value if the synthesized callchain size is bigger. Default: 127 @@ -328,31 +344,26 @@ OPTIONS --itrace:: Options for decoding instruction tracing data. The options are: - i synthesize instructions events - b synthesize branches events - c synthesize branches events (calls only) - r synthesize branches events (returns only) - x synthesize transactions events - e synthesize error events - d create a debug log - g synthesize a call chain (use with i or x) - - The default is all events i.e. the same as --itrace=ibxe - - In addition, the period (default 100000) for instructions events - can be specified in units of: - - i instructions - t ticks - ms milliseconds - us microseconds - ns nanoseconds (default) - - Also the call chain size (default 16, max. 1024) for instructions or - transactions events can be specified. +include::itrace.txt[] To disable decoding entirely, use --no-itrace. +--full-source-path:: + Show the full path for source files for srcline output. + +--show-ref-call-graph:: + When multiple events are sampled, it may not be needed to collect + callgraphs for all of them. The sample sites are usually nearby, + and it's enough to collect the callgraphs on a reference event. + So user can use "call-graph=no" event modifier to disable callgraph + for other events to reduce the overhead. + However, perf report cannot show callgraphs for the event which + disable the callgraph. + This option extends the perf report to show reference callgraphs, + which collected by reference event, in no callgraph event. 
+ +--socket-filter:: + Only report the samples on the processor socket that match with this filter include::callchain-overhead-calculation.txt[] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index c82df572fac2..382ddfb45d1d 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -112,11 +112,11 @@ OPTIONS --debug-mode:: Do various checks like samples ordering and lost events. --f:: +-F:: --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, flags. + srcline, period, iregs, brstack, brstacksym, flags. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace @@ -175,6 +175,16 @@ OPTIONS Finally, a user may not set fields to none for all event types. i.e., -f "" is not allowed. + The brstack output includes branch related information with raw addresses using the + /v/v/v/v/ syntax in the following order: + FROM: branch source instruction + TO : branch target instruction + M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported + X/- : X=branch inside a transactional region, -=not in transaction region or not supported + A/- : A=TSX abort entry, -=not aborted region or not supported + + The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible. + -k:: --vmlinux=<file>:: vmlinux pathname @@ -222,6 +232,17 @@ OPTIONS --show-mmap-events Display mmap related events (e.g. MMAP, MMAP2). +--show-switch-events + Display context switch events i.e. events of type PERF_RECORD_SWITCH or + PERF_RECORD_SWITCH_CPU_WIDE. + +--demangle:: + Demangle symbol names to human readable form. 
It's enabled by default, + disable with --no-demangle. + +--demangle-kernel:: + Demangle kernel symbol names to human readable form (for C++ kernels). + --header Show perf.data header. @@ -231,30 +252,15 @@ OPTIONS --itrace:: Options for decoding instruction tracing data. The options are: - i synthesize instructions events - b synthesize branches events - c synthesize branches events (calls only) - r synthesize branches events (returns only) - x synthesize transactions events - e synthesize error events - d create a debug log - g synthesize a call chain (use with i or x) +include::itrace.txt[] - The default is all events i.e. the same as --itrace=ibxe - - In addition, the period (default 100000) for instructions events - can be specified in units of: - - i instructions - t ticks - ms milliseconds - us microseconds - ns nanoseconds (default) + To disable decoding entirely, use --no-itrace. - Also the call chain size (default 16, max. 1024) for instructions or - transactions events can be specified. +--full-source-path:: + Show the full path for source files for srcline output. - To disable decoding entirely, use --no-itrace. +--ns:: + Use 9 decimal places when displaying time (i.e. show the nanoseconds) SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 47469abdcc1c..4e074a660826 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -128,8 +128,9 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m -I msecs:: --interval-print msecs:: - Print count deltas every N milliseconds (minimum: 100ms) - example: perf stat -I 1000 -e cycles -a sleep 5 +Print count deltas every N milliseconds (minimum: 10ms) +The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. 
+ example: 'perf stat -I 1000 -e cycles -a sleep 5' --per-socket:: Aggregate counts per processor socket for system-wide mode measurements. This diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 776aec4d0927..556cec09bf50 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -160,9 +160,10 @@ Default is to monitor all CPUS. -g:: Enables call-graph (stack chain/backtrace) recording. ---call-graph:: +--call-graph [mode,type,min[,limit],order[,key][,branch]]:: Setup and enable call-graph (stack chain/backtrace) recording, - implies -g. + implies -g. See `--call-graph` section in perf-record and + perf-report man pages for details. --children:: Accumulate callchain of children to parent entry so that then can @@ -208,6 +209,27 @@ Default is to monitor all CPUS. This option sets the time out limit. The default value is 500 ms. +-b:: +--branch-any:: + Enable taken branch stack sampling. Any type of taken branch may be sampled. + This is a shortcut for --branch-filter any. See --branch-filter for more infos. + +-j:: +--branch-filter:: + Enable taken branch stack sampling. Each sample captures a series of consecutive + taken branches. The number of branches captured with each sample depends on the + underlying hardware, the type of branches of interest, and the executed code. + It is possible to select the types of branches captured by enabling filters. + For a full list of modifiers please see the perf record manpage. + + The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. + The privilege levels may be omitted, in which case, the privilege levels of the associated + event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege + levels are subject to permissions. When sampling on multiple events, branch stack sampling + is enabled for all the sampling events. The sampled branch type is the same for all events. 
+ The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k + Note that this feature may not be available on all processors. + INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 2b131776363e..864e37597252 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -27,6 +27,14 @@ OPTIONS Setup buildid cache directory. It has higher priority than buildid.dir config file option. +-v:: +--version:: + Display perf version. + +-h:: +--help:: + Run perf help command. + DESCRIPTION ----------- Performance counters for Linux are a new kernel-based subsystem diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index d01a0aad5a01..39c38cb45b00 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -17,7 +17,9 @@ tools/build tools/arch/x86/include/asm/atomic.h tools/arch/x86/include/asm/rmwcc.h tools/lib/traceevent +tools/lib/bpf tools/lib/api +tools/lib/bpf tools/lib/hweight.c tools/lib/rbtree.c tools/lib/symbol/kallsyms.c @@ -40,7 +42,7 @@ tools/include/asm-generic/bitops.h tools/include/linux/atomic.h tools/include/linux/bitops.h tools/include/linux/compiler.h -tools/include/linux/export.h +tools/include/linux/filter.h tools/include/linux/hash.h tools/include/linux/kernel.h tools/include/linux/list.h @@ -49,6 +51,7 @@ tools/include/linux/poison.h tools/include/linux/rbtree.h tools/include/linux/rbtree_augmented.h tools/include/linux/types.h +tools/include/linux/err.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h include/asm-generic/bitops/fls64.h @@ -67,6 +70,8 @@ arch/*/lib/memset*.S include/linux/poison.h include/linux/hw_breakpoint.h include/uapi/linux/perf_event.h +include/uapi/linux/bpf.h +include/uapi/linux/bpf_common.h include/uapi/linux/const.h include/uapi/linux/swab.h include/uapi/linux/hw_breakpoint.h diff --git a/tools/perf/Makefile.perf 
b/tools/perf/Makefile.perf index bba34636b733..0d19d5447d6c 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -75,6 +75,14 @@ include config/utilities.mak # Define NO_LZMA if you do not want to support compressed (xz) kernel modules # # Define NO_AUXTRACE if you do not want AUX area tracing support +# +# Define NO_LIBBPF if you do not want BPF support + +# As per kernel Makefile, avoid funny character set dependencies +unexport LC_ALL +LC_COLLATE=C +LC_NUMERIC=C +export LC_COLLATE LC_NUMERIC ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) @@ -135,9 +143,11 @@ INSTALL = install FLEX = flex BISON = bison STRIP = strip +AWK = awk LIB_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ +BPF_DIR = $(srctree)/tools/lib/bpf/ # include config/Makefile by default and rule out # non-config cases @@ -173,6 +183,7 @@ strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) + BPF_PATH=$(OUTPUT) ifneq ($(subdir),) LIB_PATH=$(OUTPUT)/../lib/api/ else @@ -181,6 +192,7 @@ endif else TE_PATH=$(TRACE_EVENT_DIR) LIB_PATH=$(LIB_DIR) + BPF_PATH=$(BPF_DIR) endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a @@ -192,6 +204,8 @@ LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYN LIBAPI = $(LIB_PATH)libapi.a export LIBAPI +LIBBPF = $(BPF_PATH)libbpf.a + # python extension build directories PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/ @@ -244,6 +258,9 @@ export PERL_PATH LIB_FILE=$(OUTPUT)libperf.a PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) +ifndef NO_LIBBPF + PERFLIBS += $(LIBBPF) +endif # We choose to avoid "if .. else if .. else .. endif endif" # because maintaining the nesting to match is a pain. If @@ -289,17 +306,17 @@ strip: $(PROGRAMS) $(OUTPUT)perf PERF_IN := $(OUTPUT)perf-in.o -export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX -build := -f $(srctree)/tools/build/Makefile.build dir=. 
obj +export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK +include $(srctree)/tools/build/Makefile.include -$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE +$(PERF_IN): prepare FORCE $(Q)$(MAKE) $(build)=perf $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ $(PERF_IN) $(LIBS) -o $@ -$(GTK_IN): FORCE +$(GTK_IN): fixdep FORCE $(Q)$(MAKE) $(build)=gtk $(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) @@ -342,27 +359,27 @@ endif __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) -single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h +prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep -$(OUTPUT)%.o: %.c single_dep FORCE +$(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.i: %.c single_dep FORCE +$(OUTPUT)%.i: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.s: %.c single_dep FORCE +$(OUTPUT)%.s: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%-bison.o: %.c single_dep FORCE +$(OUTPUT)%-bison.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%-flex.o: %.c single_dep FORCE +$(OUTPUT)%-flex.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.o: %.S single_dep FORCE +$(OUTPUT)%.o: %.S prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.i: %.S single_dep FORCE +$(OUTPUT)%.i: %.S prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)perf-%: %.o $(PERFLIBS) @@ -382,7 +399,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBPERF_IN := $(OUTPUT)libperf-in.o -$(LIBPERF_IN): FORCE +$(LIBPERF_IN): fixdep FORCE $(Q)$(MAKE) 
$(build)=libperf $(LIB_FILE): $(LIBPERF_IN) @@ -390,10 +407,10 @@ $(LIB_FILE): $(LIBPERF_IN) LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) -$(LIBTRACEEVENT): FORCE +$(LIBTRACEEVENT): fixdep FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a -libtraceevent_plugins: FORCE +libtraceevent_plugins: fixdep FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins @@ -406,13 +423,20 @@ $(LIBTRACEEVENT)-clean: install-traceevent-plugins: $(LIBTRACEEVENT) $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins -$(LIBAPI): FORCE +$(LIBAPI): fixdep FORCE $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null +$(LIBBPF): fixdep FORCE + $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a + +$(LIBBPF)-clean: + $(call QUIET_CLEAN, libbpf) + $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null + help: @echo 'Perf make targets:' @echo ' doc - make *all* documentation (see below)' @@ -452,7 +476,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html $(DOC_TARGETS): $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) -TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol +TAG_FOLDERS= . 
../lib/traceevent ../lib/api ../lib/symbol ../include ../lib/bpf TAG_FILES= ../../include/uapi/linux/perf_event.h TAGS: @@ -507,6 +531,11 @@ endif $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(call QUIET_INSTALL, perf-with-kcore) \ $(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' +ifndef NO_LIBAUDIT + $(call QUIET_INSTALL, strace/groups) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \ + $(INSTALL) trace/strace/groups/* -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)' +endif ifndef NO_LIBPERL $(call QUIET_INSTALL, perl-scripts) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \ @@ -555,12 +584,13 @@ config-clean: $(call QUIET_CLEAN, config) $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null -clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean +clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean config-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find . 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 - $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ + $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) @@ -578,6 +608,6 @@ FORCE: .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare .PHONY: libtraceevent_plugins diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build new file mode 100644 index 000000000000..1bb8bf6d7fd4 --- /dev/null +++ b/tools/perf/arch/alpha/Build @@ -0,0 +1 @@ +# empty diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index b7bb42c44694..e83c8ce24303 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -128,9 +128,8 @@ static const char *normalize_arch(char *arch) return arch; } -static int perf_session_env__lookup_binutils_path(struct perf_session_env *env, - const char *name, - const char **path) +static int perf_env__lookup_binutils_path(struct perf_env *env, + const char *name, const char **path) { int idx; const char *arch, *cross_env; @@ -206,7 +205,7 @@ out_error: return -1; } -int perf_session_env__lookup_objdump(struct perf_session_env *env) +int perf_env__lookup_objdump(struct perf_env *env) { /* * For live mode, env->arch will be NULL and we can use @@ -215,6 +214,5 
@@ int perf_session_env__lookup_objdump(struct perf_session_env *env) if (env->arch == NULL) return 0; - return perf_session_env__lookup_binutils_path(env, "objdump", - &objdump_path); + return perf_env__lookup_binutils_path(env, "objdump", &objdump_path); } diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index ede246eda9be..7529cfb143ce 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -1,10 +1,10 @@ #ifndef ARCH_PERF_COMMON_H #define ARCH_PERF_COMMON_H -#include "../util/session.h" +#include "../util/env.h" extern const char *objdump_path; -int perf_session_env__lookup_objdump(struct perf_session_env *env); +int perf_env__lookup_objdump(struct perf_env *env); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build new file mode 100644 index 000000000000..1bb8bf6d7fd4 --- /dev/null +++ b/tools/perf/arch/mips/Build @@ -0,0 +1 @@ +# empty diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build new file mode 100644 index 000000000000..1bb8bf6d7fd4 --- /dev/null +++ b/tools/perf/arch/parisc/Build @@ -0,0 +1 @@ +# empty diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c index 0d0897f57a10..f8dfa89696f4 100644 --- a/tools/perf/arch/sh/util/dwarf-regs.c +++ b/tools/perf/arch/sh/util/dwarf-regs.c @@ -51,5 +51,5 @@ const char *sh_regs_table[SH_MAX_REGS] = { /* Return architecture dependent register string (for kprobe-tracer) */ const char *get_arch_regstr(unsigned int n) { - return (n <= SH_MAX_REGS) ? sh_regs_table[n] : NULL; + return (n < SH_MAX_REGS) ? 
sh_regs_table[n] : NULL; } diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c index 92eda412fed3..b704fdb9237a 100644 --- a/tools/perf/arch/sparc/util/dwarf-regs.c +++ b/tools/perf/arch/sparc/util/dwarf-regs.c @@ -39,5 +39,5 @@ const char *sparc_regs_table[SPARC_MAX_REGS] = { */ const char *get_arch_regstr(unsigned int n) { - return (n <= SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; + return (n < SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; } diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build index 41bf61da476a..db52fa22d3a1 100644 --- a/tools/perf/arch/x86/Build +++ b/tools/perf/arch/x86/Build @@ -1,2 +1,2 @@ libperf-y += util/ -libperf-$(CONFIG_DWARF_UNWIND) += tests/ +libperf-y += tests/ diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 21322e0385b8..09ba923debe8 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -2,3 +2,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 +PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h new file mode 100644 index 000000000000..7ed00f4b0908 --- /dev/null +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -0,0 +1,19 @@ +#ifndef ARCH_TESTS_H +#define ARCH_TESTS_H + +/* Tests */ +int test__rdpmc(void); +int test__perf_time_to_tsc(void); +int test__insn_x86(void); +int test__intel_cqm_count_nmi_context(void); + +#ifdef HAVE_DWARF_UNWIND_SUPPORT +struct thread; +struct perf_sample; +int test__arch_unwind_sample(struct perf_sample *sample, + struct thread *thread); +#endif + +extern struct test arch_tests[]; + +#endif diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index b30eff9bcc83..cbb7e978166b 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -1,2 +1,8 @@ -libperf-y += regs_load.o -libperf-y += dwarf-unwind.o 
+libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o +libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o + +libperf-y += arch-tests.o +libperf-y += rdpmc.o +libperf-y += perf-time-to-tsc.o +libperf-$(CONFIG_AUXTRACE) += insn-x86.o +libperf-y += intel-cqm.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c new file mode 100644 index 000000000000..2218cb64f840 --- /dev/null +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -0,0 +1,34 @@ +#include <string.h> +#include "tests/tests.h" +#include "arch-tests.h" + +struct test arch_tests[] = { + { + .desc = "x86 rdpmc test", + .func = test__rdpmc, + }, + { + .desc = "Test converting perf time to TSC", + .func = test__perf_time_to_tsc, + }, +#ifdef HAVE_DWARF_UNWIND_SUPPORT + { + .desc = "Test dwarf unwind", + .func = test__dwarf_unwind, + }, +#endif +#ifdef HAVE_AUXTRACE_SUPPORT + { + .desc = "Test x86 instruction decoder - new instructions", + .func = test__insn_x86, + }, +#endif + { + .desc = "Test intel cqm nmi context read", + .func = test__intel_cqm_count_nmi_context, + }, + { + .func = NULL, + }, + +}; diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index d8bbf7ad1681..7f209ce827bf 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -5,6 +5,7 @@ #include "event.h" #include "debug.h" #include "tests/tests.h" +#include "arch-tests.h" #define STACK_SIZE 8192 diff --git a/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk b/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk new file mode 100644 index 000000000000..a21454835cd4 --- /dev/null +++ b/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk @@ -0,0 +1,75 @@ +#!/bin/awk -f +# gen-insn-x86-dat.awk: script to convert data for the insn-x86 test +# Copyright (c) 2015, Intel Corporation. 
+# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. + +BEGIN { + print "/*" + print " * Generated by gen-insn-x86-dat.sh and gen-insn-x86-dat.awk" + print " * from insn-x86-dat-src.c for inclusion by insn-x86.c" + print " * Do not change this code." + print "*/\n" + op = "" + branch = "" + rel = 0 + going = 0 +} + +/ Start here / { + going = 1 +} + +/ Stop here / { + going = 0 +} + +/^\s*[0-9a-fA-F]+\:/ { + if (going) { + colon_pos = index($0, ":") + useful_line = substr($0, colon_pos + 1) + first_pos = match(useful_line, "[0-9a-fA-F]") + useful_line = substr(useful_line, first_pos) + gsub("\t", "\\t", useful_line) + printf "{{" + len = 0 + for (i = 2; i <= NF; i++) { + if (match($i, "^[0-9a-fA-F][0-9a-fA-F]$")) { + printf "0x%s, ", $i + len += 1 + } else { + break + } + } + printf "}, %d, %s, \"%s\", \"%s\",", len, rel, op, branch + printf "\n\"%s\",},\n", useful_line + op = "" + branch = "" + rel = 0 + } +} + +/ Expecting: / { + expecting_str = " Expecting: " + expecting_len = length(expecting_str) + expecting_pos = index($0, expecting_str) + useful_line = substr($0, expecting_pos + expecting_len) + for (i = 1; i <= NF; i++) { + if ($i == "Expecting:") { + i++ + op = $i + i++ + branch = $i + i++ + rel = $i + break + } + } +} diff --git a/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh b/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh new file mode 100755 index 000000000000..2d4ef94cff98 --- /dev/null +++ b/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh @@ -0,0 +1,43 @@ +#!/bin/sh +# gen-insn-x86-dat: generate data for the insn-x86 test +# Copyright (c) 2015, Intel Corporation. 
+# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. + +set -e + +if [ "$(uname -m)" != "x86_64" ]; then + echo "ERROR: This script only works on x86_64" + exit 1 +fi + +cd $(dirname $0) + +trap 'echo "Might need a more recent version of binutils"' EXIT + +echo "Compiling insn-x86-dat-src.c to 64-bit object" + +gcc -g -c insn-x86-dat-src.c + +objdump -dSw insn-x86-dat-src.o | awk -f gen-insn-x86-dat.awk > insn-x86-dat-64.c + +rm -f insn-x86-dat-src.o + +echo "Compiling insn-x86-dat-src.c to 32-bit object" + +gcc -g -c -m32 insn-x86-dat-src.c + +objdump -dSw insn-x86-dat-src.o | awk -f gen-insn-x86-dat.awk > insn-x86-dat-32.c + +rm -f insn-x86-dat-src.o + +trap - EXIT + +echo "Done (use git diff to see the changes)" diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c new file mode 100644 index 000000000000..3b491cfe204e --- /dev/null +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -0,0 +1,658 @@ +/* + * Generated by gen-insn-x86-dat.sh and gen-insn-x86-dat.awk + * from insn-x86-dat-src.c for inclusion by insn-x86.c + * Do not change this code. 
+*/ + +{{0x0f, 0x31, }, 2, 0, "", "", +"0f 31 \trdtsc ",}, +{{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"f3 0f 1b 00 \tbndmk (%eax),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1b 05 78 56 34 12 \tbndmk 0x12345678,%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x18, }, 4, 0, "", "", +"f3 0f 1b 18 \tbndmk (%eax),%bnd3",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "", +"f3 0f 1b 04 01 \tbndmk (%ecx,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 04 05 78 56 34 12 \tbndmk 0x12345678(,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "", +"f3 0f 1b 04 08 \tbndmk (%eax,%ecx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "", +"f3 0f 1b 04 c8 \tbndmk (%eax,%ecx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "", +"f3 0f 1b 40 12 \tbndmk 0x12(%eax),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "", +"f3 0f 1b 45 12 \tbndmk 0x12(%ebp),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 01 12 \tbndmk 0x12(%ecx,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 05 12 \tbndmk 0x12(%ebp,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 08 12 \tbndmk 0x12(%eax,%ecx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 c8 12 \tbndmk 0x12(%eax,%ecx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1b 80 78 56 34 12 \tbndmk 0x12345678(%eax),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1b 85 78 56 34 12 \tbndmk 0x12345678(%ebp),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 01 78 56 34 12 \tbndmk 0x12345678(%ecx,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 05 78 56 34 12 \tbndmk 0x12345678(%ebp,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 
0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 08 78 56 34 12 \tbndmk 0x12345678(%eax,%ecx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 c8 78 56 34 12 \tbndmk 0x12345678(%eax,%ecx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"f3 0f 1a 00 \tbndcl (%eax),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1a 05 78 56 34 12 \tbndcl 0x12345678,%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x18, }, 4, 0, "", "", +"f3 0f 1a 18 \tbndcl (%eax),%bnd3",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "", +"f3 0f 1a 04 01 \tbndcl (%ecx,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 04 05 78 56 34 12 \tbndcl 0x12345678(,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "", +"f3 0f 1a 04 08 \tbndcl (%eax,%ecx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "", +"f3 0f 1a 04 c8 \tbndcl (%eax,%ecx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "", +"f3 0f 1a 40 12 \tbndcl 0x12(%eax),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "", +"f3 0f 1a 45 12 \tbndcl 0x12(%ebp),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 01 12 \tbndcl 0x12(%ecx,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 05 12 \tbndcl 0x12(%ebp,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 08 12 \tbndcl 0x12(%eax,%ecx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 c8 12 \tbndcl 0x12(%eax,%ecx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1a 80 78 56 34 12 \tbndcl 0x12345678(%eax),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1a 85 78 56 34 12 \tbndcl 0x12345678(%ebp),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 01 78 56 34 12 
\tbndcl 0x12345678(%ecx,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 05 78 56 34 12 \tbndcl 0x12345678(%ebp,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 08 78 56 34 12 \tbndcl 0x12345678(%eax,%ecx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 c8 78 56 34 12 \tbndcl 0x12345678(%eax,%ecx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "", +"f3 0f 1a c0 \tbndcl %eax,%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"f2 0f 1a 00 \tbndcu (%eax),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1a 05 78 56 34 12 \tbndcu 0x12345678,%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x18, }, 4, 0, "", "", +"f2 0f 1a 18 \tbndcu (%eax),%bnd3",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "", +"f2 0f 1a 04 01 \tbndcu (%ecx,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 04 05 78 56 34 12 \tbndcu 0x12345678(,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "", +"f2 0f 1a 04 08 \tbndcu (%eax,%ecx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "", +"f2 0f 1a 04 c8 \tbndcu (%eax,%ecx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "", +"f2 0f 1a 40 12 \tbndcu 0x12(%eax),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "", +"f2 0f 1a 45 12 \tbndcu 0x12(%ebp),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 01 12 \tbndcu 0x12(%ecx,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 05 12 \tbndcu 0x12(%ebp,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 08 12 \tbndcu 0x12(%eax,%ecx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 c8 12 \tbndcu 0x12(%eax,%ecx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 
0f 1a 80 78 56 34 12 \tbndcu 0x12345678(%eax),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1a 85 78 56 34 12 \tbndcu 0x12345678(%ebp),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 01 78 56 34 12 \tbndcu 0x12345678(%ecx,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 05 78 56 34 12 \tbndcu 0x12345678(%ebp,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 08 78 56 34 12 \tbndcu 0x12345678(%eax,%ecx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 c8 78 56 34 12 \tbndcu 0x12345678(%eax,%ecx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "", +"f2 0f 1a c0 \tbndcu %eax,%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"f2 0f 1b 00 \tbndcn (%eax),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1b 05 78 56 34 12 \tbndcn 0x12345678,%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x18, }, 4, 0, "", "", +"f2 0f 1b 18 \tbndcn (%eax),%bnd3",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "", +"f2 0f 1b 04 01 \tbndcn (%ecx,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 04 05 78 56 34 12 \tbndcn 0x12345678(,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "", +"f2 0f 1b 04 08 \tbndcn (%eax,%ecx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "", +"f2 0f 1b 04 c8 \tbndcn (%eax,%ecx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "", +"f2 0f 1b 40 12 \tbndcn 0x12(%eax),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "", +"f2 0f 1b 45 12 \tbndcn 0x12(%ebp),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 01 12 \tbndcn 0x12(%ecx,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 05 12 \tbndcn 0x12(%ebp,%eax,1),%bnd0",}, +{{0xf2, 
0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 08 12 \tbndcn 0x12(%eax,%ecx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 c8 12 \tbndcn 0x12(%eax,%ecx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1b 80 78 56 34 12 \tbndcn 0x12345678(%eax),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1b 85 78 56 34 12 \tbndcn 0x12345678(%ebp),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 01 78 56 34 12 \tbndcn 0x12345678(%ecx,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 05 78 56 34 12 \tbndcn 0x12345678(%ebp,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 08 78 56 34 12 \tbndcn 0x12345678(%eax,%ecx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 c8 78 56 34 12 \tbndcn 0x12345678(%eax,%ecx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0xc0, }, 4, 0, "", "", +"f2 0f 1b c0 \tbndcn %eax,%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"66 0f 1a 00 \tbndmov (%eax),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1a 05 78 56 34 12 \tbndmov 0x12345678,%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x18, }, 4, 0, "", "", +"66 0f 1a 18 \tbndmov (%eax),%bnd3",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "", +"66 0f 1a 04 01 \tbndmov (%ecx,%eax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 04 05 78 56 34 12 \tbndmov 0x12345678(,%eax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "", +"66 0f 1a 04 08 \tbndmov (%eax,%ecx,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "", +"66 0f 1a 04 c8 \tbndmov (%eax,%ecx,8),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "", +"66 0f 1a 40 12 \tbndmov 0x12(%eax),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x45, 0x12, 
}, 5, 0, "", "", +"66 0f 1a 45 12 \tbndmov 0x12(%ebp),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 01 12 \tbndmov 0x12(%ecx,%eax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 05 12 \tbndmov 0x12(%ebp,%eax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 08 12 \tbndmov 0x12(%eax,%ecx,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 c8 12 \tbndmov 0x12(%eax,%ecx,8),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1a 80 78 56 34 12 \tbndmov 0x12345678(%eax),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1a 85 78 56 34 12 \tbndmov 0x12345678(%ebp),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 01 78 56 34 12 \tbndmov 0x12345678(%ecx,%eax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 05 78 56 34 12 \tbndmov 0x12345678(%ebp,%eax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 08 78 56 34 12 \tbndmov 0x12345678(%eax,%ecx,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 c8 78 56 34 12 \tbndmov 0x12345678(%eax,%ecx,8),%bnd0",}, +{{0x66, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"66 0f 1b 00 \tbndmov %bnd0,(%eax)",}, +{{0x66, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1b 05 78 56 34 12 \tbndmov %bnd0,0x12345678",}, +{{0x66, 0x0f, 0x1b, 0x18, }, 4, 0, "", "", +"66 0f 1b 18 \tbndmov %bnd3,(%eax)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "", +"66 0f 1b 04 01 \tbndmov %bnd0,(%ecx,%eax,1)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 04 05 78 56 34 12 \tbndmov %bnd0,0x12345678(,%eax,1)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "", +"66 0f 1b 04 08 \tbndmov %bnd0,(%eax,%ecx,1)",}, 
+{{0x66, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "", +"66 0f 1b 04 c8 \tbndmov %bnd0,(%eax,%ecx,8)",}, +{{0x66, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "", +"66 0f 1b 40 12 \tbndmov %bnd0,0x12(%eax)",}, +{{0x66, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "", +"66 0f 1b 45 12 \tbndmov %bnd0,0x12(%ebp)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 01 12 \tbndmov %bnd0,0x12(%ecx,%eax,1)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 05 12 \tbndmov %bnd0,0x12(%ebp,%eax,1)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 08 12 \tbndmov %bnd0,0x12(%eax,%ecx,1)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 c8 12 \tbndmov %bnd0,0x12(%eax,%ecx,8)",}, +{{0x66, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1b 80 78 56 34 12 \tbndmov %bnd0,0x12345678(%eax)",}, +{{0x66, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1b 85 78 56 34 12 \tbndmov %bnd0,0x12345678(%ebp)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 01 78 56 34 12 \tbndmov %bnd0,0x12345678(%ecx,%eax,1)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 05 78 56 34 12 \tbndmov %bnd0,0x12345678(%ebp,%eax,1)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 08 78 56 34 12 \tbndmov %bnd0,0x12345678(%eax,%ecx,1)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 c8 78 56 34 12 \tbndmov %bnd0,0x12345678(%eax,%ecx,8)",}, +{{0x66, 0x0f, 0x1a, 0xc8, }, 4, 0, "", "", +"66 0f 1a c8 \tbndmov %bnd0,%bnd1",}, +{{0x66, 0x0f, 0x1a, 0xc1, }, 4, 0, "", "", +"66 0f 1a c1 \tbndmov %bnd1,%bnd0",}, +{{0x0f, 0x1a, 0x00, }, 3, 0, "", "", +"0f 1a 00 \tbndldx (%eax),%bnd0",}, +{{0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1a 05 78 56 34 12 \tbndldx 0x12345678,%bnd0",}, +{{0x0f, 0x1a, 0x18, }, 3, 0, "", "", +"0f 1a 18 
\tbndldx (%eax),%bnd3",}, +{{0x0f, 0x1a, 0x04, 0x01, }, 4, 0, "", "", +"0f 1a 04 01 \tbndldx (%ecx,%eax,1),%bnd0",}, +{{0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 04 05 78 56 34 12 \tbndldx 0x12345678(,%eax,1),%bnd0",}, +{{0x0f, 0x1a, 0x04, 0x08, }, 4, 0, "", "", +"0f 1a 04 08 \tbndldx (%eax,%ecx,1),%bnd0",}, +{{0x0f, 0x1a, 0x40, 0x12, }, 4, 0, "", "", +"0f 1a 40 12 \tbndldx 0x12(%eax),%bnd0",}, +{{0x0f, 0x1a, 0x45, 0x12, }, 4, 0, "", "", +"0f 1a 45 12 \tbndldx 0x12(%ebp),%bnd0",}, +{{0x0f, 0x1a, 0x44, 0x01, 0x12, }, 5, 0, "", "", +"0f 1a 44 01 12 \tbndldx 0x12(%ecx,%eax,1),%bnd0",}, +{{0x0f, 0x1a, 0x44, 0x05, 0x12, }, 5, 0, "", "", +"0f 1a 44 05 12 \tbndldx 0x12(%ebp,%eax,1),%bnd0",}, +{{0x0f, 0x1a, 0x44, 0x08, 0x12, }, 5, 0, "", "", +"0f 1a 44 08 12 \tbndldx 0x12(%eax,%ecx,1),%bnd0",}, +{{0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1a 80 78 56 34 12 \tbndldx 0x12345678(%eax),%bnd0",}, +{{0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1a 85 78 56 34 12 \tbndldx 0x12345678(%ebp),%bnd0",}, +{{0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 84 01 78 56 34 12 \tbndldx 0x12345678(%ecx,%eax,1),%bnd0",}, +{{0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 84 05 78 56 34 12 \tbndldx 0x12345678(%ebp,%eax,1),%bnd0",}, +{{0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 84 08 78 56 34 12 \tbndldx 0x12345678(%eax,%ecx,1),%bnd0",}, +{{0x0f, 0x1b, 0x00, }, 3, 0, "", "", +"0f 1b 00 \tbndstx %bnd0,(%eax)",}, +{{0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1b 05 78 56 34 12 \tbndstx %bnd0,0x12345678",}, +{{0x0f, 0x1b, 0x18, }, 3, 0, "", "", +"0f 1b 18 \tbndstx %bnd3,(%eax)",}, +{{0x0f, 0x1b, 0x04, 0x01, }, 4, 0, "", "", +"0f 1b 04 01 \tbndstx %bnd0,(%ecx,%eax,1)",}, +{{0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 04 05 78 56 34 12 \tbndstx %bnd0,0x12345678(,%eax,1)",}, +{{0x0f, 0x1b, 0x04, 0x08, 
}, 4, 0, "", "", +"0f 1b 04 08 \tbndstx %bnd0,(%eax,%ecx,1)",}, +{{0x0f, 0x1b, 0x40, 0x12, }, 4, 0, "", "", +"0f 1b 40 12 \tbndstx %bnd0,0x12(%eax)",}, +{{0x0f, 0x1b, 0x45, 0x12, }, 4, 0, "", "", +"0f 1b 45 12 \tbndstx %bnd0,0x12(%ebp)",}, +{{0x0f, 0x1b, 0x44, 0x01, 0x12, }, 5, 0, "", "", +"0f 1b 44 01 12 \tbndstx %bnd0,0x12(%ecx,%eax,1)",}, +{{0x0f, 0x1b, 0x44, 0x05, 0x12, }, 5, 0, "", "", +"0f 1b 44 05 12 \tbndstx %bnd0,0x12(%ebp,%eax,1)",}, +{{0x0f, 0x1b, 0x44, 0x08, 0x12, }, 5, 0, "", "", +"0f 1b 44 08 12 \tbndstx %bnd0,0x12(%eax,%ecx,1)",}, +{{0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1b 80 78 56 34 12 \tbndstx %bnd0,0x12345678(%eax)",}, +{{0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1b 85 78 56 34 12 \tbndstx %bnd0,0x12345678(%ebp)",}, +{{0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 84 01 78 56 34 12 \tbndstx %bnd0,0x12345678(%ecx,%eax,1)",}, +{{0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 84 05 78 56 34 12 \tbndstx %bnd0,0x12345678(%ebp,%eax,1)",}, +{{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%eax,%ecx,1)",}, +{{0xf2, 0xe8, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "call", "unconditional", +"f2 e8 fc ff ff ff \tbnd call 3c3 <main+0x3c3>",}, +{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", +"f2 ff 10 \tbnd call *(%eax)",}, +{{0xf2, 0xc3, }, 2, 0, "ret", "indirect", +"f2 c3 \tbnd ret ",}, +{{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional", +"f2 e9 fc ff ff ff \tbnd jmp 3ce <main+0x3ce>",}, +{{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional", +"f2 e9 fc ff ff ff \tbnd jmp 3d4 <main+0x3d4>",}, +{{0xf2, 0xff, 0x21, }, 3, 0, "jmp", "indirect", +"f2 ff 21 \tbnd jmp *(%ecx)",}, +{{0xf2, 0x0f, 0x85, 0xfc, 0xff, 0xff, 0xff, }, 7, 0xfffffffc, "jcc", "conditional", +"f2 0f 85 fc ff ff ff \tbnd jne 3de <main+0x3de>",}, +{{0x0f, 0x3a, 0xcc, 
0xc1, 0x00, }, 5, 0, "", "", +"0f 3a cc c1 00 \tsha1rnds4 $0x0,%xmm1,%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "", +"0f 3a cc d7 91 \tsha1rnds4 $0x91,%xmm7,%xmm2",}, +{{0x0f, 0x3a, 0xcc, 0x00, 0x91, }, 5, 0, "", "", +"0f 3a cc 00 91 \tsha1rnds4 $0x91,(%eax),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "", +"0f 3a cc 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678,%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x18, 0x91, }, 5, 0, "", "", +"0f 3a cc 18 91 \tsha1rnds4 $0x91,(%eax),%xmm3",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x01, 0x91, }, 6, 0, "", "", +"0f 3a cc 04 01 91 \tsha1rnds4 $0x91,(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 04 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(,%eax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x08, 0x91, }, 6, 0, "", "", +"0f 3a cc 04 08 91 \tsha1rnds4 $0x91,(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0xc8, 0x91, }, 6, 0, "", "", +"0f 3a cc 04 c8 91 \tsha1rnds4 $0x91,(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x40, 0x12, 0x91, }, 6, 0, "", "", +"0f 3a cc 40 12 91 \tsha1rnds4 $0x91,0x12(%eax),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x45, 0x12, 0x91, }, 6, 0, "", "", +"0f 3a cc 45 12 91 \tsha1rnds4 $0x91,0x12(%ebp),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0x01, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 01 12 91 \tsha1rnds4 $0x91,0x12(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0x05, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 05 12 91 \tsha1rnds4 $0x91,0x12(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0x08, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 08 12 91 \tsha1rnds4 $0x91,0x12(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0xc8, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 c8 12 91 \tsha1rnds4 $0x91,0x12(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "", +"0f 3a cc 80 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%eax),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, 0x91, 
}, 9, 0, "", "", +"0f 3a cc 85 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%ebp),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 01 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 08 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 c8 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0xc1, }, 4, 0, "", "", +"0f 38 c8 c1 \tsha1nexte %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xc8, 0xd7, }, 4, 0, "", "", +"0f 38 c8 d7 \tsha1nexte %xmm7,%xmm2",}, +{{0x0f, 0x38, 0xc8, 0x00, }, 4, 0, "", "", +"0f 38 c8 00 \tsha1nexte (%eax),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c8 05 78 56 34 12 \tsha1nexte 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x18, }, 4, 0, "", "", +"0f 38 c8 18 \tsha1nexte (%eax),%xmm3",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 c8 04 01 \tsha1nexte (%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 04 05 78 56 34 12 \tsha1nexte 0x12345678(,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 c8 04 08 \tsha1nexte (%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 c8 04 c8 \tsha1nexte (%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 c8 40 12 \tsha1nexte 0x12(%eax),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 c8 45 12 \tsha1nexte 0x12(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 01 12 \tsha1nexte 0x12(%ecx,%eax,1),%xmm0",}, 
+{{0x0f, 0x38, 0xc8, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 05 12 \tsha1nexte 0x12(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 08 12 \tsha1nexte 0x12(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 c8 12 \tsha1nexte 0x12(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c8 80 78 56 34 12 \tsha1nexte 0x12345678(%eax),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c8 85 78 56 34 12 \tsha1nexte 0x12345678(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 01 78 56 34 12 \tsha1nexte 0x12345678(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 05 78 56 34 12 \tsha1nexte 0x12345678(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 08 78 56 34 12 \tsha1nexte 0x12345678(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 c8 78 56 34 12 \tsha1nexte 0x12345678(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0xc1, }, 4, 0, "", "", +"0f 38 c9 c1 \tsha1msg1 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xc9, 0xd7, }, 4, 0, "", "", +"0f 38 c9 d7 \tsha1msg1 %xmm7,%xmm2",}, +{{0x0f, 0x38, 0xc9, 0x00, }, 4, 0, "", "", +"0f 38 c9 00 \tsha1msg1 (%eax),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c9 05 78 56 34 12 \tsha1msg1 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x18, }, 4, 0, "", "", +"0f 38 c9 18 \tsha1msg1 (%eax),%xmm3",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 c9 04 01 \tsha1msg1 (%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 04 05 78 56 34 12 \tsha1msg1 0x12345678(,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 c9 04 08 
\tsha1msg1 (%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 c9 04 c8 \tsha1msg1 (%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 c9 40 12 \tsha1msg1 0x12(%eax),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 c9 45 12 \tsha1msg1 0x12(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 01 12 \tsha1msg1 0x12(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 05 12 \tsha1msg1 0x12(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 08 12 \tsha1msg1 0x12(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 c8 12 \tsha1msg1 0x12(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c9 80 78 56 34 12 \tsha1msg1 0x12345678(%eax),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c9 85 78 56 34 12 \tsha1msg1 0x12345678(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 01 78 56 34 12 \tsha1msg1 0x12345678(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 05 78 56 34 12 \tsha1msg1 0x12345678(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 08 78 56 34 12 \tsha1msg1 0x12345678(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 c8 78 56 34 12 \tsha1msg1 0x12345678(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xca, 0xc1, }, 4, 0, "", "", +"0f 38 ca c1 \tsha1msg2 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xca, 0xd7, }, 4, 0, "", "", +"0f 38 ca d7 \tsha1msg2 %xmm7,%xmm2",}, +{{0x0f, 0x38, 0xca, 0x00, }, 4, 0, "", "", +"0f 38 ca 00 \tsha1msg2 (%eax),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 ca 05 78 56 34 
12 \tsha1msg2 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xca, 0x18, }, 4, 0, "", "", +"0f 38 ca 18 \tsha1msg2 (%eax),%xmm3",}, +{{0x0f, 0x38, 0xca, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 ca 04 01 \tsha1msg2 (%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 04 05 78 56 34 12 \tsha1msg2 0x12345678(,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 ca 04 08 \tsha1msg2 (%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 ca 04 c8 \tsha1msg2 (%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 ca 40 12 \tsha1msg2 0x12(%eax),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 ca 45 12 \tsha1msg2 0x12(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 01 12 \tsha1msg2 0x12(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 05 12 \tsha1msg2 0x12(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 08 12 \tsha1msg2 0x12(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 c8 12 \tsha1msg2 0x12(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 ca 80 78 56 34 12 \tsha1msg2 0x12345678(%eax),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 ca 85 78 56 34 12 \tsha1msg2 0x12345678(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 01 78 56 34 12 \tsha1msg2 0x12345678(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 05 78 56 34 12 \tsha1msg2 0x12345678(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 08 78 56 34 12 \tsha1msg2 0x12345678(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 
0, "", "", +"0f 38 ca 84 c8 78 56 34 12 \tsha1msg2 0x12345678(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xcb, 0xcc, }, 4, 0, "", "", +"0f 38 cb cc \tsha256rnds2 %xmm0,%xmm4,%xmm1",}, +{{0x0f, 0x38, 0xcb, 0xd7, }, 4, 0, "", "", +"0f 38 cb d7 \tsha256rnds2 %xmm0,%xmm7,%xmm2",}, +{{0x0f, 0x38, 0xcb, 0x08, }, 4, 0, "", "", +"0f 38 cb 08 \tsha256rnds2 %xmm0,(%eax),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cb 0d 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678,%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x18, }, 4, 0, "", "", +"0f 38 cb 18 \tsha256rnds2 %xmm0,(%eax),%xmm3",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x01, }, 5, 0, "", "", +"0f 38 cb 0c 01 \tsha256rnds2 %xmm0,(%ecx,%eax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 0c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(,%eax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x08, }, 5, 0, "", "", +"0f 38 cb 0c 08 \tsha256rnds2 %xmm0,(%eax,%ecx,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0xc8, }, 5, 0, "", "", +"0f 38 cb 0c c8 \tsha256rnds2 %xmm0,(%eax,%ecx,8),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x48, 0x12, }, 5, 0, "", "", +"0f 38 cb 48 12 \tsha256rnds2 %xmm0,0x12(%eax),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4d, 0x12, }, 5, 0, "", "", +"0f 38 cb 4d 12 \tsha256rnds2 %xmm0,0x12(%ebp),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c 01 12 \tsha256rnds2 %xmm0,0x12(%ecx,%eax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c 05 12 \tsha256rnds2 %xmm0,0x12(%ebp,%eax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c 08 12 \tsha256rnds2 %xmm0,0x12(%eax,%ecx,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c c8 12 \tsha256rnds2 %xmm0,0x12(%eax,%ecx,8),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cb 88 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%eax),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, 
"", "", +"0f 38 cb 8d 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%ebp),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c 01 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%ecx,%eax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%ebp,%eax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c 08 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%eax,%ecx,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c c8 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x0f, 0x38, 0xcc, 0xc1, }, 4, 0, "", "", +"0f 38 cc c1 \tsha256msg1 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xcc, 0xd7, }, 4, 0, "", "", +"0f 38 cc d7 \tsha256msg1 %xmm7,%xmm2",}, +{{0x0f, 0x38, 0xcc, 0x00, }, 4, 0, "", "", +"0f 38 cc 00 \tsha256msg1 (%eax),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cc 05 78 56 34 12 \tsha256msg1 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x18, }, 4, 0, "", "", +"0f 38 cc 18 \tsha256msg1 (%eax),%xmm3",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 cc 04 01 \tsha256msg1 (%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 04 05 78 56 34 12 \tsha256msg1 0x12345678(,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 cc 04 08 \tsha256msg1 (%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 cc 04 c8 \tsha256msg1 (%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 cc 40 12 \tsha256msg1 0x12(%eax),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 cc 45 12 \tsha256msg1 0x12(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 cc 44 01 12 \tsha256msg1 0x12(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0x05, 
0x12, }, 6, 0, "", "", +"0f 38 cc 44 05 12 \tsha256msg1 0x12(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 cc 44 08 12 \tsha256msg1 0x12(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 cc 44 c8 12 \tsha256msg1 0x12(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cc 80 78 56 34 12 \tsha256msg1 0x12345678(%eax),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cc 85 78 56 34 12 \tsha256msg1 0x12345678(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 01 78 56 34 12 \tsha256msg1 0x12345678(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 05 78 56 34 12 \tsha256msg1 0x12345678(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 08 78 56 34 12 \tsha256msg1 0x12345678(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 c8 78 56 34 12 \tsha256msg1 0x12345678(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0xc1, }, 4, 0, "", "", +"0f 38 cd c1 \tsha256msg2 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xcd, 0xd7, }, 4, 0, "", "", +"0f 38 cd d7 \tsha256msg2 %xmm7,%xmm2",}, +{{0x0f, 0x38, 0xcd, 0x00, }, 4, 0, "", "", +"0f 38 cd 00 \tsha256msg2 (%eax),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cd 05 78 56 34 12 \tsha256msg2 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x18, }, 4, 0, "", "", +"0f 38 cd 18 \tsha256msg2 (%eax),%xmm3",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 cd 04 01 \tsha256msg2 (%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 04 05 78 56 34 12 \tsha256msg2 0x12345678(,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 cd 04 08 \tsha256msg2 
(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 cd 04 c8 \tsha256msg2 (%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 cd 40 12 \tsha256msg2 0x12(%eax),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 cd 45 12 \tsha256msg2 0x12(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 01 12 \tsha256msg2 0x12(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 05 12 \tsha256msg2 0x12(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 08 12 \tsha256msg2 0x12(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 c8 12 \tsha256msg2 0x12(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cd 80 78 56 34 12 \tsha256msg2 0x12345678(%eax),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cd 85 78 56 34 12 \tsha256msg2 0x12345678(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 01 78 56 34 12 \tsha256msg2 0x12345678(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 05 78 56 34 12 \tsha256msg2 0x12345678(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 08 78 56 34 12 \tsha256msg2 0x12345678(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 c8 78 56 34 12 \tsha256msg2 0x12345678(%eax,%ecx,8),%xmm0",}, +{{0x66, 0x0f, 0xae, 0x38, }, 4, 0, "", "", +"66 0f ae 38 \tclflushopt (%eax)",}, +{{0x66, 0x0f, 0xae, 0x3d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f ae 3d 78 56 34 12 \tclflushopt 0x12345678",}, +{{0x66, 0x0f, 0xae, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f ae bc c8 78 56 34 12 \tclflushopt 
0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0xae, 0x38, }, 3, 0, "", "", +"0f ae 38 \tclflush (%eax)",}, +{{0x0f, 0xae, 0xf8, }, 3, 0, "", "", +"0f ae f8 \tsfence ",}, +{{0x66, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"66 0f ae 30 \tclwb (%eax)",}, +{{0x66, 0x0f, 0xae, 0x35, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f ae 35 78 56 34 12 \tclwb 0x12345678",}, +{{0x66, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f ae b4 c8 78 56 34 12 \tclwb 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0xae, 0x30, }, 3, 0, "", "", +"0f ae 30 \txsaveopt (%eax)",}, +{{0x0f, 0xae, 0xf0, }, 3, 0, "", "", +"0f ae f0 \tmfence ",}, +{{0x0f, 0xc7, 0x20, }, 3, 0, "", "", +"0f c7 20 \txsavec (%eax)",}, +{{0x0f, 0xc7, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f c7 25 78 56 34 12 \txsavec 0x12345678",}, +{{0x0f, 0xc7, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 a4 c8 78 56 34 12 \txsavec 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0xc7, 0x28, }, 3, 0, "", "", +"0f c7 28 \txsaves (%eax)",}, +{{0x0f, 0xc7, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f c7 2d 78 56 34 12 \txsaves 0x12345678",}, +{{0x0f, 0xc7, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 ac c8 78 56 34 12 \txsaves 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0xc7, 0x18, }, 3, 0, "", "", +"0f c7 18 \txrstors (%eax)",}, +{{0x0f, 0xc7, 0x1d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f c7 1d 78 56 34 12 \txrstors 0x12345678",}, +{{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",}, +{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "", +"66 0f ae f8 \tpcommit ",}, diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c new file mode 100644 index 000000000000..4fe7cce179c4 --- /dev/null +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -0,0 +1,768 @@ +/* + * Generated by gen-insn-x86-dat.sh and gen-insn-x86-dat.awk + * from insn-x86-dat-src.c for inclusion by insn-x86.c + * Do not 
change this code. +*/ + +{{0x0f, 0x31, }, 2, 0, "", "", +"0f 31 \trdtsc ",}, +{{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"f3 0f 1b 00 \tbndmk (%rax),%bnd0",}, +{{0xf3, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "", +"f3 41 0f 1b 00 \tbndmk (%r8),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 04 25 78 56 34 12 \tbndmk 0x12345678,%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x18, }, 4, 0, "", "", +"f3 0f 1b 18 \tbndmk (%rax),%bnd3",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "", +"f3 0f 1b 04 01 \tbndmk (%rcx,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 04 05 78 56 34 12 \tbndmk 0x12345678(,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "", +"f3 0f 1b 04 08 \tbndmk (%rax,%rcx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "", +"f3 0f 1b 04 c8 \tbndmk (%rax,%rcx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "", +"f3 0f 1b 40 12 \tbndmk 0x12(%rax),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "", +"f3 0f 1b 45 12 \tbndmk 0x12(%rbp),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 01 12 \tbndmk 0x12(%rcx,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 05 12 \tbndmk 0x12(%rbp,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 08 12 \tbndmk 0x12(%rax,%rcx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 c8 12 \tbndmk 0x12(%rax,%rcx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1b 80 78 56 34 12 \tbndmk 0x12345678(%rax),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1b 85 78 56 34 12 \tbndmk 0x12345678(%rbp),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 01 78 56 34 12 \tbndmk 0x12345678(%rcx,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 
0x12, }, 9, 0, "", "", +"f3 0f 1b 84 05 78 56 34 12 \tbndmk 0x12345678(%rbp,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 08 78 56 34 12 \tbndmk 0x12345678(%rax,%rcx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 c8 78 56 34 12 \tbndmk 0x12345678(%rax,%rcx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"f3 0f 1a 00 \tbndcl (%rax),%bnd0",}, +{{0xf3, 0x41, 0x0f, 0x1a, 0x00, }, 5, 0, "", "", +"f3 41 0f 1a 00 \tbndcl (%r8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 04 25 78 56 34 12 \tbndcl 0x12345678,%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x18, }, 4, 0, "", "", +"f3 0f 1a 18 \tbndcl (%rax),%bnd3",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "", +"f3 0f 1a 04 01 \tbndcl (%rcx,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 04 05 78 56 34 12 \tbndcl 0x12345678(,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "", +"f3 0f 1a 04 08 \tbndcl (%rax,%rcx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "", +"f3 0f 1a 04 c8 \tbndcl (%rax,%rcx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "", +"f3 0f 1a 40 12 \tbndcl 0x12(%rax),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "", +"f3 0f 1a 45 12 \tbndcl 0x12(%rbp),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 01 12 \tbndcl 0x12(%rcx,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 05 12 \tbndcl 0x12(%rbp,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 08 12 \tbndcl 0x12(%rax,%rcx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 c8 12 \tbndcl 0x12(%rax,%rcx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1a 80 78 56 34 12 \tbndcl 0x12345678(%rax),%bnd0",}, +{{0xf3, 0x0f, 
0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1a 85 78 56 34 12 \tbndcl 0x12345678(%rbp),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 01 78 56 34 12 \tbndcl 0x12345678(%rcx,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 05 78 56 34 12 \tbndcl 0x12345678(%rbp,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 08 78 56 34 12 \tbndcl 0x12345678(%rax,%rcx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 c8 78 56 34 12 \tbndcl 0x12345678(%rax,%rcx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "", +"f3 0f 1a c0 \tbndcl %rax,%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"f2 0f 1a 00 \tbndcu (%rax),%bnd0",}, +{{0xf2, 0x41, 0x0f, 0x1a, 0x00, }, 5, 0, "", "", +"f2 41 0f 1a 00 \tbndcu (%r8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 04 25 78 56 34 12 \tbndcu 0x12345678,%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x18, }, 4, 0, "", "", +"f2 0f 1a 18 \tbndcu (%rax),%bnd3",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "", +"f2 0f 1a 04 01 \tbndcu (%rcx,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 04 05 78 56 34 12 \tbndcu 0x12345678(,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "", +"f2 0f 1a 04 08 \tbndcu (%rax,%rcx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "", +"f2 0f 1a 04 c8 \tbndcu (%rax,%rcx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "", +"f2 0f 1a 40 12 \tbndcu 0x12(%rax),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "", +"f2 0f 1a 45 12 \tbndcu 0x12(%rbp),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 01 12 \tbndcu 0x12(%rcx,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 05 12 \tbndcu 
0x12(%rbp,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 08 12 \tbndcu 0x12(%rax,%rcx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 c8 12 \tbndcu 0x12(%rax,%rcx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1a 80 78 56 34 12 \tbndcu 0x12345678(%rax),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1a 85 78 56 34 12 \tbndcu 0x12345678(%rbp),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 01 78 56 34 12 \tbndcu 0x12345678(%rcx,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 05 78 56 34 12 \tbndcu 0x12345678(%rbp,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 08 78 56 34 12 \tbndcu 0x12345678(%rax,%rcx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 c8 78 56 34 12 \tbndcu 0x12345678(%rax,%rcx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "", +"f2 0f 1a c0 \tbndcu %rax,%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"f2 0f 1b 00 \tbndcn (%rax),%bnd0",}, +{{0xf2, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "", +"f2 41 0f 1b 00 \tbndcn (%r8),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 04 25 78 56 34 12 \tbndcn 0x12345678,%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x18, }, 4, 0, "", "", +"f2 0f 1b 18 \tbndcn (%rax),%bnd3",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "", +"f2 0f 1b 04 01 \tbndcn (%rcx,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 04 05 78 56 34 12 \tbndcn 0x12345678(,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "", +"f2 0f 1b 04 08 \tbndcn (%rax,%rcx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "", +"f2 0f 1b 04 c8 \tbndcn (%rax,%rcx,8),%bnd0",}, 
+{{0xf2, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "", +"f2 0f 1b 40 12 \tbndcn 0x12(%rax),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "", +"f2 0f 1b 45 12 \tbndcn 0x12(%rbp),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 01 12 \tbndcn 0x12(%rcx,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 05 12 \tbndcn 0x12(%rbp,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 08 12 \tbndcn 0x12(%rax,%rcx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 c8 12 \tbndcn 0x12(%rax,%rcx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1b 80 78 56 34 12 \tbndcn 0x12345678(%rax),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1b 85 78 56 34 12 \tbndcn 0x12345678(%rbp),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 01 78 56 34 12 \tbndcn 0x12345678(%rcx,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 05 78 56 34 12 \tbndcn 0x12345678(%rbp,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 08 78 56 34 12 \tbndcn 0x12345678(%rax,%rcx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 c8 78 56 34 12 \tbndcn 0x12345678(%rax,%rcx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0xc0, }, 4, 0, "", "", +"f2 0f 1b c0 \tbndcn %rax,%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"66 0f 1a 00 \tbndmov (%rax),%bnd0",}, +{{0x66, 0x41, 0x0f, 0x1a, 0x00, }, 5, 0, "", "", +"66 41 0f 1a 00 \tbndmov (%r8),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 04 25 78 56 34 12 \tbndmov 0x12345678,%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x18, }, 4, 0, "", "", +"66 0f 1a 18 \tbndmov (%rax),%bnd3",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", 
"", +"66 0f 1a 04 01 \tbndmov (%rcx,%rax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 04 05 78 56 34 12 \tbndmov 0x12345678(,%rax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "", +"66 0f 1a 04 08 \tbndmov (%rax,%rcx,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "", +"66 0f 1a 04 c8 \tbndmov (%rax,%rcx,8),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "", +"66 0f 1a 40 12 \tbndmov 0x12(%rax),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "", +"66 0f 1a 45 12 \tbndmov 0x12(%rbp),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 01 12 \tbndmov 0x12(%rcx,%rax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 05 12 \tbndmov 0x12(%rbp,%rax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 08 12 \tbndmov 0x12(%rax,%rcx,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 c8 12 \tbndmov 0x12(%rax,%rcx,8),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1a 80 78 56 34 12 \tbndmov 0x12345678(%rax),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1a 85 78 56 34 12 \tbndmov 0x12345678(%rbp),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 01 78 56 34 12 \tbndmov 0x12345678(%rcx,%rax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 05 78 56 34 12 \tbndmov 0x12345678(%rbp,%rax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 08 78 56 34 12 \tbndmov 0x12345678(%rax,%rcx,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 c8 78 56 34 12 \tbndmov 0x12345678(%rax,%rcx,8),%bnd0",}, +{{0x66, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"66 0f 1b 00 \tbndmov %bnd0,(%rax)",}, +{{0x66, 0x41, 0x0f, 0x1b, 
0x00, }, 5, 0, "", "", +"66 41 0f 1b 00 \tbndmov %bnd0,(%r8)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 04 25 78 56 34 12 \tbndmov %bnd0,0x12345678",}, +{{0x66, 0x0f, 0x1b, 0x18, }, 4, 0, "", "", +"66 0f 1b 18 \tbndmov %bnd3,(%rax)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "", +"66 0f 1b 04 01 \tbndmov %bnd0,(%rcx,%rax,1)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 04 05 78 56 34 12 \tbndmov %bnd0,0x12345678(,%rax,1)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "", +"66 0f 1b 04 08 \tbndmov %bnd0,(%rax,%rcx,1)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "", +"66 0f 1b 04 c8 \tbndmov %bnd0,(%rax,%rcx,8)",}, +{{0x66, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "", +"66 0f 1b 40 12 \tbndmov %bnd0,0x12(%rax)",}, +{{0x66, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "", +"66 0f 1b 45 12 \tbndmov %bnd0,0x12(%rbp)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 01 12 \tbndmov %bnd0,0x12(%rcx,%rax,1)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 05 12 \tbndmov %bnd0,0x12(%rbp,%rax,1)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 08 12 \tbndmov %bnd0,0x12(%rax,%rcx,1)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 c8 12 \tbndmov %bnd0,0x12(%rax,%rcx,8)",}, +{{0x66, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1b 80 78 56 34 12 \tbndmov %bnd0,0x12345678(%rax)",}, +{{0x66, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1b 85 78 56 34 12 \tbndmov %bnd0,0x12345678(%rbp)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 01 78 56 34 12 \tbndmov %bnd0,0x12345678(%rcx,%rax,1)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 05 78 56 34 12 \tbndmov %bnd0,0x12345678(%rbp,%rax,1)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", 
+"66 0f 1b 84 08 78 56 34 12 \tbndmov %bnd0,0x12345678(%rax,%rcx,1)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 c8 78 56 34 12 \tbndmov %bnd0,0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x0f, 0x1a, 0xc8, }, 4, 0, "", "", +"66 0f 1a c8 \tbndmov %bnd0,%bnd1",}, +{{0x66, 0x0f, 0x1a, 0xc1, }, 4, 0, "", "", +"66 0f 1a c1 \tbndmov %bnd1,%bnd0",}, +{{0x0f, 0x1a, 0x00, }, 3, 0, "", "", +"0f 1a 00 \tbndldx (%rax),%bnd0",}, +{{0x41, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"41 0f 1a 00 \tbndldx (%r8),%bnd0",}, +{{0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 04 25 78 56 34 12 \tbndldx 0x12345678,%bnd0",}, +{{0x0f, 0x1a, 0x18, }, 3, 0, "", "", +"0f 1a 18 \tbndldx (%rax),%bnd3",}, +{{0x0f, 0x1a, 0x04, 0x01, }, 4, 0, "", "", +"0f 1a 04 01 \tbndldx (%rcx,%rax,1),%bnd0",}, +{{0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 04 05 78 56 34 12 \tbndldx 0x12345678(,%rax,1),%bnd0",}, +{{0x0f, 0x1a, 0x04, 0x08, }, 4, 0, "", "", +"0f 1a 04 08 \tbndldx (%rax,%rcx,1),%bnd0",}, +{{0x0f, 0x1a, 0x40, 0x12, }, 4, 0, "", "", +"0f 1a 40 12 \tbndldx 0x12(%rax),%bnd0",}, +{{0x0f, 0x1a, 0x45, 0x12, }, 4, 0, "", "", +"0f 1a 45 12 \tbndldx 0x12(%rbp),%bnd0",}, +{{0x0f, 0x1a, 0x44, 0x01, 0x12, }, 5, 0, "", "", +"0f 1a 44 01 12 \tbndldx 0x12(%rcx,%rax,1),%bnd0",}, +{{0x0f, 0x1a, 0x44, 0x05, 0x12, }, 5, 0, "", "", +"0f 1a 44 05 12 \tbndldx 0x12(%rbp,%rax,1),%bnd0",}, +{{0x0f, 0x1a, 0x44, 0x08, 0x12, }, 5, 0, "", "", +"0f 1a 44 08 12 \tbndldx 0x12(%rax,%rcx,1),%bnd0",}, +{{0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1a 80 78 56 34 12 \tbndldx 0x12345678(%rax),%bnd0",}, +{{0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1a 85 78 56 34 12 \tbndldx 0x12345678(%rbp),%bnd0",}, +{{0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 84 01 78 56 34 12 \tbndldx 0x12345678(%rcx,%rax,1),%bnd0",}, +{{0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 
84 05 78 56 34 12 \tbndldx 0x12345678(%rbp,%rax,1),%bnd0",}, +{{0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 84 08 78 56 34 12 \tbndldx 0x12345678(%rax,%rcx,1),%bnd0",}, +{{0x0f, 0x1b, 0x00, }, 3, 0, "", "", +"0f 1b 00 \tbndstx %bnd0,(%rax)",}, +{{0x41, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"41 0f 1b 00 \tbndstx %bnd0,(%r8)",}, +{{0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 04 25 78 56 34 12 \tbndstx %bnd0,0x12345678",}, +{{0x0f, 0x1b, 0x18, }, 3, 0, "", "", +"0f 1b 18 \tbndstx %bnd3,(%rax)",}, +{{0x0f, 0x1b, 0x04, 0x01, }, 4, 0, "", "", +"0f 1b 04 01 \tbndstx %bnd0,(%rcx,%rax,1)",}, +{{0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 04 05 78 56 34 12 \tbndstx %bnd0,0x12345678(,%rax,1)",}, +{{0x0f, 0x1b, 0x04, 0x08, }, 4, 0, "", "", +"0f 1b 04 08 \tbndstx %bnd0,(%rax,%rcx,1)",}, +{{0x0f, 0x1b, 0x40, 0x12, }, 4, 0, "", "", +"0f 1b 40 12 \tbndstx %bnd0,0x12(%rax)",}, +{{0x0f, 0x1b, 0x45, 0x12, }, 4, 0, "", "", +"0f 1b 45 12 \tbndstx %bnd0,0x12(%rbp)",}, +{{0x0f, 0x1b, 0x44, 0x01, 0x12, }, 5, 0, "", "", +"0f 1b 44 01 12 \tbndstx %bnd0,0x12(%rcx,%rax,1)",}, +{{0x0f, 0x1b, 0x44, 0x05, 0x12, }, 5, 0, "", "", +"0f 1b 44 05 12 \tbndstx %bnd0,0x12(%rbp,%rax,1)",}, +{{0x0f, 0x1b, 0x44, 0x08, 0x12, }, 5, 0, "", "", +"0f 1b 44 08 12 \tbndstx %bnd0,0x12(%rax,%rcx,1)",}, +{{0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1b 80 78 56 34 12 \tbndstx %bnd0,0x12345678(%rax)",}, +{{0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1b 85 78 56 34 12 \tbndstx %bnd0,0x12345678(%rbp)",}, +{{0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 84 01 78 56 34 12 \tbndstx %bnd0,0x12345678(%rcx,%rax,1)",}, +{{0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 84 05 78 56 34 12 \tbndstx %bnd0,0x12345678(%rbp,%rax,1)",}, +{{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 84 08 78 56 34 12 \tbndstx 
%bnd0,0x12345678(%rax,%rcx,1)",}, +{{0xf2, 0xe8, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "call", "unconditional", +"f2 e8 00 00 00 00 \tbnd callq 3f6 <main+0x3f6>",}, +{{0x67, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", +"67 f2 ff 10 \tbnd callq *(%eax)",}, +{{0xf2, 0xc3, }, 2, 0, "ret", "indirect", +"f2 c3 \tbnd retq ",}, +{{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional", +"f2 e9 00 00 00 00 \tbnd jmpq 402 <main+0x402>",}, +{{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional", +"f2 e9 00 00 00 00 \tbnd jmpq 408 <main+0x408>",}, +{{0x67, 0xf2, 0xff, 0x21, }, 4, 0, "jmp", "indirect", +"67 f2 ff 21 \tbnd jmpq *(%ecx)",}, +{{0xf2, 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, }, 7, 0, "jcc", "conditional", +"f2 0f 85 00 00 00 00 \tbnd jne 413 <main+0x413>",}, +{{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "", +"0f 3a cc c1 00 \tsha1rnds4 $0x0,%xmm1,%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "", +"0f 3a cc d7 91 \tsha1rnds4 $0x91,%xmm7,%xmm2",}, +{{0x41, 0x0f, 0x3a, 0xcc, 0xc0, 0x91, }, 6, 0, "", "", +"41 0f 3a cc c0 91 \tsha1rnds4 $0x91,%xmm8,%xmm0",}, +{{0x44, 0x0f, 0x3a, 0xcc, 0xc7, 0x91, }, 6, 0, "", "", +"44 0f 3a cc c7 91 \tsha1rnds4 $0x91,%xmm7,%xmm8",}, +{{0x45, 0x0f, 0x3a, 0xcc, 0xc7, 0x91, }, 6, 0, "", "", +"45 0f 3a cc c7 91 \tsha1rnds4 $0x91,%xmm15,%xmm8",}, +{{0x0f, 0x3a, 0xcc, 0x00, 0x91, }, 5, 0, "", "", +"0f 3a cc 00 91 \tsha1rnds4 $0x91,(%rax),%xmm0",}, +{{0x41, 0x0f, 0x3a, 0xcc, 0x00, 0x91, }, 6, 0, "", "", +"41 0f 3a cc 00 91 \tsha1rnds4 $0x91,(%r8),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 04 25 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678,%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x18, 0x91, }, 5, 0, "", "", +"0f 3a cc 18 91 \tsha1rnds4 $0x91,(%rax),%xmm3",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x01, 0x91, }, 6, 0, "", "", +"0f 3a cc 04 01 91 \tsha1rnds4 $0x91,(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 
04 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(,%rax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x08, 0x91, }, 6, 0, "", "", +"0f 3a cc 04 08 91 \tsha1rnds4 $0x91,(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0xc8, 0x91, }, 6, 0, "", "", +"0f 3a cc 04 c8 91 \tsha1rnds4 $0x91,(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x40, 0x12, 0x91, }, 6, 0, "", "", +"0f 3a cc 40 12 91 \tsha1rnds4 $0x91,0x12(%rax),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x45, 0x12, 0x91, }, 6, 0, "", "", +"0f 3a cc 45 12 91 \tsha1rnds4 $0x91,0x12(%rbp),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0x01, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 01 12 91 \tsha1rnds4 $0x91,0x12(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0x05, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 05 12 91 \tsha1rnds4 $0x91,0x12(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0x08, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 08 12 91 \tsha1rnds4 $0x91,0x12(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0xc8, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 c8 12 91 \tsha1rnds4 $0x91,0x12(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "", +"0f 3a cc 80 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "", +"0f 3a cc 85 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rbp),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 01 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 08 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 c8 78 56 34 12 91 \tsha1rnds4 
$0x91,0x12345678(%rax,%rcx,8),%xmm0",}, +{{0x44, 0x0f, 0x3a, 0xcc, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x91, }, 11, 0, "", "", +"44 0f 3a cc bc c8 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x0f, 0x38, 0xc8, 0xc1, }, 4, 0, "", "", +"0f 38 c8 c1 \tsha1nexte %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xc8, 0xd7, }, 4, 0, "", "", +"0f 38 c8 d7 \tsha1nexte %xmm7,%xmm2",}, +{{0x41, 0x0f, 0x38, 0xc8, 0xc0, }, 5, 0, "", "", +"41 0f 38 c8 c0 \tsha1nexte %xmm8,%xmm0",}, +{{0x44, 0x0f, 0x38, 0xc8, 0xc7, }, 5, 0, "", "", +"44 0f 38 c8 c7 \tsha1nexte %xmm7,%xmm8",}, +{{0x45, 0x0f, 0x38, 0xc8, 0xc7, }, 5, 0, "", "", +"45 0f 38 c8 c7 \tsha1nexte %xmm15,%xmm8",}, +{{0x0f, 0x38, 0xc8, 0x00, }, 4, 0, "", "", +"0f 38 c8 00 \tsha1nexte (%rax),%xmm0",}, +{{0x41, 0x0f, 0x38, 0xc8, 0x00, }, 5, 0, "", "", +"41 0f 38 c8 00 \tsha1nexte (%r8),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 04 25 78 56 34 12 \tsha1nexte 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x18, }, 4, 0, "", "", +"0f 38 c8 18 \tsha1nexte (%rax),%xmm3",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 c8 04 01 \tsha1nexte (%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 04 05 78 56 34 12 \tsha1nexte 0x12345678(,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 c8 04 08 \tsha1nexte (%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 c8 04 c8 \tsha1nexte (%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 c8 40 12 \tsha1nexte 0x12(%rax),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 c8 45 12 \tsha1nexte 0x12(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 01 12 \tsha1nexte 0x12(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 05 12 \tsha1nexte 0x12(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0x08, 
0x12, }, 6, 0, "", "", +"0f 38 c8 44 08 12 \tsha1nexte 0x12(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 c8 12 \tsha1nexte 0x12(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c8 80 78 56 34 12 \tsha1nexte 0x12345678(%rax),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c8 85 78 56 34 12 \tsha1nexte 0x12345678(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 01 78 56 34 12 \tsha1nexte 0x12345678(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 05 78 56 34 12 \tsha1nexte 0x12345678(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 08 78 56 34 12 \tsha1nexte 0x12345678(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 c8 78 56 34 12 \tsha1nexte 0x12345678(%rax,%rcx,8),%xmm0",}, +{{0x44, 0x0f, 0x38, 0xc8, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"44 0f 38 c8 bc c8 78 56 34 12 \tsha1nexte 0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x0f, 0x38, 0xc9, 0xc1, }, 4, 0, "", "", +"0f 38 c9 c1 \tsha1msg1 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xc9, 0xd7, }, 4, 0, "", "", +"0f 38 c9 d7 \tsha1msg1 %xmm7,%xmm2",}, +{{0x41, 0x0f, 0x38, 0xc9, 0xc0, }, 5, 0, "", "", +"41 0f 38 c9 c0 \tsha1msg1 %xmm8,%xmm0",}, +{{0x44, 0x0f, 0x38, 0xc9, 0xc7, }, 5, 0, "", "", +"44 0f 38 c9 c7 \tsha1msg1 %xmm7,%xmm8",}, +{{0x45, 0x0f, 0x38, 0xc9, 0xc7, }, 5, 0, "", "", +"45 0f 38 c9 c7 \tsha1msg1 %xmm15,%xmm8",}, +{{0x0f, 0x38, 0xc9, 0x00, }, 4, 0, "", "", +"0f 38 c9 00 \tsha1msg1 (%rax),%xmm0",}, +{{0x41, 0x0f, 0x38, 0xc9, 0x00, }, 5, 0, "", "", +"41 0f 38 c9 00 \tsha1msg1 (%r8),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 04 25 78 56 34 12 \tsha1msg1 0x12345678,%xmm0",}, +{{0x0f, 
0x38, 0xc9, 0x18, }, 4, 0, "", "", +"0f 38 c9 18 \tsha1msg1 (%rax),%xmm3",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 c9 04 01 \tsha1msg1 (%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 04 05 78 56 34 12 \tsha1msg1 0x12345678(,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 c9 04 08 \tsha1msg1 (%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 c9 04 c8 \tsha1msg1 (%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 c9 40 12 \tsha1msg1 0x12(%rax),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 c9 45 12 \tsha1msg1 0x12(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 01 12 \tsha1msg1 0x12(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 05 12 \tsha1msg1 0x12(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 08 12 \tsha1msg1 0x12(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 c8 12 \tsha1msg1 0x12(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c9 80 78 56 34 12 \tsha1msg1 0x12345678(%rax),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c9 85 78 56 34 12 \tsha1msg1 0x12345678(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 01 78 56 34 12 \tsha1msg1 0x12345678(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 05 78 56 34 12 \tsha1msg1 0x12345678(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 08 78 56 34 12 \tsha1msg1 0x12345678(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 c8 78 56 34 12 
\tsha1msg1 0x12345678(%rax,%rcx,8),%xmm0",}, +{{0x44, 0x0f, 0x38, 0xc9, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"44 0f 38 c9 bc c8 78 56 34 12 \tsha1msg1 0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x0f, 0x38, 0xca, 0xc1, }, 4, 0, "", "", +"0f 38 ca c1 \tsha1msg2 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xca, 0xd7, }, 4, 0, "", "", +"0f 38 ca d7 \tsha1msg2 %xmm7,%xmm2",}, +{{0x41, 0x0f, 0x38, 0xca, 0xc0, }, 5, 0, "", "", +"41 0f 38 ca c0 \tsha1msg2 %xmm8,%xmm0",}, +{{0x44, 0x0f, 0x38, 0xca, 0xc7, }, 5, 0, "", "", +"44 0f 38 ca c7 \tsha1msg2 %xmm7,%xmm8",}, +{{0x45, 0x0f, 0x38, 0xca, 0xc7, }, 5, 0, "", "", +"45 0f 38 ca c7 \tsha1msg2 %xmm15,%xmm8",}, +{{0x0f, 0x38, 0xca, 0x00, }, 4, 0, "", "", +"0f 38 ca 00 \tsha1msg2 (%rax),%xmm0",}, +{{0x41, 0x0f, 0x38, 0xca, 0x00, }, 5, 0, "", "", +"41 0f 38 ca 00 \tsha1msg2 (%r8),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 04 25 78 56 34 12 \tsha1msg2 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xca, 0x18, }, 4, 0, "", "", +"0f 38 ca 18 \tsha1msg2 (%rax),%xmm3",}, +{{0x0f, 0x38, 0xca, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 ca 04 01 \tsha1msg2 (%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 04 05 78 56 34 12 \tsha1msg2 0x12345678(,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 ca 04 08 \tsha1msg2 (%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 ca 04 c8 \tsha1msg2 (%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 ca 40 12 \tsha1msg2 0x12(%rax),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 ca 45 12 \tsha1msg2 0x12(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 01 12 \tsha1msg2 0x12(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 05 12 \tsha1msg2 0x12(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 
ca 44 08 12 \tsha1msg2 0x12(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 c8 12 \tsha1msg2 0x12(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 ca 80 78 56 34 12 \tsha1msg2 0x12345678(%rax),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 ca 85 78 56 34 12 \tsha1msg2 0x12345678(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 01 78 56 34 12 \tsha1msg2 0x12345678(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 05 78 56 34 12 \tsha1msg2 0x12345678(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 08 78 56 34 12 \tsha1msg2 0x12345678(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 c8 78 56 34 12 \tsha1msg2 0x12345678(%rax,%rcx,8),%xmm0",}, +{{0x44, 0x0f, 0x38, 0xca, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"44 0f 38 ca bc c8 78 56 34 12 \tsha1msg2 0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x0f, 0x38, 0xcb, 0xcc, }, 4, 0, "", "", +"0f 38 cb cc \tsha256rnds2 %xmm0,%xmm4,%xmm1",}, +{{0x0f, 0x38, 0xcb, 0xd7, }, 4, 0, "", "", +"0f 38 cb d7 \tsha256rnds2 %xmm0,%xmm7,%xmm2",}, +{{0x41, 0x0f, 0x38, 0xcb, 0xc8, }, 5, 0, "", "", +"41 0f 38 cb c8 \tsha256rnds2 %xmm0,%xmm8,%xmm1",}, +{{0x44, 0x0f, 0x38, 0xcb, 0xc7, }, 5, 0, "", "", +"44 0f 38 cb c7 \tsha256rnds2 %xmm0,%xmm7,%xmm8",}, +{{0x45, 0x0f, 0x38, 0xcb, 0xc7, }, 5, 0, "", "", +"45 0f 38 cb c7 \tsha256rnds2 %xmm0,%xmm15,%xmm8",}, +{{0x0f, 0x38, 0xcb, 0x08, }, 4, 0, "", "", +"0f 38 cb 08 \tsha256rnds2 %xmm0,(%rax),%xmm1",}, +{{0x41, 0x0f, 0x38, 0xcb, 0x08, }, 5, 0, "", "", +"41 0f 38 cb 08 \tsha256rnds2 %xmm0,(%r8),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 0c 25 78 56 34 12 \tsha256rnds2 
%xmm0,0x12345678,%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x18, }, 4, 0, "", "", +"0f 38 cb 18 \tsha256rnds2 %xmm0,(%rax),%xmm3",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x01, }, 5, 0, "", "", +"0f 38 cb 0c 01 \tsha256rnds2 %xmm0,(%rcx,%rax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 0c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(,%rax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x08, }, 5, 0, "", "", +"0f 38 cb 0c 08 \tsha256rnds2 %xmm0,(%rax,%rcx,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0xc8, }, 5, 0, "", "", +"0f 38 cb 0c c8 \tsha256rnds2 %xmm0,(%rax,%rcx,8),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x48, 0x12, }, 5, 0, "", "", +"0f 38 cb 48 12 \tsha256rnds2 %xmm0,0x12(%rax),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4d, 0x12, }, 5, 0, "", "", +"0f 38 cb 4d 12 \tsha256rnds2 %xmm0,0x12(%rbp),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c 01 12 \tsha256rnds2 %xmm0,0x12(%rcx,%rax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c 05 12 \tsha256rnds2 %xmm0,0x12(%rbp,%rax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c 08 12 \tsha256rnds2 %xmm0,0x12(%rax,%rcx,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c c8 12 \tsha256rnds2 %xmm0,0x12(%rax,%rcx,8),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cb 88 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cb 8d 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rbp),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c 01 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rcx,%rax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rbp,%rax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 
cb 8c 08 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax,%rcx,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c c8 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x44, 0x0f, 0x38, 0xcb, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"44 0f 38 cb bc c8 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x0f, 0x38, 0xcc, 0xc1, }, 4, 0, "", "", +"0f 38 cc c1 \tsha256msg1 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xcc, 0xd7, }, 4, 0, "", "", +"0f 38 cc d7 \tsha256msg1 %xmm7,%xmm2",}, +{{0x41, 0x0f, 0x38, 0xcc, 0xc0, }, 5, 0, "", "", +"41 0f 38 cc c0 \tsha256msg1 %xmm8,%xmm0",}, +{{0x44, 0x0f, 0x38, 0xcc, 0xc7, }, 5, 0, "", "", +"44 0f 38 cc c7 \tsha256msg1 %xmm7,%xmm8",}, +{{0x45, 0x0f, 0x38, 0xcc, 0xc7, }, 5, 0, "", "", +"45 0f 38 cc c7 \tsha256msg1 %xmm15,%xmm8",}, +{{0x0f, 0x38, 0xcc, 0x00, }, 4, 0, "", "", +"0f 38 cc 00 \tsha256msg1 (%rax),%xmm0",}, +{{0x41, 0x0f, 0x38, 0xcc, 0x00, }, 5, 0, "", "", +"41 0f 38 cc 00 \tsha256msg1 (%r8),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 04 25 78 56 34 12 \tsha256msg1 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x18, }, 4, 0, "", "", +"0f 38 cc 18 \tsha256msg1 (%rax),%xmm3",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 cc 04 01 \tsha256msg1 (%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 04 05 78 56 34 12 \tsha256msg1 0x12345678(,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 cc 04 08 \tsha256msg1 (%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 cc 04 c8 \tsha256msg1 (%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 cc 40 12 \tsha256msg1 0x12(%rax),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 cc 45 12 \tsha256msg1 0x12(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 cc 
44 01 12 \tsha256msg1 0x12(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 cc 44 05 12 \tsha256msg1 0x12(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 cc 44 08 12 \tsha256msg1 0x12(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 cc 44 c8 12 \tsha256msg1 0x12(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cc 80 78 56 34 12 \tsha256msg1 0x12345678(%rax),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cc 85 78 56 34 12 \tsha256msg1 0x12345678(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 01 78 56 34 12 \tsha256msg1 0x12345678(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 05 78 56 34 12 \tsha256msg1 0x12345678(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 08 78 56 34 12 \tsha256msg1 0x12345678(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 c8 78 56 34 12 \tsha256msg1 0x12345678(%rax,%rcx,8),%xmm0",}, +{{0x44, 0x0f, 0x38, 0xcc, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"44 0f 38 cc bc c8 78 56 34 12 \tsha256msg1 0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x0f, 0x38, 0xcd, 0xc1, }, 4, 0, "", "", +"0f 38 cd c1 \tsha256msg2 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xcd, 0xd7, }, 4, 0, "", "", +"0f 38 cd d7 \tsha256msg2 %xmm7,%xmm2",}, +{{0x41, 0x0f, 0x38, 0xcd, 0xc0, }, 5, 0, "", "", +"41 0f 38 cd c0 \tsha256msg2 %xmm8,%xmm0",}, +{{0x44, 0x0f, 0x38, 0xcd, 0xc7, }, 5, 0, "", "", +"44 0f 38 cd c7 \tsha256msg2 %xmm7,%xmm8",}, +{{0x45, 0x0f, 0x38, 0xcd, 0xc7, }, 5, 0, "", "", +"45 0f 38 cd c7 \tsha256msg2 %xmm15,%xmm8",}, +{{0x0f, 0x38, 0xcd, 0x00, }, 4, 0, "", "", +"0f 38 cd 00 \tsha256msg2 (%rax),%xmm0",}, +{{0x41, 0x0f, 
0x38, 0xcd, 0x00, }, 5, 0, "", "", +"41 0f 38 cd 00 \tsha256msg2 (%r8),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 04 25 78 56 34 12 \tsha256msg2 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x18, }, 4, 0, "", "", +"0f 38 cd 18 \tsha256msg2 (%rax),%xmm3",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 cd 04 01 \tsha256msg2 (%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 04 05 78 56 34 12 \tsha256msg2 0x12345678(,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 cd 04 08 \tsha256msg2 (%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 cd 04 c8 \tsha256msg2 (%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 cd 40 12 \tsha256msg2 0x12(%rax),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 cd 45 12 \tsha256msg2 0x12(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 01 12 \tsha256msg2 0x12(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 05 12 \tsha256msg2 0x12(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 08 12 \tsha256msg2 0x12(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 c8 12 \tsha256msg2 0x12(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cd 80 78 56 34 12 \tsha256msg2 0x12345678(%rax),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cd 85 78 56 34 12 \tsha256msg2 0x12345678(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 01 78 56 34 12 \tsha256msg2 0x12345678(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 05 78 56 34 12 \tsha256msg2 
0x12345678(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 08 78 56 34 12 \tsha256msg2 0x12345678(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 c8 78 56 34 12 \tsha256msg2 0x12345678(%rax,%rcx,8),%xmm0",}, +{{0x44, 0x0f, 0x38, 0xcd, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"44 0f 38 cd bc c8 78 56 34 12 \tsha256msg2 0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x66, 0x0f, 0xae, 0x38, }, 4, 0, "", "", +"66 0f ae 38 \tclflushopt (%rax)",}, +{{0x66, 0x41, 0x0f, 0xae, 0x38, }, 5, 0, "", "", +"66 41 0f ae 38 \tclflushopt (%r8)",}, +{{0x66, 0x0f, 0xae, 0x3c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f ae 3c 25 78 56 34 12 \tclflushopt 0x12345678",}, +{{0x66, 0x0f, 0xae, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f ae bc c8 78 56 34 12 \tclflushopt 0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x41, 0x0f, 0xae, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 41 0f ae bc c8 78 56 34 12 \tclflushopt 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0xae, 0x38, }, 3, 0, "", "", +"0f ae 38 \tclflush (%rax)",}, +{{0x41, 0x0f, 0xae, 0x38, }, 4, 0, "", "", +"41 0f ae 38 \tclflush (%r8)",}, +{{0x0f, 0xae, 0xf8, }, 3, 0, "", "", +"0f ae f8 \tsfence ",}, +{{0x66, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"66 0f ae 30 \tclwb (%rax)",}, +{{0x66, 0x41, 0x0f, 0xae, 0x30, }, 5, 0, "", "", +"66 41 0f ae 30 \tclwb (%r8)",}, +{{0x66, 0x0f, 0xae, 0x34, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f ae 34 25 78 56 34 12 \tclwb 0x12345678",}, +{{0x66, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f ae b4 c8 78 56 34 12 \tclwb 0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x41, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 41 0f ae b4 c8 78 56 34 12 \tclwb 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0xae, 0x30, }, 3, 0, "", "", +"0f ae 30 \txsaveopt (%rax)",}, +{{0x41, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"41 0f 
ae 30 \txsaveopt (%r8)",}, +{{0x0f, 0xae, 0xf0, }, 3, 0, "", "", +"0f ae f0 \tmfence ",}, +{{0x0f, 0xc7, 0x20, }, 3, 0, "", "", +"0f c7 20 \txsavec (%rax)",}, +{{0x41, 0x0f, 0xc7, 0x20, }, 4, 0, "", "", +"41 0f c7 20 \txsavec (%r8)",}, +{{0x0f, 0xc7, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 24 25 78 56 34 12 \txsavec 0x12345678",}, +{{0x0f, 0xc7, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 a4 c8 78 56 34 12 \txsavec 0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0xc7, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"41 0f c7 a4 c8 78 56 34 12 \txsavec 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0xc7, 0x28, }, 3, 0, "", "", +"0f c7 28 \txsaves (%rax)",}, +{{0x41, 0x0f, 0xc7, 0x28, }, 4, 0, "", "", +"41 0f c7 28 \txsaves (%r8)",}, +{{0x0f, 0xc7, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 2c 25 78 56 34 12 \txsaves 0x12345678",}, +{{0x0f, 0xc7, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 ac c8 78 56 34 12 \txsaves 0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0xc7, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"41 0f c7 ac c8 78 56 34 12 \txsaves 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0xc7, 0x18, }, 3, 0, "", "", +"0f c7 18 \txrstors (%rax)",}, +{{0x41, 0x0f, 0xc7, 0x18, }, 4, 0, "", "", +"41 0f c7 18 \txrstors (%r8)",}, +{{0x0f, 0xc7, 0x1c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 1c 25 78 56 34 12 \txrstors 0x12345678",}, +{{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",}, +{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "", +"66 0f ae f8 \tpcommit ",}, diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c new file mode 100644 index 000000000000..41b1b1c62660 --- /dev/null +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ 
-0,0 +1,877 @@ +/* + * This file contains instructions for testing by the test titled: + * + * "Test x86 instruction decoder - new instructions" + * + * Note that the 'Expecting' comment lines are consumed by the + * gen-insn-x86-dat.awk script and have the format: + * + * Expecting: <op> <branch> <rel> + * + * If this file is changed, remember to run the gen-insn-x86-dat.sh + * script and commit the result. + * + * Refer to insn-x86.c for more details. + */ + +int main(void) +{ + /* Following line is a marker for the awk script - do not change */ + asm volatile("rdtsc"); /* Start here */ + +#ifdef __x86_64__ + + /* bndmk m64, bnd */ + + asm volatile("bndmk (%rax), %bnd0"); + asm volatile("bndmk (%r8), %bnd0"); + asm volatile("bndmk (0x12345678), %bnd0"); + asm volatile("bndmk (%rax), %bnd3"); + asm volatile("bndmk (%rcx,%rax,1), %bnd0"); + asm volatile("bndmk 0x12345678(,%rax,1), %bnd0"); + asm volatile("bndmk (%rax,%rcx,1), %bnd0"); + asm volatile("bndmk (%rax,%rcx,8), %bnd0"); + asm volatile("bndmk 0x12(%rax), %bnd0"); + asm volatile("bndmk 0x12(%rbp), %bnd0"); + asm volatile("bndmk 0x12(%rcx,%rax,1), %bnd0"); + asm volatile("bndmk 0x12(%rbp,%rax,1), %bnd0"); + asm volatile("bndmk 0x12(%rax,%rcx,1), %bnd0"); + asm volatile("bndmk 0x12(%rax,%rcx,8), %bnd0"); + asm volatile("bndmk 0x12345678(%rax), %bnd0"); + asm volatile("bndmk 0x12345678(%rbp), %bnd0"); + asm volatile("bndmk 0x12345678(%rcx,%rax,1), %bnd0"); + asm volatile("bndmk 0x12345678(%rbp,%rax,1), %bnd0"); + asm volatile("bndmk 0x12345678(%rax,%rcx,1), %bnd0"); + asm volatile("bndmk 0x12345678(%rax,%rcx,8), %bnd0"); + + /* bndcl r/m64, bnd */ + + asm volatile("bndcl (%rax), %bnd0"); + asm volatile("bndcl (%r8), %bnd0"); + asm volatile("bndcl (0x12345678), %bnd0"); + asm volatile("bndcl (%rax), %bnd3"); + asm volatile("bndcl (%rcx,%rax,1), %bnd0"); + asm volatile("bndcl 0x12345678(,%rax,1), %bnd0"); + asm volatile("bndcl (%rax,%rcx,1), %bnd0"); + asm volatile("bndcl (%rax,%rcx,8), %bnd0"); + asm 
volatile("bndcl 0x12(%rax), %bnd0"); + asm volatile("bndcl 0x12(%rbp), %bnd0"); + asm volatile("bndcl 0x12(%rcx,%rax,1), %bnd0"); + asm volatile("bndcl 0x12(%rbp,%rax,1), %bnd0"); + asm volatile("bndcl 0x12(%rax,%rcx,1), %bnd0"); + asm volatile("bndcl 0x12(%rax,%rcx,8), %bnd0"); + asm volatile("bndcl 0x12345678(%rax), %bnd0"); + asm volatile("bndcl 0x12345678(%rbp), %bnd0"); + asm volatile("bndcl 0x12345678(%rcx,%rax,1), %bnd0"); + asm volatile("bndcl 0x12345678(%rbp,%rax,1), %bnd0"); + asm volatile("bndcl 0x12345678(%rax,%rcx,1), %bnd0"); + asm volatile("bndcl 0x12345678(%rax,%rcx,8), %bnd0"); + asm volatile("bndcl %rax, %bnd0"); + + /* bndcu r/m64, bnd */ + + asm volatile("bndcu (%rax), %bnd0"); + asm volatile("bndcu (%r8), %bnd0"); + asm volatile("bndcu (0x12345678), %bnd0"); + asm volatile("bndcu (%rax), %bnd3"); + asm volatile("bndcu (%rcx,%rax,1), %bnd0"); + asm volatile("bndcu 0x12345678(,%rax,1), %bnd0"); + asm volatile("bndcu (%rax,%rcx,1), %bnd0"); + asm volatile("bndcu (%rax,%rcx,8), %bnd0"); + asm volatile("bndcu 0x12(%rax), %bnd0"); + asm volatile("bndcu 0x12(%rbp), %bnd0"); + asm volatile("bndcu 0x12(%rcx,%rax,1), %bnd0"); + asm volatile("bndcu 0x12(%rbp,%rax,1), %bnd0"); + asm volatile("bndcu 0x12(%rax,%rcx,1), %bnd0"); + asm volatile("bndcu 0x12(%rax,%rcx,8), %bnd0"); + asm volatile("bndcu 0x12345678(%rax), %bnd0"); + asm volatile("bndcu 0x12345678(%rbp), %bnd0"); + asm volatile("bndcu 0x12345678(%rcx,%rax,1), %bnd0"); + asm volatile("bndcu 0x12345678(%rbp,%rax,1), %bnd0"); + asm volatile("bndcu 0x12345678(%rax,%rcx,1), %bnd0"); + asm volatile("bndcu 0x12345678(%rax,%rcx,8), %bnd0"); + asm volatile("bndcu %rax, %bnd0"); + + /* bndcn r/m64, bnd */ + + asm volatile("bndcn (%rax), %bnd0"); + asm volatile("bndcn (%r8), %bnd0"); + asm volatile("bndcn (0x12345678), %bnd0"); + asm volatile("bndcn (%rax), %bnd3"); + asm volatile("bndcn (%rcx,%rax,1), %bnd0"); + asm volatile("bndcn 0x12345678(,%rax,1), %bnd0"); + asm volatile("bndcn (%rax,%rcx,1), %bnd0"); + 
asm volatile("bndcn (%rax,%rcx,8), %bnd0"); + asm volatile("bndcn 0x12(%rax), %bnd0"); + asm volatile("bndcn 0x12(%rbp), %bnd0"); + asm volatile("bndcn 0x12(%rcx,%rax,1), %bnd0"); + asm volatile("bndcn 0x12(%rbp,%rax,1), %bnd0"); + asm volatile("bndcn 0x12(%rax,%rcx,1), %bnd0"); + asm volatile("bndcn 0x12(%rax,%rcx,8), %bnd0"); + asm volatile("bndcn 0x12345678(%rax), %bnd0"); + asm volatile("bndcn 0x12345678(%rbp), %bnd0"); + asm volatile("bndcn 0x12345678(%rcx,%rax,1), %bnd0"); + asm volatile("bndcn 0x12345678(%rbp,%rax,1), %bnd0"); + asm volatile("bndcn 0x12345678(%rax,%rcx,1), %bnd0"); + asm volatile("bndcn 0x12345678(%rax,%rcx,8), %bnd0"); + asm volatile("bndcn %rax, %bnd0"); + + /* bndmov m128, bnd */ + + asm volatile("bndmov (%rax), %bnd0"); + asm volatile("bndmov (%r8), %bnd0"); + asm volatile("bndmov (0x12345678), %bnd0"); + asm volatile("bndmov (%rax), %bnd3"); + asm volatile("bndmov (%rcx,%rax,1), %bnd0"); + asm volatile("bndmov 0x12345678(,%rax,1), %bnd0"); + asm volatile("bndmov (%rax,%rcx,1), %bnd0"); + asm volatile("bndmov (%rax,%rcx,8), %bnd0"); + asm volatile("bndmov 0x12(%rax), %bnd0"); + asm volatile("bndmov 0x12(%rbp), %bnd0"); + asm volatile("bndmov 0x12(%rcx,%rax,1), %bnd0"); + asm volatile("bndmov 0x12(%rbp,%rax,1), %bnd0"); + asm volatile("bndmov 0x12(%rax,%rcx,1), %bnd0"); + asm volatile("bndmov 0x12(%rax,%rcx,8), %bnd0"); + asm volatile("bndmov 0x12345678(%rax), %bnd0"); + asm volatile("bndmov 0x12345678(%rbp), %bnd0"); + asm volatile("bndmov 0x12345678(%rcx,%rax,1), %bnd0"); + asm volatile("bndmov 0x12345678(%rbp,%rax,1), %bnd0"); + asm volatile("bndmov 0x12345678(%rax,%rcx,1), %bnd0"); + asm volatile("bndmov 0x12345678(%rax,%rcx,8), %bnd0"); + + /* bndmov bnd, m128 */ + + asm volatile("bndmov %bnd0, (%rax)"); + asm volatile("bndmov %bnd0, (%r8)"); + asm volatile("bndmov %bnd0, (0x12345678)"); + asm volatile("bndmov %bnd3, (%rax)"); + asm volatile("bndmov %bnd0, (%rcx,%rax,1)"); + asm volatile("bndmov %bnd0, 0x12345678(,%rax,1)"); + asm 
volatile("bndmov %bnd0, (%rax,%rcx,1)"); + asm volatile("bndmov %bnd0, (%rax,%rcx,8)"); + asm volatile("bndmov %bnd0, 0x12(%rax)"); + asm volatile("bndmov %bnd0, 0x12(%rbp)"); + asm volatile("bndmov %bnd0, 0x12(%rcx,%rax,1)"); + asm volatile("bndmov %bnd0, 0x12(%rbp,%rax,1)"); + asm volatile("bndmov %bnd0, 0x12(%rax,%rcx,1)"); + asm volatile("bndmov %bnd0, 0x12(%rax,%rcx,8)"); + asm volatile("bndmov %bnd0, 0x12345678(%rax)"); + asm volatile("bndmov %bnd0, 0x12345678(%rbp)"); + asm volatile("bndmov %bnd0, 0x12345678(%rcx,%rax,1)"); + asm volatile("bndmov %bnd0, 0x12345678(%rbp,%rax,1)"); + asm volatile("bndmov %bnd0, 0x12345678(%rax,%rcx,1)"); + asm volatile("bndmov %bnd0, 0x12345678(%rax,%rcx,8)"); + + /* bndmov bnd2, bnd1 */ + + asm volatile("bndmov %bnd0, %bnd1"); + asm volatile("bndmov %bnd1, %bnd0"); + + /* bndldx mib, bnd */ + + asm volatile("bndldx (%rax), %bnd0"); + asm volatile("bndldx (%r8), %bnd0"); + asm volatile("bndldx (0x12345678), %bnd0"); + asm volatile("bndldx (%rax), %bnd3"); + asm volatile("bndldx (%rcx,%rax,1), %bnd0"); + asm volatile("bndldx 0x12345678(,%rax,1), %bnd0"); + asm volatile("bndldx (%rax,%rcx,1), %bnd0"); + asm volatile("bndldx 0x12(%rax), %bnd0"); + asm volatile("bndldx 0x12(%rbp), %bnd0"); + asm volatile("bndldx 0x12(%rcx,%rax,1), %bnd0"); + asm volatile("bndldx 0x12(%rbp,%rax,1), %bnd0"); + asm volatile("bndldx 0x12(%rax,%rcx,1), %bnd0"); + asm volatile("bndldx 0x12345678(%rax), %bnd0"); + asm volatile("bndldx 0x12345678(%rbp), %bnd0"); + asm volatile("bndldx 0x12345678(%rcx,%rax,1), %bnd0"); + asm volatile("bndldx 0x12345678(%rbp,%rax,1), %bnd0"); + asm volatile("bndldx 0x12345678(%rax,%rcx,1), %bnd0"); + + /* bndstx bnd, mib */ + + asm volatile("bndstx %bnd0, (%rax)"); + asm volatile("bndstx %bnd0, (%r8)"); + asm volatile("bndstx %bnd0, (0x12345678)"); + asm volatile("bndstx %bnd3, (%rax)"); + asm volatile("bndstx %bnd0, (%rcx,%rax,1)"); + asm volatile("bndstx %bnd0, 0x12345678(,%rax,1)"); + asm volatile("bndstx %bnd0, 
(%rax,%rcx,1)"); + asm volatile("bndstx %bnd0, 0x12(%rax)"); + asm volatile("bndstx %bnd0, 0x12(%rbp)"); + asm volatile("bndstx %bnd0, 0x12(%rcx,%rax,1)"); + asm volatile("bndstx %bnd0, 0x12(%rbp,%rax,1)"); + asm volatile("bndstx %bnd0, 0x12(%rax,%rcx,1)"); + asm volatile("bndstx %bnd0, 0x12345678(%rax)"); + asm volatile("bndstx %bnd0, 0x12345678(%rbp)"); + asm volatile("bndstx %bnd0, 0x12345678(%rcx,%rax,1)"); + asm volatile("bndstx %bnd0, 0x12345678(%rbp,%rax,1)"); + asm volatile("bndstx %bnd0, 0x12345678(%rax,%rcx,1)"); + + /* bnd prefix on call, ret, jmp and all jcc */ + + asm volatile("bnd call label1"); /* Expecting: call unconditional 0 */ + asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("bnd ret"); /* Expecting: ret indirect 0 */ + asm volatile("bnd jmp label1"); /* Expecting: jmp unconditional 0 */ + asm volatile("bnd jmp label1"); /* Expecting: jmp unconditional 0 */ + asm volatile("bnd jmp *(%ecx)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jne label1"); /* Expecting: jcc conditional 0 */ + + /* sha1rnds4 imm8, xmm2/m128, xmm1 */ + + asm volatile("sha1rnds4 $0x0, %xmm1, %xmm0"); + asm volatile("sha1rnds4 $0x91, %xmm7, %xmm2"); + asm volatile("sha1rnds4 $0x91, %xmm8, %xmm0"); + asm volatile("sha1rnds4 $0x91, %xmm7, %xmm8"); + asm volatile("sha1rnds4 $0x91, %xmm15, %xmm8"); + asm volatile("sha1rnds4 $0x91, (%rax), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%r8), %xmm0"); + asm volatile("sha1rnds4 $0x91, (0x12345678), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%rax), %xmm3"); + asm volatile("sha1rnds4 $0x91, (%rcx,%rax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(,%rax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%rax,%rcx,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%rax,%rcx,8), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%rax), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%rbp), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%rcx,%rax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 
0x12(%rbp,%rax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%rax,%rcx,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%rax,%rcx,8), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rax), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rbp), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rcx,%rax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rbp,%rax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rax,%rcx,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rax,%rcx,8), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rax,%rcx,8), %xmm15"); + + /* sha1nexte xmm2/m128, xmm1 */ + + asm volatile("sha1nexte %xmm1, %xmm0"); + asm volatile("sha1nexte %xmm7, %xmm2"); + asm volatile("sha1nexte %xmm8, %xmm0"); + asm volatile("sha1nexte %xmm7, %xmm8"); + asm volatile("sha1nexte %xmm15, %xmm8"); + asm volatile("sha1nexte (%rax), %xmm0"); + asm volatile("sha1nexte (%r8), %xmm0"); + asm volatile("sha1nexte (0x12345678), %xmm0"); + asm volatile("sha1nexte (%rax), %xmm3"); + asm volatile("sha1nexte (%rcx,%rax,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(,%rax,1), %xmm0"); + asm volatile("sha1nexte (%rax,%rcx,1), %xmm0"); + asm volatile("sha1nexte (%rax,%rcx,8), %xmm0"); + asm volatile("sha1nexte 0x12(%rax), %xmm0"); + asm volatile("sha1nexte 0x12(%rbp), %xmm0"); + asm volatile("sha1nexte 0x12(%rcx,%rax,1), %xmm0"); + asm volatile("sha1nexte 0x12(%rbp,%rax,1), %xmm0"); + asm volatile("sha1nexte 0x12(%rax,%rcx,1), %xmm0"); + asm volatile("sha1nexte 0x12(%rax,%rcx,8), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rax), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rbp), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rcx,%rax,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rbp,%rax,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rax,%rcx,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rax,%rcx,8), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rax,%rcx,8), %xmm15"); + + /* sha1msg1 xmm2/m128, xmm1 */ + + asm 
volatile("sha1msg1 %xmm1, %xmm0"); + asm volatile("sha1msg1 %xmm7, %xmm2"); + asm volatile("sha1msg1 %xmm8, %xmm0"); + asm volatile("sha1msg1 %xmm7, %xmm8"); + asm volatile("sha1msg1 %xmm15, %xmm8"); + asm volatile("sha1msg1 (%rax), %xmm0"); + asm volatile("sha1msg1 (%r8), %xmm0"); + asm volatile("sha1msg1 (0x12345678), %xmm0"); + asm volatile("sha1msg1 (%rax), %xmm3"); + asm volatile("sha1msg1 (%rcx,%rax,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(,%rax,1), %xmm0"); + asm volatile("sha1msg1 (%rax,%rcx,1), %xmm0"); + asm volatile("sha1msg1 (%rax,%rcx,8), %xmm0"); + asm volatile("sha1msg1 0x12(%rax), %xmm0"); + asm volatile("sha1msg1 0x12(%rbp), %xmm0"); + asm volatile("sha1msg1 0x12(%rcx,%rax,1), %xmm0"); + asm volatile("sha1msg1 0x12(%rbp,%rax,1), %xmm0"); + asm volatile("sha1msg1 0x12(%rax,%rcx,1), %xmm0"); + asm volatile("sha1msg1 0x12(%rax,%rcx,8), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rax), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rbp), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rcx,%rax,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rbp,%rax,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rax,%rcx,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rax,%rcx,8), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rax,%rcx,8), %xmm15"); + + /* sha1msg2 xmm2/m128, xmm1 */ + + asm volatile("sha1msg2 %xmm1, %xmm0"); + asm volatile("sha1msg2 %xmm7, %xmm2"); + asm volatile("sha1msg2 %xmm8, %xmm0"); + asm volatile("sha1msg2 %xmm7, %xmm8"); + asm volatile("sha1msg2 %xmm15, %xmm8"); + asm volatile("sha1msg2 (%rax), %xmm0"); + asm volatile("sha1msg2 (%r8), %xmm0"); + asm volatile("sha1msg2 (0x12345678), %xmm0"); + asm volatile("sha1msg2 (%rax), %xmm3"); + asm volatile("sha1msg2 (%rcx,%rax,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(,%rax,1), %xmm0"); + asm volatile("sha1msg2 (%rax,%rcx,1), %xmm0"); + asm volatile("sha1msg2 (%rax,%rcx,8), %xmm0"); + asm volatile("sha1msg2 0x12(%rax), %xmm0"); + asm volatile("sha1msg2 0x12(%rbp), %xmm0"); + asm 
volatile("sha1msg2 0x12(%rcx,%rax,1), %xmm0"); + asm volatile("sha1msg2 0x12(%rbp,%rax,1), %xmm0"); + asm volatile("sha1msg2 0x12(%rax,%rcx,1), %xmm0"); + asm volatile("sha1msg2 0x12(%rax,%rcx,8), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rax), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rbp), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rcx,%rax,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rbp,%rax,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rax,%rcx,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rax,%rcx,8), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rax,%rcx,8), %xmm15"); + + /* sha256rnds2 <XMM0>, xmm2/m128, xmm1 */ + /* Note sha256rnds2 has an implicit operand 'xmm0' */ + + asm volatile("sha256rnds2 %xmm4, %xmm1"); + asm volatile("sha256rnds2 %xmm7, %xmm2"); + asm volatile("sha256rnds2 %xmm8, %xmm1"); + asm volatile("sha256rnds2 %xmm7, %xmm8"); + asm volatile("sha256rnds2 %xmm15, %xmm8"); + asm volatile("sha256rnds2 (%rax), %xmm1"); + asm volatile("sha256rnds2 (%r8), %xmm1"); + asm volatile("sha256rnds2 (0x12345678), %xmm1"); + asm volatile("sha256rnds2 (%rax), %xmm3"); + asm volatile("sha256rnds2 (%rcx,%rax,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(,%rax,1), %xmm1"); + asm volatile("sha256rnds2 (%rax,%rcx,1), %xmm1"); + asm volatile("sha256rnds2 (%rax,%rcx,8), %xmm1"); + asm volatile("sha256rnds2 0x12(%rax), %xmm1"); + asm volatile("sha256rnds2 0x12(%rbp), %xmm1"); + asm volatile("sha256rnds2 0x12(%rcx,%rax,1), %xmm1"); + asm volatile("sha256rnds2 0x12(%rbp,%rax,1), %xmm1"); + asm volatile("sha256rnds2 0x12(%rax,%rcx,1), %xmm1"); + asm volatile("sha256rnds2 0x12(%rax,%rcx,8), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%rax), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%rbp), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%rcx,%rax,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%rbp,%rax,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%rax,%rcx,1), %xmm1"); + asm volatile("sha256rnds2 
0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%rax,%rcx,8), %xmm15"); + + /* sha256msg1 xmm2/m128, xmm1 */ + + asm volatile("sha256msg1 %xmm1, %xmm0"); + asm volatile("sha256msg1 %xmm7, %xmm2"); + asm volatile("sha256msg1 %xmm8, %xmm0"); + asm volatile("sha256msg1 %xmm7, %xmm8"); + asm volatile("sha256msg1 %xmm15, %xmm8"); + asm volatile("sha256msg1 (%rax), %xmm0"); + asm volatile("sha256msg1 (%r8), %xmm0"); + asm volatile("sha256msg1 (0x12345678), %xmm0"); + asm volatile("sha256msg1 (%rax), %xmm3"); + asm volatile("sha256msg1 (%rcx,%rax,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(,%rax,1), %xmm0"); + asm volatile("sha256msg1 (%rax,%rcx,1), %xmm0"); + asm volatile("sha256msg1 (%rax,%rcx,8), %xmm0"); + asm volatile("sha256msg1 0x12(%rax), %xmm0"); + asm volatile("sha256msg1 0x12(%rbp), %xmm0"); + asm volatile("sha256msg1 0x12(%rcx,%rax,1), %xmm0"); + asm volatile("sha256msg1 0x12(%rbp,%rax,1), %xmm0"); + asm volatile("sha256msg1 0x12(%rax,%rcx,1), %xmm0"); + asm volatile("sha256msg1 0x12(%rax,%rcx,8), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rax), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rbp), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rcx,%rax,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rbp,%rax,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rax,%rcx,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rax,%rcx,8), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rax,%rcx,8), %xmm15"); + + /* sha256msg2 xmm2/m128, xmm1 */ + + asm volatile("sha256msg2 %xmm1, %xmm0"); + asm volatile("sha256msg2 %xmm7, %xmm2"); + asm volatile("sha256msg2 %xmm8, %xmm0"); + asm volatile("sha256msg2 %xmm7, %xmm8"); + asm volatile("sha256msg2 %xmm15, %xmm8"); + asm volatile("sha256msg2 (%rax), %xmm0"); + asm volatile("sha256msg2 (%r8), %xmm0"); + asm volatile("sha256msg2 (0x12345678), %xmm0"); + asm volatile("sha256msg2 (%rax), %xmm3"); + asm volatile("sha256msg2 (%rcx,%rax,1), %xmm0"); + asm volatile("sha256msg2 
0x12345678(,%rax,1), %xmm0"); + asm volatile("sha256msg2 (%rax,%rcx,1), %xmm0"); + asm volatile("sha256msg2 (%rax,%rcx,8), %xmm0"); + asm volatile("sha256msg2 0x12(%rax), %xmm0"); + asm volatile("sha256msg2 0x12(%rbp), %xmm0"); + asm volatile("sha256msg2 0x12(%rcx,%rax,1), %xmm0"); + asm volatile("sha256msg2 0x12(%rbp,%rax,1), %xmm0"); + asm volatile("sha256msg2 0x12(%rax,%rcx,1), %xmm0"); + asm volatile("sha256msg2 0x12(%rax,%rcx,8), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rax), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rbp), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rcx,%rax,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rbp,%rax,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rax,%rcx,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rax,%rcx,8), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rax,%rcx,8), %xmm15"); + + /* clflushopt m8 */ + + asm volatile("clflushopt (%rax)"); + asm volatile("clflushopt (%r8)"); + asm volatile("clflushopt (0x12345678)"); + asm volatile("clflushopt 0x12345678(%rax,%rcx,8)"); + asm volatile("clflushopt 0x12345678(%r8,%rcx,8)"); + /* Also check instructions in the same group encoding as clflushopt */ + asm volatile("clflush (%rax)"); + asm volatile("clflush (%r8)"); + asm volatile("sfence"); + + /* clwb m8 */ + + asm volatile("clwb (%rax)"); + asm volatile("clwb (%r8)"); + asm volatile("clwb (0x12345678)"); + asm volatile("clwb 0x12345678(%rax,%rcx,8)"); + asm volatile("clwb 0x12345678(%r8,%rcx,8)"); + /* Also check instructions in the same group encoding as clwb */ + asm volatile("xsaveopt (%rax)"); + asm volatile("xsaveopt (%r8)"); + asm volatile("mfence"); + + /* xsavec mem */ + + asm volatile("xsavec (%rax)"); + asm volatile("xsavec (%r8)"); + asm volatile("xsavec (0x12345678)"); + asm volatile("xsavec 0x12345678(%rax,%rcx,8)"); + asm volatile("xsavec 0x12345678(%r8,%rcx,8)"); + + /* xsaves mem */ + + asm volatile("xsaves (%rax)"); + asm volatile("xsaves (%r8)"); + asm volatile("xsaves 
(0x12345678)"); + asm volatile("xsaves 0x12345678(%rax,%rcx,8)"); + asm volatile("xsaves 0x12345678(%r8,%rcx,8)"); + + /* xrstors mem */ + + asm volatile("xrstors (%rax)"); + asm volatile("xrstors (%r8)"); + asm volatile("xrstors (0x12345678)"); + asm volatile("xrstors 0x12345678(%rax,%rcx,8)"); + asm volatile("xrstors 0x12345678(%r8,%rcx,8)"); + +#else /* #ifdef __x86_64__ */ + + /* bndmk m32, bnd */ + + asm volatile("bndmk (%eax), %bnd0"); + asm volatile("bndmk (0x12345678), %bnd0"); + asm volatile("bndmk (%eax), %bnd3"); + asm volatile("bndmk (%ecx,%eax,1), %bnd0"); + asm volatile("bndmk 0x12345678(,%eax,1), %bnd0"); + asm volatile("bndmk (%eax,%ecx,1), %bnd0"); + asm volatile("bndmk (%eax,%ecx,8), %bnd0"); + asm volatile("bndmk 0x12(%eax), %bnd0"); + asm volatile("bndmk 0x12(%ebp), %bnd0"); + asm volatile("bndmk 0x12(%ecx,%eax,1), %bnd0"); + asm volatile("bndmk 0x12(%ebp,%eax,1), %bnd0"); + asm volatile("bndmk 0x12(%eax,%ecx,1), %bnd0"); + asm volatile("bndmk 0x12(%eax,%ecx,8), %bnd0"); + asm volatile("bndmk 0x12345678(%eax), %bnd0"); + asm volatile("bndmk 0x12345678(%ebp), %bnd0"); + asm volatile("bndmk 0x12345678(%ecx,%eax,1), %bnd0"); + asm volatile("bndmk 0x12345678(%ebp,%eax,1), %bnd0"); + asm volatile("bndmk 0x12345678(%eax,%ecx,1), %bnd0"); + asm volatile("bndmk 0x12345678(%eax,%ecx,8), %bnd0"); + + /* bndcl r/m32, bnd */ + + asm volatile("bndcl (%eax), %bnd0"); + asm volatile("bndcl (0x12345678), %bnd0"); + asm volatile("bndcl (%eax), %bnd3"); + asm volatile("bndcl (%ecx,%eax,1), %bnd0"); + asm volatile("bndcl 0x12345678(,%eax,1), %bnd0"); + asm volatile("bndcl (%eax,%ecx,1), %bnd0"); + asm volatile("bndcl (%eax,%ecx,8), %bnd0"); + asm volatile("bndcl 0x12(%eax), %bnd0"); + asm volatile("bndcl 0x12(%ebp), %bnd0"); + asm volatile("bndcl 0x12(%ecx,%eax,1), %bnd0"); + asm volatile("bndcl 0x12(%ebp,%eax,1), %bnd0"); + asm volatile("bndcl 0x12(%eax,%ecx,1), %bnd0"); + asm volatile("bndcl 0x12(%eax,%ecx,8), %bnd0"); + asm volatile("bndcl 0x12345678(%eax), 
%bnd0"); + asm volatile("bndcl 0x12345678(%ebp), %bnd0"); + asm volatile("bndcl 0x12345678(%ecx,%eax,1), %bnd0"); + asm volatile("bndcl 0x12345678(%ebp,%eax,1), %bnd0"); + asm volatile("bndcl 0x12345678(%eax,%ecx,1), %bnd0"); + asm volatile("bndcl 0x12345678(%eax,%ecx,8), %bnd0"); + asm volatile("bndcl %eax, %bnd0"); + + /* bndcu r/m32, bnd */ + + asm volatile("bndcu (%eax), %bnd0"); + asm volatile("bndcu (0x12345678), %bnd0"); + asm volatile("bndcu (%eax), %bnd3"); + asm volatile("bndcu (%ecx,%eax,1), %bnd0"); + asm volatile("bndcu 0x12345678(,%eax,1), %bnd0"); + asm volatile("bndcu (%eax,%ecx,1), %bnd0"); + asm volatile("bndcu (%eax,%ecx,8), %bnd0"); + asm volatile("bndcu 0x12(%eax), %bnd0"); + asm volatile("bndcu 0x12(%ebp), %bnd0"); + asm volatile("bndcu 0x12(%ecx,%eax,1), %bnd0"); + asm volatile("bndcu 0x12(%ebp,%eax,1), %bnd0"); + asm volatile("bndcu 0x12(%eax,%ecx,1), %bnd0"); + asm volatile("bndcu 0x12(%eax,%ecx,8), %bnd0"); + asm volatile("bndcu 0x12345678(%eax), %bnd0"); + asm volatile("bndcu 0x12345678(%ebp), %bnd0"); + asm volatile("bndcu 0x12345678(%ecx,%eax,1), %bnd0"); + asm volatile("bndcu 0x12345678(%ebp,%eax,1), %bnd0"); + asm volatile("bndcu 0x12345678(%eax,%ecx,1), %bnd0"); + asm volatile("bndcu 0x12345678(%eax,%ecx,8), %bnd0"); + asm volatile("bndcu %eax, %bnd0"); + + /* bndcn r/m32, bnd */ + + asm volatile("bndcn (%eax), %bnd0"); + asm volatile("bndcn (0x12345678), %bnd0"); + asm volatile("bndcn (%eax), %bnd3"); + asm volatile("bndcn (%ecx,%eax,1), %bnd0"); + asm volatile("bndcn 0x12345678(,%eax,1), %bnd0"); + asm volatile("bndcn (%eax,%ecx,1), %bnd0"); + asm volatile("bndcn (%eax,%ecx,8), %bnd0"); + asm volatile("bndcn 0x12(%eax), %bnd0"); + asm volatile("bndcn 0x12(%ebp), %bnd0"); + asm volatile("bndcn 0x12(%ecx,%eax,1), %bnd0"); + asm volatile("bndcn 0x12(%ebp,%eax,1), %bnd0"); + asm volatile("bndcn 0x12(%eax,%ecx,1), %bnd0"); + asm volatile("bndcn 0x12(%eax,%ecx,8), %bnd0"); + asm volatile("bndcn 0x12345678(%eax), %bnd0"); + asm 
volatile("bndcn 0x12345678(%ebp), %bnd0"); + asm volatile("bndcn 0x12345678(%ecx,%eax,1), %bnd0"); + asm volatile("bndcn 0x12345678(%ebp,%eax,1), %bnd0"); + asm volatile("bndcn 0x12345678(%eax,%ecx,1), %bnd0"); + asm volatile("bndcn 0x12345678(%eax,%ecx,8), %bnd0"); + asm volatile("bndcn %eax, %bnd0"); + + /* bndmov m64, bnd */ + + asm volatile("bndmov (%eax), %bnd0"); + asm volatile("bndmov (0x12345678), %bnd0"); + asm volatile("bndmov (%eax), %bnd3"); + asm volatile("bndmov (%ecx,%eax,1), %bnd0"); + asm volatile("bndmov 0x12345678(,%eax,1), %bnd0"); + asm volatile("bndmov (%eax,%ecx,1), %bnd0"); + asm volatile("bndmov (%eax,%ecx,8), %bnd0"); + asm volatile("bndmov 0x12(%eax), %bnd0"); + asm volatile("bndmov 0x12(%ebp), %bnd0"); + asm volatile("bndmov 0x12(%ecx,%eax,1), %bnd0"); + asm volatile("bndmov 0x12(%ebp,%eax,1), %bnd0"); + asm volatile("bndmov 0x12(%eax,%ecx,1), %bnd0"); + asm volatile("bndmov 0x12(%eax,%ecx,8), %bnd0"); + asm volatile("bndmov 0x12345678(%eax), %bnd0"); + asm volatile("bndmov 0x12345678(%ebp), %bnd0"); + asm volatile("bndmov 0x12345678(%ecx,%eax,1), %bnd0"); + asm volatile("bndmov 0x12345678(%ebp,%eax,1), %bnd0"); + asm volatile("bndmov 0x12345678(%eax,%ecx,1), %bnd0"); + asm volatile("bndmov 0x12345678(%eax,%ecx,8), %bnd0"); + + /* bndmov bnd, m64 */ + + asm volatile("bndmov %bnd0, (%eax)"); + asm volatile("bndmov %bnd0, (0x12345678)"); + asm volatile("bndmov %bnd3, (%eax)"); + asm volatile("bndmov %bnd0, (%ecx,%eax,1)"); + asm volatile("bndmov %bnd0, 0x12345678(,%eax,1)"); + asm volatile("bndmov %bnd0, (%eax,%ecx,1)"); + asm volatile("bndmov %bnd0, (%eax,%ecx,8)"); + asm volatile("bndmov %bnd0, 0x12(%eax)"); + asm volatile("bndmov %bnd0, 0x12(%ebp)"); + asm volatile("bndmov %bnd0, 0x12(%ecx,%eax,1)"); + asm volatile("bndmov %bnd0, 0x12(%ebp,%eax,1)"); + asm volatile("bndmov %bnd0, 0x12(%eax,%ecx,1)"); + asm volatile("bndmov %bnd0, 0x12(%eax,%ecx,8)"); + asm volatile("bndmov %bnd0, 0x12345678(%eax)"); + asm volatile("bndmov %bnd0, 
0x12345678(%ebp)"); + asm volatile("bndmov %bnd0, 0x12345678(%ecx,%eax,1)"); + asm volatile("bndmov %bnd0, 0x12345678(%ebp,%eax,1)"); + asm volatile("bndmov %bnd0, 0x12345678(%eax,%ecx,1)"); + asm volatile("bndmov %bnd0, 0x12345678(%eax,%ecx,8)"); + + /* bndmov bnd2, bnd1 */ + + asm volatile("bndmov %bnd0, %bnd1"); + asm volatile("bndmov %bnd1, %bnd0"); + + /* bndldx mib, bnd */ + + asm volatile("bndldx (%eax), %bnd0"); + asm volatile("bndldx (0x12345678), %bnd0"); + asm volatile("bndldx (%eax), %bnd3"); + asm volatile("bndldx (%ecx,%eax,1), %bnd0"); + asm volatile("bndldx 0x12345678(,%eax,1), %bnd0"); + asm volatile("bndldx (%eax,%ecx,1), %bnd0"); + asm volatile("bndldx 0x12(%eax), %bnd0"); + asm volatile("bndldx 0x12(%ebp), %bnd0"); + asm volatile("bndldx 0x12(%ecx,%eax,1), %bnd0"); + asm volatile("bndldx 0x12(%ebp,%eax,1), %bnd0"); + asm volatile("bndldx 0x12(%eax,%ecx,1), %bnd0"); + asm volatile("bndldx 0x12345678(%eax), %bnd0"); + asm volatile("bndldx 0x12345678(%ebp), %bnd0"); + asm volatile("bndldx 0x12345678(%ecx,%eax,1), %bnd0"); + asm volatile("bndldx 0x12345678(%ebp,%eax,1), %bnd0"); + asm volatile("bndldx 0x12345678(%eax,%ecx,1), %bnd0"); + + /* bndstx bnd, mib */ + + asm volatile("bndstx %bnd0, (%eax)"); + asm volatile("bndstx %bnd0, (0x12345678)"); + asm volatile("bndstx %bnd3, (%eax)"); + asm volatile("bndstx %bnd0, (%ecx,%eax,1)"); + asm volatile("bndstx %bnd0, 0x12345678(,%eax,1)"); + asm volatile("bndstx %bnd0, (%eax,%ecx,1)"); + asm volatile("bndstx %bnd0, 0x12(%eax)"); + asm volatile("bndstx %bnd0, 0x12(%ebp)"); + asm volatile("bndstx %bnd0, 0x12(%ecx,%eax,1)"); + asm volatile("bndstx %bnd0, 0x12(%ebp,%eax,1)"); + asm volatile("bndstx %bnd0, 0x12(%eax,%ecx,1)"); + asm volatile("bndstx %bnd0, 0x12345678(%eax)"); + asm volatile("bndstx %bnd0, 0x12345678(%ebp)"); + asm volatile("bndstx %bnd0, 0x12345678(%ecx,%eax,1)"); + asm volatile("bndstx %bnd0, 0x12345678(%ebp,%eax,1)"); + asm volatile("bndstx %bnd0, 0x12345678(%eax,%ecx,1)"); + + /* bnd prefix 
on call, ret, jmp and all jcc */ + + asm volatile("bnd call label1"); /* Expecting: call unconditional 0xfffffffc */ + asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("bnd ret"); /* Expecting: ret indirect 0 */ + asm volatile("bnd jmp label1"); /* Expecting: jmp unconditional 0xfffffffc */ + asm volatile("bnd jmp label1"); /* Expecting: jmp unconditional 0xfffffffc */ + asm volatile("bnd jmp *(%ecx)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jne label1"); /* Expecting: jcc conditional 0xfffffffc */ + + /* sha1rnds4 imm8, xmm2/m128, xmm1 */ + + asm volatile("sha1rnds4 $0x0, %xmm1, %xmm0"); + asm volatile("sha1rnds4 $0x91, %xmm7, %xmm2"); + asm volatile("sha1rnds4 $0x91, (%eax), %xmm0"); + asm volatile("sha1rnds4 $0x91, (0x12345678), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%eax), %xmm3"); + asm volatile("sha1rnds4 $0x91, (%ecx,%eax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(,%eax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%eax,%ecx,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%eax,%ecx,8), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%eax), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%ebp), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%ecx,%eax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%ebp,%eax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%eax,%ecx,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%eax,%ecx,8), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%eax), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%ebp), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%ecx,%eax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%ebp,%eax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%eax,%ecx,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%eax,%ecx,8), %xmm0"); + + /* sha1nexte xmm2/m128, xmm1 */ + + asm volatile("sha1nexte %xmm1, %xmm0"); + asm volatile("sha1nexte %xmm7, %xmm2"); + asm volatile("sha1nexte (%eax), %xmm0"); + asm 
volatile("sha1nexte (0x12345678), %xmm0"); + asm volatile("sha1nexte (%eax), %xmm3"); + asm volatile("sha1nexte (%ecx,%eax,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(,%eax,1), %xmm0"); + asm volatile("sha1nexte (%eax,%ecx,1), %xmm0"); + asm volatile("sha1nexte (%eax,%ecx,8), %xmm0"); + asm volatile("sha1nexte 0x12(%eax), %xmm0"); + asm volatile("sha1nexte 0x12(%ebp), %xmm0"); + asm volatile("sha1nexte 0x12(%ecx,%eax,1), %xmm0"); + asm volatile("sha1nexte 0x12(%ebp,%eax,1), %xmm0"); + asm volatile("sha1nexte 0x12(%eax,%ecx,1), %xmm0"); + asm volatile("sha1nexte 0x12(%eax,%ecx,8), %xmm0"); + asm volatile("sha1nexte 0x12345678(%eax), %xmm0"); + asm volatile("sha1nexte 0x12345678(%ebp), %xmm0"); + asm volatile("sha1nexte 0x12345678(%ecx,%eax,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(%ebp,%eax,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(%eax,%ecx,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(%eax,%ecx,8), %xmm0"); + + /* sha1msg1 xmm2/m128, xmm1 */ + + asm volatile("sha1msg1 %xmm1, %xmm0"); + asm volatile("sha1msg1 %xmm7, %xmm2"); + asm volatile("sha1msg1 (%eax), %xmm0"); + asm volatile("sha1msg1 (0x12345678), %xmm0"); + asm volatile("sha1msg1 (%eax), %xmm3"); + asm volatile("sha1msg1 (%ecx,%eax,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(,%eax,1), %xmm0"); + asm volatile("sha1msg1 (%eax,%ecx,1), %xmm0"); + asm volatile("sha1msg1 (%eax,%ecx,8), %xmm0"); + asm volatile("sha1msg1 0x12(%eax), %xmm0"); + asm volatile("sha1msg1 0x12(%ebp), %xmm0"); + asm volatile("sha1msg1 0x12(%ecx,%eax,1), %xmm0"); + asm volatile("sha1msg1 0x12(%ebp,%eax,1), %xmm0"); + asm volatile("sha1msg1 0x12(%eax,%ecx,1), %xmm0"); + asm volatile("sha1msg1 0x12(%eax,%ecx,8), %xmm0"); + asm volatile("sha1msg1 0x12345678(%eax), %xmm0"); + asm volatile("sha1msg1 0x12345678(%ebp), %xmm0"); + asm volatile("sha1msg1 0x12345678(%ecx,%eax,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(%ebp,%eax,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(%eax,%ecx,1), %xmm0"); + asm 
volatile("sha1msg1 0x12345678(%eax,%ecx,8), %xmm0"); + + /* sha1msg2 xmm2/m128, xmm1 */ + + asm volatile("sha1msg2 %xmm1, %xmm0"); + asm volatile("sha1msg2 %xmm7, %xmm2"); + asm volatile("sha1msg2 (%eax), %xmm0"); + asm volatile("sha1msg2 (0x12345678), %xmm0"); + asm volatile("sha1msg2 (%eax), %xmm3"); + asm volatile("sha1msg2 (%ecx,%eax,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(,%eax,1), %xmm0"); + asm volatile("sha1msg2 (%eax,%ecx,1), %xmm0"); + asm volatile("sha1msg2 (%eax,%ecx,8), %xmm0"); + asm volatile("sha1msg2 0x12(%eax), %xmm0"); + asm volatile("sha1msg2 0x12(%ebp), %xmm0"); + asm volatile("sha1msg2 0x12(%ecx,%eax,1), %xmm0"); + asm volatile("sha1msg2 0x12(%ebp,%eax,1), %xmm0"); + asm volatile("sha1msg2 0x12(%eax,%ecx,1), %xmm0"); + asm volatile("sha1msg2 0x12(%eax,%ecx,8), %xmm0"); + asm volatile("sha1msg2 0x12345678(%eax), %xmm0"); + asm volatile("sha1msg2 0x12345678(%ebp), %xmm0"); + asm volatile("sha1msg2 0x12345678(%ecx,%eax,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(%ebp,%eax,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(%eax,%ecx,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(%eax,%ecx,8), %xmm0"); + + /* sha256rnds2 <XMM0>, xmm2/m128, xmm1 */ + /* Note sha256rnds2 has an implicit operand 'xmm0' */ + + asm volatile("sha256rnds2 %xmm4, %xmm1"); + asm volatile("sha256rnds2 %xmm7, %xmm2"); + asm volatile("sha256rnds2 (%eax), %xmm1"); + asm volatile("sha256rnds2 (0x12345678), %xmm1"); + asm volatile("sha256rnds2 (%eax), %xmm3"); + asm volatile("sha256rnds2 (%ecx,%eax,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(,%eax,1), %xmm1"); + asm volatile("sha256rnds2 (%eax,%ecx,1), %xmm1"); + asm volatile("sha256rnds2 (%eax,%ecx,8), %xmm1"); + asm volatile("sha256rnds2 0x12(%eax), %xmm1"); + asm volatile("sha256rnds2 0x12(%ebp), %xmm1"); + asm volatile("sha256rnds2 0x12(%ecx,%eax,1), %xmm1"); + asm volatile("sha256rnds2 0x12(%ebp,%eax,1), %xmm1"); + asm volatile("sha256rnds2 0x12(%eax,%ecx,1), %xmm1"); + asm volatile("sha256rnds2 
0x12(%eax,%ecx,8), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%eax), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%ebp), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%ecx,%eax,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%ebp,%eax,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%eax,%ecx,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%eax,%ecx,8), %xmm1"); + + /* sha256msg1 xmm2/m128, xmm1 */ + + asm volatile("sha256msg1 %xmm1, %xmm0"); + asm volatile("sha256msg1 %xmm7, %xmm2"); + asm volatile("sha256msg1 (%eax), %xmm0"); + asm volatile("sha256msg1 (0x12345678), %xmm0"); + asm volatile("sha256msg1 (%eax), %xmm3"); + asm volatile("sha256msg1 (%ecx,%eax,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(,%eax,1), %xmm0"); + asm volatile("sha256msg1 (%eax,%ecx,1), %xmm0"); + asm volatile("sha256msg1 (%eax,%ecx,8), %xmm0"); + asm volatile("sha256msg1 0x12(%eax), %xmm0"); + asm volatile("sha256msg1 0x12(%ebp), %xmm0"); + asm volatile("sha256msg1 0x12(%ecx,%eax,1), %xmm0"); + asm volatile("sha256msg1 0x12(%ebp,%eax,1), %xmm0"); + asm volatile("sha256msg1 0x12(%eax,%ecx,1), %xmm0"); + asm volatile("sha256msg1 0x12(%eax,%ecx,8), %xmm0"); + asm volatile("sha256msg1 0x12345678(%eax), %xmm0"); + asm volatile("sha256msg1 0x12345678(%ebp), %xmm0"); + asm volatile("sha256msg1 0x12345678(%ecx,%eax,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(%ebp,%eax,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(%eax,%ecx,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(%eax,%ecx,8), %xmm0"); + + /* sha256msg2 xmm2/m128, xmm1 */ + + asm volatile("sha256msg2 %xmm1, %xmm0"); + asm volatile("sha256msg2 %xmm7, %xmm2"); + asm volatile("sha256msg2 (%eax), %xmm0"); + asm volatile("sha256msg2 (0x12345678), %xmm0"); + asm volatile("sha256msg2 (%eax), %xmm3"); + asm volatile("sha256msg2 (%ecx,%eax,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(,%eax,1), %xmm0"); + asm volatile("sha256msg2 (%eax,%ecx,1), %xmm0"); + asm volatile("sha256msg2 (%eax,%ecx,8), %xmm0"); 
+ asm volatile("sha256msg2 0x12(%eax), %xmm0"); + asm volatile("sha256msg2 0x12(%ebp), %xmm0"); + asm volatile("sha256msg2 0x12(%ecx,%eax,1), %xmm0"); + asm volatile("sha256msg2 0x12(%ebp,%eax,1), %xmm0"); + asm volatile("sha256msg2 0x12(%eax,%ecx,1), %xmm0"); + asm volatile("sha256msg2 0x12(%eax,%ecx,8), %xmm0"); + asm volatile("sha256msg2 0x12345678(%eax), %xmm0"); + asm volatile("sha256msg2 0x12345678(%ebp), %xmm0"); + asm volatile("sha256msg2 0x12345678(%ecx,%eax,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(%ebp,%eax,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(%eax,%ecx,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(%eax,%ecx,8), %xmm0"); + + /* clflushopt m8 */ + + asm volatile("clflushopt (%eax)"); + asm volatile("clflushopt (0x12345678)"); + asm volatile("clflushopt 0x12345678(%eax,%ecx,8)"); + /* Also check instructions in the same group encoding as clflushopt */ + asm volatile("clflush (%eax)"); + asm volatile("sfence"); + + /* clwb m8 */ + + asm volatile("clwb (%eax)"); + asm volatile("clwb (0x12345678)"); + asm volatile("clwb 0x12345678(%eax,%ecx,8)"); + /* Also check instructions in the same group encoding as clwb */ + asm volatile("xsaveopt (%eax)"); + asm volatile("mfence"); + + /* xsavec mem */ + + asm volatile("xsavec (%eax)"); + asm volatile("xsavec (0x12345678)"); + asm volatile("xsavec 0x12345678(%eax,%ecx,8)"); + + /* xsaves mem */ + + asm volatile("xsaves (%eax)"); + asm volatile("xsaves (0x12345678)"); + asm volatile("xsaves 0x12345678(%eax,%ecx,8)"); + + /* xrstors mem */ + + asm volatile("xrstors (%eax)"); + asm volatile("xrstors (0x12345678)"); + asm volatile("xrstors 0x12345678(%eax,%ecx,8)"); + +#endif /* #ifdef __x86_64__ */ + + /* pcommit */ + + asm volatile("pcommit"); + + /* Following line is a marker for the awk script - do not change */ + asm volatile("rdtsc"); /* Stop here */ + + return 0; +} diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c new file mode 100644 index
000000000000..b6115dfd28f0 --- /dev/null +++ b/tools/perf/arch/x86/tests/insn-x86.c @@ -0,0 +1,185 @@ +#include <linux/types.h> + +#include "debug.h" +#include "tests/tests.h" +#include "arch-tests.h" + +#include "intel-pt-decoder/insn.h" +#include "intel-pt-decoder/intel-pt-insn-decoder.h" + +struct test_data { + u8 data[MAX_INSN_SIZE]; + int expected_length; + int expected_rel; + const char *expected_op_str; + const char *expected_branch_str; + const char *asm_rep; +}; + +struct test_data test_data_32[] = { +#include "insn-x86-dat-32.c" + {{0x0f, 0x01, 0xee}, 3, 0, NULL, NULL, "0f 01 ee \trdpkru"}, + {{0x0f, 0x01, 0xef}, 3, 0, NULL, NULL, "0f 01 ef \twrpkru"}, + {{0}, 0, 0, NULL, NULL, NULL}, +}; + +struct test_data test_data_64[] = { +#include "insn-x86-dat-64.c" + {{0x0f, 0x01, 0xee}, 3, 0, NULL, NULL, "0f 01 ee \trdpkru"}, + {{0x0f, 0x01, 0xef}, 3, 0, NULL, NULL, "0f 01 ef \twrpkru"}, + {{0}, 0, 0, NULL, NULL, NULL}, +}; + +static int get_op(const char *op_str) +{ + struct val_data { + const char *name; + int val; + } vals[] = { + {"other", INTEL_PT_OP_OTHER}, + {"call", INTEL_PT_OP_CALL}, + {"ret", INTEL_PT_OP_RET}, + {"jcc", INTEL_PT_OP_JCC}, + {"jmp", INTEL_PT_OP_JMP}, + {"loop", INTEL_PT_OP_LOOP}, + {"iret", INTEL_PT_OP_IRET}, + {"int", INTEL_PT_OP_INT}, + {"syscall", INTEL_PT_OP_SYSCALL}, + {"sysret", INTEL_PT_OP_SYSRET}, + {NULL, 0}, + }; + struct val_data *val; + + if (!op_str || !strlen(op_str)) + return 0; + + for (val = vals; val->name; val++) { + if (!strcmp(val->name, op_str)) + return val->val; + } + + pr_debug("Failed to get op\n"); + + return -1; +} + +static int get_branch(const char *branch_str) +{ + struct val_data { + const char *name; + int val; + } vals[] = { + {"no_branch", INTEL_PT_BR_NO_BRANCH}, + {"indirect", INTEL_PT_BR_INDIRECT}, + {"conditional", INTEL_PT_BR_CONDITIONAL}, + {"unconditional", INTEL_PT_BR_UNCONDITIONAL}, + {NULL, 0}, + }; + struct val_data *val; + + if (!branch_str || !strlen(branch_str)) + return 0; + + for (val = 
vals; val->name; val++) { + if (!strcmp(val->name, branch_str)) + return val->val; + } + + pr_debug("Failed to get branch\n"); + + return -1; +} + +static int test_data_item(struct test_data *dat, int x86_64) +{ + struct intel_pt_insn intel_pt_insn; + struct insn insn; + int op, branch; + + insn_init(&insn, dat->data, MAX_INSN_SIZE, x86_64); + insn_get_length(&insn); + + if (!insn_complete(&insn)) { + pr_debug("Failed to decode: %s\n", dat->asm_rep); + return -1; + } + + if (insn.length != dat->expected_length) { + pr_debug("Failed to decode length (%d vs expected %d): %s\n", + insn.length, dat->expected_length, dat->asm_rep); + return -1; + } + + op = get_op(dat->expected_op_str); + branch = get_branch(dat->expected_branch_str); + + if (intel_pt_get_insn(dat->data, MAX_INSN_SIZE, x86_64, &intel_pt_insn)) { + pr_debug("Intel PT failed to decode: %s\n", dat->asm_rep); + return -1; + } + + if ((int)intel_pt_insn.op != op) { + pr_debug("Failed to decode 'op' value (%d vs expected %d): %s\n", + intel_pt_insn.op, op, dat->asm_rep); + return -1; + } + + if ((int)intel_pt_insn.branch != branch) { + pr_debug("Failed to decode 'branch' value (%d vs expected %d): %s\n", + intel_pt_insn.branch, branch, dat->asm_rep); + return -1; + } + + if (intel_pt_insn.rel != dat->expected_rel) { + pr_debug("Failed to decode 'rel' value (%#x vs expected %#x): %s\n", + intel_pt_insn.rel, dat->expected_rel, dat->asm_rep); + return -1; + } + + pr_debug("Decoded ok: %s\n", dat->asm_rep); + + return 0; +} + +static int test_data_set(struct test_data *dat_set, int x86_64) +{ + struct test_data *dat; + int ret = 0; + + for (dat = dat_set; dat->expected_length; dat++) { + if (test_data_item(dat, x86_64)) + ret = -1; + } + + return ret; +} + +/** + * test__insn_x86 - test x86 instruction decoder - new instructions. + * + * This function implements a test that decodes a selection of instructions and + * checks the results. The Intel PT function that further categorizes + * instructions (i.e. 
intel_pt_get_insn()) is also checked. + * + * The instructions are originally in insn-x86-dat-src.c which has been + * processed by scripts gen-insn-x86-dat.sh and gen-insn-x86-dat.awk to produce + * insn-x86-dat-32.c and insn-x86-dat-64.c which are included into this program. + * i.e. to add new instructions to the test, edit insn-x86-dat-src.c, run the + * gen-insn-x86-dat.sh script, make perf, and then run the test. + * + * If the test passes %0 is returned, otherwise %-1 is returned. Use the + * verbose (-v) option to see all the instructions and whether or not they + * decoded successfully. + */ +int test__insn_x86(void) +{ + int ret = 0; + + if (test_data_set(test_data_32, 0)) + ret = -1; + + if (test_data_set(test_data_64, 1)) + ret = -1; + + return ret; +} diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c new file mode 100644 index 000000000000..d28c1b6a3b54 --- /dev/null +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -0,0 +1,124 @@ +#include "tests/tests.h" +#include "perf.h" +#include "cloexec.h" +#include "debug.h" +#include "evlist.h" +#include "evsel.h" +#include "arch-tests.h" + +#include <sys/mman.h> +#include <string.h> + +static pid_t spawn(void) +{ + pid_t pid; + + pid = fork(); + if (pid) + return pid; + + while(1); + sleep(5); + return 0; +} + +/* + * Create an event group that contains both a sampled hardware + * (cpu-cycles) and software (intel_cqm/llc_occupancy/) event. We then + * wait for the hardware perf counter to overflow and generate a PMI, + * which triggers an event read for both of the events in the group. + * + * Since reading Intel CQM event counters requires sending SMP IPIs, the + * CQM pmu needs to handle the above situation gracefully, and return + * the last read counter value to avoid triggering a WARN_ON_ONCE() in + * smp_call_function_many() caused by sending IPIs from NMI context.
+ */ +int test__intel_cqm_count_nmi_context(void) +{ + struct perf_evlist *evlist = NULL; + struct perf_evsel *evsel = NULL; + struct perf_event_attr pe; + int i, fd[2], flag, ret; + size_t mmap_len; + void *event; + pid_t pid; + int err = TEST_FAIL; + + flag = perf_event_open_cloexec_flag(); + + evlist = perf_evlist__new(); + if (!evlist) { + pr_debug("perf_evlist__new failed\n"); + return TEST_FAIL; + } + + ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL); + if (ret) { + pr_debug("parse_events failed\n"); + err = TEST_SKIP; + goto out; + } + + evsel = perf_evlist__first(evlist); + if (!evsel) { + pr_debug("perf_evlist__first failed\n"); + goto out; + } + + memset(&pe, 0, sizeof(pe)); + pe.size = sizeof(pe); + + pe.type = PERF_TYPE_HARDWARE; + pe.config = PERF_COUNT_HW_CPU_CYCLES; + pe.read_format = PERF_FORMAT_GROUP; + + pe.sample_period = 128; + pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ; + + pid = spawn(); + + fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag); + if (fd[0] < 0) { + pr_debug("failed to open event\n"); + goto out; + } + + memset(&pe, 0, sizeof(pe)); + pe.size = sizeof(pe); + + pe.type = evsel->attr.type; + pe.config = evsel->attr.config; + + fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag); + if (fd[1] < 0) { + pr_debug("failed to open event\n"); + goto out; + } + + /* + * Pick a power-of-two number of pages + 1 for the meta-data + * page (struct perf_event_mmap_page). See tools/perf/design.txt. 
+ */ + mmap_len = page_size * 65; + + event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0); + if (event == (void *)(-1)) { + pr_debug("failed to mmap %d\n", errno); + goto out; + } + + sleep(1); + + err = TEST_OK; + + munmap(event, mmap_len); + + for (i = 0; i < 2; i++) + close(fd[i]); + + kill(pid, SIGKILL); + wait(NULL); +out: + perf_evlist__delete(evlist); + return err; +} diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 5f49484f1abc..658cd200af74 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -9,7 +9,9 @@ #include "thread_map.h" #include "cpumap.h" #include "tsc.h" -#include "tests.h" +#include "tests/tests.h" + +#include "arch-tests.h" #define CHECK__(x) { \ while ((x) < 0) { \ diff --git a/tools/perf/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index d31f2c4d9f64..e7688214c7cf 100644 --- a/tools/perf/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -5,10 +5,9 @@ #include <linux/types.h> #include "perf.h" #include "debug.h" -#include "tests.h" +#include "tests/tests.h" #include "cloexec.h" - -#if defined(__x86_64__) || defined(__i386__) +#include "arch-tests.h" static u64 rdpmc(unsigned int counter) { @@ -173,5 +172,3 @@ int test__rdpmc(void) return 0; } - -#endif diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index cfbccc4e3187..ff63649fa9ac 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -1,8 +1,14 @@ libperf-y += header.o libperf-y += tsc.o +libperf-y += pmu.o libperf-y += kvm-stat.o +libperf-y += perf_regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o + +libperf-$(CONFIG_AUXTRACE) += auxtrace.o +libperf-$(CONFIG_AUXTRACE) += intel-pt.o +libperf-$(CONFIG_AUXTRACE) += intel-bts.o diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c 
new file mode 100644 index 000000000000..7a7805583e3f --- /dev/null +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -0,0 +1,83 @@ +/* + * auxtrace.c: AUX area tracing support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <stdbool.h> + +#include "../../util/header.h" +#include "../../util/debug.h" +#include "../../util/pmu.h" +#include "../../util/auxtrace.h" +#include "../../util/intel-pt.h" +#include "../../util/intel-bts.h" +#include "../../util/evlist.h" + +static +struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist, + int *err) +{ + struct perf_pmu *intel_pt_pmu; + struct perf_pmu *intel_bts_pmu; + struct perf_evsel *evsel; + bool found_pt = false; + bool found_bts = false; + + intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); + intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); + + if (evlist) { + evlist__for_each(evlist, evsel) { + if (intel_pt_pmu && + evsel->attr.type == intel_pt_pmu->type) + found_pt = true; + if (intel_bts_pmu && + evsel->attr.type == intel_bts_pmu->type) + found_bts = true; + } + } + + if (found_pt && found_bts) { + pr_err("intel_pt and intel_bts may not be used together\n"); + *err = -EINVAL; + return NULL; + } + + if (found_pt) + return intel_pt_recording_init(err); + + if (found_bts) + return intel_bts_recording_init(err); + + return NULL; +} + +struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist, + int *err) +{ + char buffer[64]; + int ret; + + *err = 0; + + ret = get_cpuid(buffer, sizeof(buffer)); + if (ret) { + 
*err = ret; + return NULL; + } + + if (!strncmp(buffer, "GenuineIntel,", 13)) + return auxtrace_record__init_intel(evlist, err); + + return NULL; +} diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c index be22dd463232..9223c164e545 100644 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ b/tools/perf/arch/x86/util/dwarf-regs.c @@ -21,55 +21,109 @@ */ #include <stddef.h> +#include <errno.h> /* for EINVAL */ +#include <string.h> /* for strcmp */ +#include <linux/ptrace.h> /* for struct pt_regs */ +#include <linux/kernel.h> /* for offsetof */ #include <dwarf-regs.h> /* - * Generic dwarf analysis helpers + * See arch/x86/kernel/ptrace.c. + * Different from it: + * + * - Since struct pt_regs is defined differently for user and kernel, + * but we want to use 'ax, bx' instead of 'rax, rbx' (which is struct + * field name of user's pt_regs), we make REG_OFFSET_NAME to accept + * both string name and reg field name. + * + * - Since accessing x86_32's pt_regs from x86_64 building is difficult + * and vise versa, we simply fill offset with -1, so + * get_arch_regstr() still works but regs_query_register_offset() + * returns error. + * The only inconvenience caused by it now is that we are not allowed + * to generate BPF prologue for a x86_64 kernel if perf is built for + * x86_32. This is really a rare usecase. + * + * - Order is different from kernel's ptrace.c for get_arch_regstr(). Use + * the order defined by dwarf. 
*/ -#define X86_32_MAX_REGS 8 -const char *x86_32_regs_table[X86_32_MAX_REGS] = { - "%ax", - "%cx", - "%dx", - "%bx", - "$stack", /* Stack address instead of %sp */ - "%bp", - "%si", - "%di", +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +#ifdef __x86_64__ +# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} +# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = -1} +#else +# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = -1} +# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} +#endif + +static const struct pt_regs_offset x86_32_regoffset_table[] = { + REG_OFFSET_NAME_32("%ax", eax), + REG_OFFSET_NAME_32("%cx", ecx), + REG_OFFSET_NAME_32("%dx", edx), + REG_OFFSET_NAME_32("%bx", ebx), + REG_OFFSET_NAME_32("$stack", esp), /* Stack address instead of %sp */ + REG_OFFSET_NAME_32("%bp", ebp), + REG_OFFSET_NAME_32("%si", esi), + REG_OFFSET_NAME_32("%di", edi), + REG_OFFSET_END, }; -#define X86_64_MAX_REGS 16 -const char *x86_64_regs_table[X86_64_MAX_REGS] = { - "%ax", - "%dx", - "%cx", - "%bx", - "%si", - "%di", - "%bp", - "%sp", - "%r8", - "%r9", - "%r10", - "%r11", - "%r12", - "%r13", - "%r14", - "%r15", +static const struct pt_regs_offset x86_64_regoffset_table[] = { + REG_OFFSET_NAME_64("%ax", rax), + REG_OFFSET_NAME_64("%dx", rdx), + REG_OFFSET_NAME_64("%cx", rcx), + REG_OFFSET_NAME_64("%bx", rbx), + REG_OFFSET_NAME_64("%si", rsi), + REG_OFFSET_NAME_64("%di", rdi), + REG_OFFSET_NAME_64("%bp", rbp), + REG_OFFSET_NAME_64("%sp", rsp), + REG_OFFSET_NAME_64("%r8", r8), + REG_OFFSET_NAME_64("%r9", r9), + REG_OFFSET_NAME_64("%r10", r10), + REG_OFFSET_NAME_64("%r11", r11), + REG_OFFSET_NAME_64("%r12", r12), + REG_OFFSET_NAME_64("%r13", r13), + REG_OFFSET_NAME_64("%r14", r14), + REG_OFFSET_NAME_64("%r15", r15), + REG_OFFSET_END, }; /* TODO: switching by dwarf address size */ #ifdef __x86_64__ -#define ARCH_MAX_REGS 
X86_64_MAX_REGS -#define arch_regs_table x86_64_regs_table +#define regoffset_table x86_64_regoffset_table #else -#define ARCH_MAX_REGS X86_32_MAX_REGS -#define arch_regs_table x86_32_regs_table +#define regoffset_table x86_32_regoffset_table #endif +/* Minus 1 for the ending REG_OFFSET_END */ +#define ARCH_MAX_REGS ((sizeof(regoffset_table) / sizeof(regoffset_table[0])) - 1) + /* Return architecture dependent register string (for kprobe-tracer) */ const char *get_arch_regstr(unsigned int n) { - return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; + return (n < ARCH_MAX_REGS) ? regoffset_table[n].name : NULL; +} + +/* Reuse code from arch/x86/kernel/ptrace.c */ +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; } diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c new file mode 100644 index 000000000000..9b94ce520917 --- /dev/null +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -0,0 +1,458 @@ +/* + * intel-bts.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> + +#include "../../util/cpumap.h" +#include "../../util/evsel.h" +#include "../../util/evlist.h" +#include "../../util/session.h" +#include "../../util/util.h" +#include "../../util/pmu.h" +#include "../../util/debug.h" +#include "../../util/tsc.h" +#include "../../util/auxtrace.h" +#include "../../util/intel-bts.h" + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) +#define KiB_MASK(x) (KiB(x) - 1) +#define MiB_MASK(x) (MiB(x) - 1) + +#define INTEL_BTS_DFLT_SAMPLE_SIZE KiB(4) + +#define INTEL_BTS_MAX_SAMPLE_SIZE KiB(60) + +struct intel_bts_snapshot_ref { + void *ref_buf; + size_t ref_offset; + bool wrapped; +}; + +struct intel_bts_recording { + struct auxtrace_record itr; + struct perf_pmu *intel_bts_pmu; + struct perf_evlist *evlist; + bool snapshot_mode; + size_t snapshot_size; + int snapshot_ref_cnt; + struct intel_bts_snapshot_ref *snapshot_refs; +}; + +struct branch { + u64 from; + u64 to; + u64 misc; +}; + +static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused) +{ + return INTEL_BTS_AUXTRACE_PRIV_SIZE; +} + +static int intel_bts_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *auxtrace_info, + size_t priv_size) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu; + struct perf_event_mmap_page *pc; + struct perf_tsc_conversion tc = { .time_mult = 0, }; + bool cap_user_time_zero = false; + int err; + + if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + if (!session->evlist->nr_mmaps) + return -EINVAL; + + pc = session->evlist->mmap[0].base; + if (pc) { + err = perf_read_tsc_conversion(pc, &tc); + if (err) { + if (err != -EOPNOTSUPP) + return err; + } else { + cap_user_time_zero = tc.time_mult != 0; + } + if (!cap_user_time_zero) + 
ui__warning("Intel BTS: TSC not available\n"); + } + + auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS; + auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type; + auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift; + auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult; + auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero; + auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero; + auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode; + + return 0; +} + +static int intel_bts_recording_options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu; + struct perf_evsel *evsel, *intel_bts_evsel = NULL; + const struct cpu_map *cpus = evlist->cpus; + bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; + + btsr->evlist = evlist; + btsr->snapshot_mode = opts->auxtrace_snapshot_mode; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == intel_bts_pmu->type) { + if (intel_bts_evsel) { + pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n"); + return -EINVAL; + } + evsel->attr.freq = 0; + evsel->attr.sample_period = 1; + intel_bts_evsel = evsel; + opts->full_auxtrace = true; + } + } + + if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) { + pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n"); + return -EINVAL; + } + + if (!opts->full_auxtrace) + return 0; + + if (opts->full_auxtrace && !cpu_map__empty(cpus)) { + pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n"); + return -EINVAL; + } + + /* Set default sizes for snapshot mode */ + if (opts->auxtrace_snapshot_mode) { + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + 
opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + if (!opts->auxtrace_snapshot_size) + opts->auxtrace_snapshot_size = + opts->auxtrace_mmap_pages * (size_t)page_size; + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + if (opts->auxtrace_snapshot_size > + opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + pr_debug2("Intel BTS snapshot size: %zu\n", + opts->auxtrace_snapshot_size); + } + + /* Set default sizes for full trace mode */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz; + + if (opts->auxtrace_snapshot_mode) + min_sz = KiB(4); + else + min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + if (intel_bts_evsel) { + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace event + * must come first. 
+ */ + perf_evlist__to_front(evlist, intel_bts_evsel); + /* + * In the case of per-cpu mmaps, we need the CPU on the + * AUX event. + */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(intel_bts_evsel, CPU); + } + + /* Add dummy event to keep tracking */ + if (opts->full_auxtrace) { + struct perf_evsel *tracking_evsel; + int err; + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + tracking_evsel = perf_evlist__last(evlist); + + perf_evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->attr.freq = 0; + tracking_evsel->attr.sample_period = 1; + } + + return 0; +} + +static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr, + struct record_opts *opts, + const char *str) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + unsigned long long snapshot_size = 0; + char *endptr; + + if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + + btsr->snapshot_size = snapshot_size; + + return 0; +} + +static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused) +{ + return rdtsc(); +} + +static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr, + int idx) +{ + const size_t sz = sizeof(struct intel_bts_snapshot_ref); + int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2; + struct intel_bts_snapshot_ref *refs; + + if (!new_cnt) + new_cnt = 16; + + while (new_cnt <= idx) + new_cnt *= 2; + + refs = calloc(new_cnt, sz); + if (!refs) + return -ENOMEM; + + memcpy(refs, btsr->snapshot_refs, cnt * sz); + + btsr->snapshot_refs = refs; + btsr->snapshot_ref_cnt = new_cnt; + + return 0; +} + +static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr) +{ + int i; + + for (i = 0; i < btsr->snapshot_ref_cnt; i++) + zfree(&btsr->snapshot_refs[i].ref_buf); + zfree(&btsr->snapshot_refs); +} 
+ +static void intel_bts_recording_free(struct auxtrace_record *itr) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + + intel_bts_free_snapshot_refs(btsr); + free(btsr); +} + +static int intel_bts_snapshot_start(struct auxtrace_record *itr) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(btsr->evlist, evsel) { + if (evsel->attr.type == btsr->intel_bts_pmu->type) + return perf_evlist__disable_event(btsr->evlist, evsel); + } + return -EINVAL; +} + +static int intel_bts_snapshot_finish(struct auxtrace_record *itr) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(btsr->evlist, evsel) { + if (evsel->attr.type == btsr->intel_bts_pmu->type) + return perf_evlist__enable_event(btsr->evlist, evsel); + } + return -EINVAL; +} + +static bool intel_bts_first_wrap(u64 *data, size_t buf_size) +{ + int i, a, b; + + b = buf_size >> 3; + a = b - 512; + if (a < 0) + a = 0; + + for (i = a; i < b; i++) { + if (data[i]) + return true; + } + + return false; +} + +static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + bool wrapped; + int err; + + pr_debug3("%s: mmap index %d old head %zu new head %zu\n", + __func__, idx, (size_t)*old, (size_t)*head); + + if (idx >= btsr->snapshot_ref_cnt) { + err = intel_bts_alloc_snapshot_refs(btsr, idx); + if (err) + goto out_err; + } + + wrapped = btsr->snapshot_refs[idx].wrapped; + if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) { + btsr->snapshot_refs[idx].wrapped = true; + wrapped = true; + } + + /* + * In full trace mode 'head' continually increases. However in snapshot + * mode 'head' is an offset within the buffer. 
Here 'old' and 'head' + * are adjusted to match the full trace case which expects that 'old' is + * always less than 'head'. + */ + if (wrapped) { + *old = *head; + *head += mm->len; + } else { + if (mm->mask) + *old &= mm->mask; + else + *old %= mm->len; + if (*old > *head) + *head += mm->len; + } + + pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", + __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); + + return 0; + +out_err: + pr_err("%s: failed, error %d\n", __func__, err); + return err; +} + +static int intel_bts_read_finish(struct auxtrace_record *itr, int idx) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(btsr->evlist, evsel) { + if (evsel->attr.type == btsr->intel_bts_pmu->type) + return perf_evlist__enable_event_idx(btsr->evlist, + evsel, idx); + } + return -EINVAL; +} + +struct auxtrace_record *intel_bts_recording_init(int *err) +{ + struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); + struct intel_bts_recording *btsr; + + if (!intel_bts_pmu) + return NULL; + + btsr = zalloc(sizeof(struct intel_bts_recording)); + if (!btsr) { + *err = -ENOMEM; + return NULL; + } + + btsr->intel_bts_pmu = intel_bts_pmu; + btsr->itr.recording_options = intel_bts_recording_options; + btsr->itr.info_priv_size = intel_bts_info_priv_size; + btsr->itr.info_fill = intel_bts_info_fill; + btsr->itr.free = intel_bts_recording_free; + btsr->itr.snapshot_start = intel_bts_snapshot_start; + btsr->itr.snapshot_finish = intel_bts_snapshot_finish; + btsr->itr.find_snapshot = intel_bts_find_snapshot; + btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options; + btsr->itr.reference = intel_bts_reference; + btsr->itr.read_finish = intel_bts_read_finish; + btsr->itr.alignment = sizeof(struct branch); + return &btsr->itr; +} diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c new file 
mode 100644 index 000000000000..b02af064f0f9 --- /dev/null +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -0,0 +1,1046 @@ +/* + * intel_pt.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <stdbool.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> +#include <cpuid.h> + +#include "../../perf.h" +#include "../../util/session.h" +#include "../../util/event.h" +#include "../../util/evlist.h" +#include "../../util/evsel.h" +#include "../../util/cpumap.h" +#include "../../util/parse-options.h" +#include "../../util/parse-events.h" +#include "../../util/pmu.h" +#include "../../util/debug.h" +#include "../../util/auxtrace.h" +#include "../../util/tsc.h" +#include "../../util/intel-pt.h" + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) +#define KiB_MASK(x) (KiB(x) - 1) +#define MiB_MASK(x) (MiB(x) - 1) + +#define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4) + +#define INTEL_PT_MAX_SAMPLE_SIZE KiB(60) + +#define INTEL_PT_PSB_PERIOD_NEAR 256 + +struct intel_pt_snapshot_ref { + void *ref_buf; + size_t ref_offset; + bool wrapped; +}; + +struct intel_pt_recording { + struct auxtrace_record itr; + struct perf_pmu *intel_pt_pmu; + int have_sched_switch; + struct perf_evlist *evlist; + bool snapshot_mode; + bool snapshot_init_done; + size_t snapshot_size; + size_t snapshot_ref_buf_size; + int snapshot_ref_cnt; + struct intel_pt_snapshot_ref *snapshot_refs; +}; + +static int intel_pt_parse_terms_with_default(struct 
list_head *formats, + const char *str, + u64 *config) +{ + struct list_head *terms; + struct perf_event_attr attr = { .size = 0, }; + int err; + + terms = malloc(sizeof(struct list_head)); + if (!terms) + return -ENOMEM; + + INIT_LIST_HEAD(terms); + + err = parse_events_terms(terms, str); + if (err) + goto out_free; + + attr.config = *config; + err = perf_pmu__config_terms(formats, &attr, terms, true, NULL); + if (err) + goto out_free; + + *config = attr.config; +out_free: + parse_events__free_terms(terms); + return err; +} + +static int intel_pt_parse_terms(struct list_head *formats, const char *str, + u64 *config) +{ + *config = 0; + return intel_pt_parse_terms_with_default(formats, str, config); +} + +static u64 intel_pt_masked_bits(u64 mask, u64 bits) +{ + const u64 top_bit = 1ULL << 63; + u64 res = 0; + int i; + + for (i = 0; i < 64; i++) { + if (mask & top_bit) { + res <<= 1; + if (bits & top_bit) + res |= 1; + } + mask <<= 1; + bits <<= 1; + } + + return res; +} + +static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str, + struct perf_evlist *evlist, u64 *res) +{ + struct perf_evsel *evsel; + u64 mask; + + *res = 0; + + mask = perf_pmu__format_bits(&intel_pt_pmu->format, str); + if (!mask) + return -EINVAL; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == intel_pt_pmu->type) { + *res = intel_pt_masked_bits(mask, evsel->attr.config); + return 0; + } + } + + return -EINVAL; +} + +static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu, + struct perf_evlist *evlist) +{ + u64 val; + int err, topa_multiple_entries; + size_t psb_period; + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries", + "%d", &topa_multiple_entries) != 1) + topa_multiple_entries = 0; + + /* + * Use caps/topa_multiple_entries to indicate early hardware that had + * extra frequent PSBs. 
/*
 * Pick a set bit position from @bits that is closest to @target from below.
 *
 * Scanning upwards from bit 0, the result is the highest set position that
 * is <= @target. If there is none, the lowest set position above @target is
 * used instead. Returns -1 when @bits is zero.
 */
static int intel_pt_pick_bit(int bits, int target)
{
	int best = -1;
	int idx = 0;

	while (bits) {
		if (bits & 1) {
			if (best < 0 || idx <= target)
				best = idx;
			/* Nothing at or beyond target can improve the pick. */
			if (idx >= target)
				return best;
		}
		bits >>= 1;
		idx++;
	}

	return best;
}
+ if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + + ptr->snapshot_size = snapshot_size; + + return 0; +} + +struct perf_event_attr * +intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) +{ + struct perf_event_attr *attr; + + attr = zalloc(sizeof(struct perf_event_attr)); + if (!attr) + return NULL; + + attr->config = intel_pt_default_config(intel_pt_pmu); + + intel_pt_pmu->selectable = true; + + return attr; +} + +static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused) +{ + return INTEL_PT_AUXTRACE_PRIV_SIZE; +} + +static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + + __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); + *n = ebx; + *d = eax; +} + +static int intel_pt_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *auxtrace_info, + size_t priv_size) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; + struct perf_event_mmap_page *pc; + struct perf_tsc_conversion tc = { .time_mult = 0, }; + bool cap_user_time_zero = false, per_cpu_mmaps; + u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit; + u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d; + int err; + + if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); + intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp", + &noretcomp_bit); + intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit); + mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format, + "mtc_period"); + intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit); + + intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d); + + if (!session->evlist->nr_mmaps) + return -EINVAL; + + pc = 
session->evlist->mmap[0].base; + if (pc) { + err = perf_read_tsc_conversion(pc, &tc); + if (err) { + if (err != -EOPNOTSUPP) + return err; + } else { + cap_user_time_zero = tc.time_mult != 0; + } + if (!cap_user_time_zero) + ui__warning("Intel Processor Trace: TSC not available\n"); + } + + per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus); + + auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; + auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; + auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift; + auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult; + auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero; + auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero; + auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit; + auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit; + auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch; + auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode; + auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps; + auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit; + auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits; + auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n; + auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d; + auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit; + + return 0; +} + +static int intel_pt_track_switches(struct perf_evlist *evlist) +{ + const char *sched_switch = "sched:sched_switch"; + struct perf_evsel *evsel; + int err; + + if (!perf_evlist__can_select_event(evlist, sched_switch)) + return -EPERM; + + err = parse_events(evlist, sched_switch, NULL); + if (err) { + pr_debug2("%s: failed to parse %s, error %d\n", + __func__, sched_switch, err); + return err; + } + + evsel = perf_evlist__last(evlist); + + perf_evsel__set_sample_bit(evsel, CPU); + perf_evsel__set_sample_bit(evsel, TIME); + + evsel->system_wide = true; + evsel->no_aux_samples = true; + evsel->immediate = true; + + return 0; +} + +static void 
intel_pt_valid_str(char *str, size_t len, u64 valid) +{ + unsigned int val, last = 0, state = 1; + int p = 0; + + str[0] = '\0'; + + for (val = 0; val <= 64; val++, valid >>= 1) { + if (valid & 1) { + last = val; + switch (state) { + case 0: + p += scnprintf(str + p, len - p, ","); + /* Fall through */ + case 1: + p += scnprintf(str + p, len - p, "%u", val); + state = 2; + break; + case 2: + state = 3; + break; + case 3: + state = 4; + break; + default: + break; + } + } else { + switch (state) { + case 3: + p += scnprintf(str + p, len - p, ",%u", last); + state = 0; + break; + case 4: + p += scnprintf(str + p, len - p, "-%u", last); + state = 0; + break; + default: + break; + } + if (state != 1) + state = 0; + } + } +} + +static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu, + const char *caps, const char *name, + const char *supported, u64 config) +{ + char valid_str[256]; + unsigned int shift; + unsigned long long valid; + u64 bits; + int ok; + + if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1) + valid = 0; + + if (supported && + perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok) + valid = 0; + + valid |= 1; + + bits = perf_pmu__format_bits(&intel_pt_pmu->format, name); + + config &= bits; + + for (shift = 0; bits && !(bits & 1); shift++) + bits >>= 1; + + config >>= shift; + + if (config > 63) + goto out_err; + + /* config can be as large as 63, so the mask bit must be built in 64 bits */ + if (valid & (1ULL << config)) + return 0; +out_err: + intel_pt_valid_str(valid_str, sizeof(valid_str), valid); + pr_err("Invalid %s for %s. 
Valid values are: %s\n", + name, INTEL_PT_PMU_NAME, valid_str); + return -EINVAL; +} + +static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, + struct perf_evsel *evsel) +{ + int err; + + if (!evsel) + return 0; + + err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", + "cyc_thresh", "caps/psb_cyc", + evsel->attr.config); + if (err) + return err; + + err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods", + "mtc_period", "caps/mtc", + evsel->attr.config); + if (err) + return err; + + return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods", + "psb_period", "caps/psb_cyc", + evsel->attr.config); +} + +static int intel_pt_recording_options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; + bool have_timing_info; + struct perf_evsel *evsel, *intel_pt_evsel = NULL; + const struct cpu_map *cpus = evlist->cpus; + bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; + u64 tsc_bit; + int err; + + ptr->evlist = evlist; + ptr->snapshot_mode = opts->auxtrace_snapshot_mode; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == intel_pt_pmu->type) { + if (intel_pt_evsel) { + pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n"); + return -EINVAL; + } + evsel->attr.freq = 0; + evsel->attr.sample_period = 1; + intel_pt_evsel = evsel; + opts->full_auxtrace = true; + } + } + + if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) { + pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n"); + return -EINVAL; + } + + if (opts->use_clockid) { + pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n"); + return -EINVAL; + } + + if (!opts->full_auxtrace) + return 0; + + err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel); + if (err) + return err; + + /* Set 
default sizes for snapshot mode */ + if (opts->auxtrace_snapshot_mode) { + size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); + + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + if (!opts->auxtrace_snapshot_size) + opts->auxtrace_snapshot_size = + opts->auxtrace_mmap_pages * (size_t)page_size; + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + if (opts->auxtrace_snapshot_size > + opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + pr_debug2("Intel PT snapshot size: %zu\n", + opts->auxtrace_snapshot_size); + if (psb_period && + opts->auxtrace_snapshot_size <= psb_period + + INTEL_PT_PSB_PERIOD_NEAR) + ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n", + opts->auxtrace_snapshot_size, psb_period); + } + + /* Set default sizes for full trace mode */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate 
auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz; + + if (opts->auxtrace_snapshot_mode) + min_sz = KiB(4); + else + min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); + + if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit)) + have_timing_info = true; + else + have_timing_info = false; + + /* + * Per-cpu recording needs sched_switch events to distinguish different + * threads. + */ + if (have_timing_info && !cpu_map__empty(cpus)) { + if (perf_can_record_switch_events()) { + bool cpu_wide = !target__none(&opts->target) && + !target__has_task(&opts->target); + + if (!cpu_wide && perf_can_record_cpu_wide()) { + struct perf_evsel *switch_evsel; + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + switch_evsel = perf_evlist__last(evlist); + + switch_evsel->attr.freq = 0; + switch_evsel->attr.sample_period = 1; + switch_evsel->attr.context_switch = 1; + + switch_evsel->system_wide = true; + switch_evsel->no_aux_samples = true; + switch_evsel->immediate = true; + + perf_evsel__set_sample_bit(switch_evsel, TID); + perf_evsel__set_sample_bit(switch_evsel, TIME); + perf_evsel__set_sample_bit(switch_evsel, CPU); + + opts->record_switch_events = false; + ptr->have_sched_switch = 3; + } else { + opts->record_switch_events = true; + if (cpu_wide) + ptr->have_sched_switch = 3; + else + ptr->have_sched_switch = 2; + } + } else { + err = intel_pt_track_switches(evlist); + if (err == -EPERM) + pr_debug2("Unable to select sched:sched_switch\n"); + else if (err) + return err; + else + ptr->have_sched_switch = 1; + } + } + + if (intel_pt_evsel) { + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace + * event must come first. 
+ */ + perf_evlist__to_front(evlist, intel_pt_evsel); + /* + * In the case of per-cpu mmaps, we need the CPU on the + * AUX event. + */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(intel_pt_evsel, CPU); + } + + /* Add dummy event to keep tracking */ + if (opts->full_auxtrace) { + struct perf_evsel *tracking_evsel; + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + tracking_evsel = perf_evlist__last(evlist); + + perf_evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->attr.freq = 0; + tracking_evsel->attr.sample_period = 1; + + /* In per-cpu case, always need the time of mmap events etc */ + if (!cpu_map__empty(cpus)) { + perf_evsel__set_sample_bit(tracking_evsel, TIME); + /* And the CPU for switch events */ + perf_evsel__set_sample_bit(tracking_evsel, CPU); + } + } + + /* + * Warn the user when we do not have enough information to decode i.e. + * per-cpu with no sched_switch (except workload-only). + */ + if (!ptr->have_sched_switch && !cpu_map__empty(cpus) && + !target__none(&opts->target)) + ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); + + return 0; +} + +static int intel_pt_snapshot_start(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->intel_pt_pmu->type) + return perf_evlist__disable_event(ptr->evlist, evsel); + } + return -EINVAL; +} + +static int intel_pt_snapshot_finish(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->intel_pt_pmu->type) + return perf_evlist__enable_event(ptr->evlist, evsel); + } + return -EINVAL; +} + +static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx) +{ + 
const size_t sz = sizeof(struct intel_pt_snapshot_ref); + int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2; + struct intel_pt_snapshot_ref *refs; + + if (!new_cnt) + new_cnt = 16; + + while (new_cnt <= idx) + new_cnt *= 2; + + refs = calloc(new_cnt, sz); + if (!refs) + return -ENOMEM; + + memcpy(refs, ptr->snapshot_refs, cnt * sz); + + /* The old array was copied above; free it so that growing does not leak */ + free(ptr->snapshot_refs); + ptr->snapshot_refs = refs; + ptr->snapshot_ref_cnt = new_cnt; + + return 0; +} + +static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr) +{ + int i; + + for (i = 0; i < ptr->snapshot_ref_cnt; i++) + zfree(&ptr->snapshot_refs[i].ref_buf); + zfree(&ptr->snapshot_refs); +} + +static void intel_pt_recording_free(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + + intel_pt_free_snapshot_refs(ptr); + free(ptr); +} + +static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx, + size_t snapshot_buf_size) +{ + size_t ref_buf_size = ptr->snapshot_ref_buf_size; + void *ref_buf; + + ref_buf = zalloc(ref_buf_size); + if (!ref_buf) + return -ENOMEM; + + ptr->snapshot_refs[idx].ref_buf = ref_buf; + ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size; + + return 0; +} + +static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr, + size_t snapshot_buf_size) +{ + const size_t max_size = 256 * 1024; + size_t buf_size = 0, psb_period; + + if (ptr->snapshot_size <= 64 * 1024) + return 0; + + psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist); + if (psb_period) + buf_size = psb_period * 2; + + if (!buf_size || buf_size > max_size) + buf_size = max_size; + + if (buf_size >= snapshot_buf_size) + return 0; + + if (buf_size >= ptr->snapshot_size / 2) + return 0; + + return buf_size; +} + +static int intel_pt_snapshot_init(struct intel_pt_recording *ptr, + size_t snapshot_buf_size) +{ + if (ptr->snapshot_init_done) + return 0; + + ptr->snapshot_init_done = true; + + ptr->snapshot_ref_buf_size = 
intel_pt_snapshot_ref_buf_size(ptr, + snapshot_buf_size); + + return 0; +} + +/** + * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer. + * @buf1: first buffer + * @compare_size: number of bytes to compare + * @buf2: second buffer (a circular buffer) + * @offs2: offset in second buffer + * @buf2_size: size of second buffer + * + * The comparison allows for the possibility that the bytes to compare in the + * circular buffer are not contiguous. It is assumed that @compare_size <= + * @buf2_size. This function returns %false if the bytes are identical, %true + * otherwise. + */ +static bool intel_pt_compare_buffers(void *buf1, size_t compare_size, + void *buf2, size_t offs2, size_t buf2_size) +{ + size_t end2 = offs2 + compare_size, part_size; + + if (end2 <= buf2_size) + return memcmp(buf1, buf2 + offs2, compare_size); + + part_size = end2 - buf2_size; + if (memcmp(buf1, buf2 + offs2, part_size)) + return true; + + compare_size -= part_size; + + return memcmp(buf1 + part_size, buf2, compare_size); +} + +static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset, + size_t ref_size, size_t buf_size, + void *data, size_t head) +{ + size_t ref_end = ref_offset + ref_size; + + if (ref_end > buf_size) { + if (head > ref_offset || head < ref_end - buf_size) + return true; + } else if (head > ref_offset && head < ref_end) { + return true; + } + + return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset, + buf_size); +} + +static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size, + void *data, size_t head) +{ + if (head >= ref_size) { + memcpy(ref_buf, data + head - ref_size, ref_size); + } else { + memcpy(ref_buf, data, head); + ref_size -= head; + memcpy(ref_buf + head, data + buf_size - ref_size, ref_size); + } +} + +static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 head) +{ + struct intel_pt_snapshot_ref *ref = 
&ptr->snapshot_refs[idx]; + bool wrapped; + + wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset, + ptr->snapshot_ref_buf_size, mm->len, + data, head); + + intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len, + data, head); + + return wrapped; +} + +static bool intel_pt_first_wrap(u64 *data, size_t buf_size) +{ + int i, a, b; + + b = buf_size >> 3; + a = b - 512; + if (a < 0) + a = 0; + + for (i = a; i < b; i++) { + if (data[i]) + return true; + } + + return false; +} + +static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + bool wrapped; + int err; + + pr_debug3("%s: mmap index %d old head %zu new head %zu\n", + __func__, idx, (size_t)*old, (size_t)*head); + + err = intel_pt_snapshot_init(ptr, mm->len); + if (err) + goto out_err; + + if (idx >= ptr->snapshot_ref_cnt) { + err = intel_pt_alloc_snapshot_refs(ptr, idx); + if (err) + goto out_err; + } + + if (ptr->snapshot_ref_buf_size) { + if (!ptr->snapshot_refs[idx].ref_buf) { + err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len); + if (err) + goto out_err; + } + wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head); + } else { + wrapped = ptr->snapshot_refs[idx].wrapped; + if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) { + ptr->snapshot_refs[idx].wrapped = true; + wrapped = true; + } + } + + /* + * In full trace mode 'head' continually increases. However in snapshot + * mode 'head' is an offset within the buffer. Here 'old' and 'head' + * are adjusted to match the full trace case which expects that 'old' is + * always less than 'head'. 
+ */ + if (wrapped) { + *old = *head; + *head += mm->len; + } else { + if (mm->mask) + *old &= mm->mask; + else + *old %= mm->len; + if (*old > *head) + *head += mm->len; + } + + pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", + __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); + + return 0; + +out_err: + pr_err("%s: failed, error %d\n", __func__, err); + return err; +} + +static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) +{ + return rdtsc(); +} + +static int intel_pt_read_finish(struct auxtrace_record *itr, int idx) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->intel_pt_pmu->type) + return perf_evlist__enable_event_idx(ptr->evlist, evsel, + idx); + } + return -EINVAL; +} + +struct auxtrace_record *intel_pt_recording_init(int *err) +{ + struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); + struct intel_pt_recording *ptr; + + if (!intel_pt_pmu) + return NULL; + + ptr = zalloc(sizeof(struct intel_pt_recording)); + if (!ptr) { + *err = -ENOMEM; + return NULL; + } + + ptr->intel_pt_pmu = intel_pt_pmu; + ptr->itr.recording_options = intel_pt_recording_options; + ptr->itr.info_priv_size = intel_pt_info_priv_size; + ptr->itr.info_fill = intel_pt_info_fill; + ptr->itr.free = intel_pt_recording_free; + ptr->itr.snapshot_start = intel_pt_snapshot_start; + ptr->itr.snapshot_finish = intel_pt_snapshot_finish; + ptr->itr.find_snapshot = intel_pt_find_snapshot; + ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; + ptr->itr.reference = intel_pt_reference; + ptr->itr.read_finish = intel_pt_read_finish; + return &ptr->itr; +} diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c new file mode 100644 index 000000000000..c5db14f36cc7 --- /dev/null +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -0,0 
+1,28 @@ +#include "../../perf.h" +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG(AX, PERF_REG_X86_AX), + SMPL_REG(BX, PERF_REG_X86_BX), + SMPL_REG(CX, PERF_REG_X86_CX), + SMPL_REG(DX, PERF_REG_X86_DX), + SMPL_REG(SI, PERF_REG_X86_SI), + SMPL_REG(DI, PERF_REG_X86_DI), + SMPL_REG(BP, PERF_REG_X86_BP), + SMPL_REG(SP, PERF_REG_X86_SP), + SMPL_REG(IP, PERF_REG_X86_IP), + SMPL_REG(FLAGS, PERF_REG_X86_FLAGS), + SMPL_REG(CS, PERF_REG_X86_CS), + SMPL_REG(SS, PERF_REG_X86_SS), +#ifdef HAVE_ARCH_X86_64_SUPPORT + SMPL_REG(R8, PERF_REG_X86_R8), + SMPL_REG(R9, PERF_REG_X86_R9), + SMPL_REG(R10, PERF_REG_X86_R10), + SMPL_REG(R11, PERF_REG_X86_R11), + SMPL_REG(R12, PERF_REG_X86_R12), + SMPL_REG(R13, PERF_REG_X86_R13), + SMPL_REG(R14, PERF_REG_X86_R14), + SMPL_REG(R15, PERF_REG_X86_R15), +#endif + SMPL_REG_END +}; diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c new file mode 100644 index 000000000000..79fe07158d00 --- /dev/null +++ b/tools/perf/arch/x86/util/pmu.c @@ -0,0 +1,18 @@ +#include <string.h> + +#include <linux/perf_event.h> + +#include "../../util/intel-pt.h" +#include "../../util/intel-bts.h" +#include "../../util/pmu.h" + +struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) +{ +#ifdef HAVE_AUXTRACE_SUPPORT + if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) + return intel_pt_pmu_default_config(pmu); + if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) + pmu->selectable = true; +#endif + return NULL; +} diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/xtensa/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile new file mode 100644 index 000000000000..7fbca175099e --- /dev/null +++ b/tools/perf/arch/xtensa/Makefile @@ -0,0 +1,3 @@ +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +endif diff --git 
a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/xtensa/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c new file mode 100644 index 000000000000..4dba76bfb4ce --- /dev/null +++ b/tools/perf/arch/xtensa/util/dwarf-regs.c @@ -0,0 +1,25 @@ +/* + * Mapping of DWARF debug register numbers into register names. + * + * Copyright (c) 2015 Cadence Design Systems Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <stddef.h> +#include <dwarf-regs.h> + +#define XTENSA_MAX_REGS 16 + +const char *xtensa_regs_table[XTENSA_MAX_REGS] = { + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", +}; + +const char *get_arch_regstr(unsigned int n) +{ + return n < XTENSA_MAX_REGS ? 
xtensa_regs_table[n] : NULL; +} diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index c3ab760e06b4..60bf11943047 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -1,10 +1,11 @@ perf-y += sched-messaging.o perf-y += sched-pipe.o -perf-y += mem-memcpy.o +perf-y += mem-functions.o perf-y += futex-hash.o perf-y += futex-wake.o perf-y += futex-wake-parallel.o perf-y += futex-requeue.o +perf-y += futex-lock-pi.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 70b2f718cc21..a50df86f2b9b 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -36,6 +36,8 @@ extern int bench_futex_wake(int argc, const char **argv, const char *prefix); extern int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix); extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); +/* pi futexes */ +extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c new file mode 100644 index 000000000000..bc6a16adbca8 --- /dev/null +++ b/tools/perf/bench/futex-lock-pi.c @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2015 Davidlohr Bueso. 
+ */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include <err.h> +#include <stdlib.h> +#include <sys/time.h> +#include <pthread.h> + +struct worker { + int tid; + u_int32_t *futex; + pthread_t thread; + unsigned long ops; +}; + +static u_int32_t global_futex = 0; +static struct worker *worker; +static unsigned int nsecs = 10; +static bool silent = false, multi = false; +static bool done = false, fshared = false; +static unsigned int ncpus, nthreads = 0; +static int futex_flag = 0; +struct timeval start, end, runtime; +static pthread_mutex_t thread_lock; +static unsigned int threads_starting; +static struct stats throughput_stats; +static pthread_cond_t thread_parent, thread_worker; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), + OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"), + OPT_BOOLEAN( 'M', "multi", &multi, "Use multiple futexes"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_END() +}; + +static const char * const bench_futex_lock_pi_usage[] = { + "perf bench futex lock-pi <options>", + NULL +}; + +static void print_summary(void) +{ + unsigned long avg = avg_stats(&throughput_stats); + double stddev = stddev_stats(&throughput_stats); + + printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", + !silent ? 
"\n" : "", avg, rel_stddev_stats(stddev, avg), + (int) runtime.tv_sec); +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + /* inform all threads that we're done for the day */ + done = true; + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); +} + +static void *workerfn(void *arg) +{ + struct worker *w = (struct worker *) arg; + + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + do { + int ret; + again: + ret = futex_lock_pi(w->futex, NULL, 0, futex_flag); + + if (ret) { /* handle lock acquisition */ + if (!silent) + warn("thread %d: Could not lock pi-lock for %p (%d)", + w->tid, w->futex, ret); + if (done) + break; + + goto again; + } + + usleep(1); + ret = futex_unlock_pi(w->futex, futex_flag); + if (ret && !silent) + warn("thread %d: Could not unlock pi-lock for %p (%d)", + w->tid, w->futex, ret); + w->ops++; /* account for thread's share of work */ + } while (!done); + + return NULL; +} + +static void create_threads(struct worker *w, pthread_attr_t thread_attr) +{ + cpu_set_t cpu; + unsigned int i; + + threads_starting = nthreads; + + for (i = 0; i < nthreads; i++) { + worker[i].tid = i; + + if (multi) { + worker[i].futex = calloc(1, sizeof(u_int32_t)); + if (!worker[i].futex) + err(EXIT_FAILURE, "calloc"); + } else + worker[i].futex = &global_futex; + + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) + err(EXIT_FAILURE, "pthread_create"); + } +} + +int bench_futex_lock_pi(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + unsigned int i; + struct sigaction act; + pthread_attr_t thread_attr; 
+ + argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0); + if (argc) + goto err; + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) + nthreads = ncpus; + + worker = calloc(nthreads, sizeof(*worker)); + if (!worker) + err(EXIT_FAILURE, "calloc"); + + if (!fshared) + futex_flag = FUTEX_PRIVATE_FLAG; + + printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n", + getpid(), nthreads, nsecs); + + init_stats(&throughput_stats); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + threads_starting = nthreads; + pthread_attr_init(&thread_attr); + gettimeofday(&start, NULL); + + create_threads(worker, thread_attr); + pthread_attr_destroy(&thread_attr); + + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + sleep(nsecs); + toggle_done(0, NULL, NULL); + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(worker[i].thread, NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + + for (i = 0; i < nthreads; i++) { + unsigned long t = worker[i].ops/runtime.tv_sec; + + update_stats(&throughput_stats, t); + if (!silent) + printf("[thread %3d] futex: %p [ %ld ops/sec ]\n", + worker[i].tid, worker[i].futex, t); + + if (multi) + free(worker[i].futex); + } + + print_summary(); + + free(worker); + return ret; +err: + usage_with_options(bench_futex_lock_pi_usage, options); + exit(EXIT_FAILURE); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 7ed22ff1e1ac..d44de9f44281 100644 --- a/tools/perf/bench/futex.h +++ 
b/tools/perf/bench/futex.h @@ -56,6 +56,26 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) } /** + * futex_lock_pi() - block on uaddr as a PI mutex + * @detect: whether (1) or not (0) to perform deadlock detection + */ +static inline int +futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect, + int opflags) +{ + return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags); +} + +/** + * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter + */ +static inline int +futex_unlock_pi(u_int32_t *uaddr, int opflags) +{ + return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); +} + +/** * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 * @nr_wake: wake up to this many tasks * @nr_requeue: requeue up to this many tasks diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c new file mode 100644 index 000000000000..9419b944220f --- /dev/null +++ b/tools/perf/bench/mem-functions.c @@ -0,0 +1,379 @@ +/* + * mem-functions.c + * + * Simple memcpy() and memset() benchmarks + * + * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "../util/cloexec.h" +#include "bench.h" +#include "mem-memcpy-arch.h" +#include "mem-memset-arch.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <errno.h> + +#define K 1024 + +static const char *size_str = "1MB"; +static const char *function_str = "all"; +static int nr_loops = 1; +static bool use_cycles; +static int cycles_fd; + +static const struct option options[] = { + OPT_STRING('s', "size", &size_str, "1MB", + "Specify the size of the memory buffers. 
" + "Available units: B, KB, MB, GB and TB (case insensitive)"), + + OPT_STRING('f', "function", &function_str, "all", + "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), + + OPT_INTEGER('l', "nr_loops", &nr_loops, + "Specify the number of loops to run. (default: 1)"), + + OPT_BOOLEAN('c', "cycles", &use_cycles, + "Use a cycles event instead of gettimeofday() to measure performance"), + + OPT_END() +}; + +typedef void *(*memcpy_t)(void *, const void *, size_t); +typedef void *(*memset_t)(void *, int, size_t); + +struct function { + const char *name; + const char *desc; + union { + memcpy_t memcpy; + memset_t memset; + } fn; +}; + +static struct perf_event_attr cycle_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_cycles(void) +{ + cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); + + if (cycles_fd < 0 && errno == ENOSYS) + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + else + BUG_ON(cycles_fd < 0); +} + +static u64 get_cycles(void) +{ + int ret; + u64 clk; + + ret = read(cycles_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000; +} + +#define print_bps(x) do { \ + if (x < K) \ + printf(" %14lf bytes/sec\n", x); \ + else if (x < K * K) \ + printf(" %14lf KB/sec\n", x / K); \ + else if (x < K * K * K) \ + printf(" %14lf MB/sec\n", x / K / K); \ + else \ + printf(" %14lf GB/sec\n", x / K / K / K); \ + } while (0) + +struct bench_mem_info { + const struct function *functions; + u64 (*do_cycles)(const struct function *r, size_t size); + double (*do_gettimeofday)(const struct function *r, size_t size); + const char *const *usage; +}; + +static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) +{ + const struct function *r = 
&info->functions[r_idx]; + double result_bps = 0.0; + u64 result_cycles = 0; + + printf("# function '%s' (%s)\n", r->name, r->desc); + + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Copying %s bytes ...\n\n", size_str); + + if (use_cycles) { + result_cycles = info->do_cycles(r, size); + } else { + result_bps = info->do_gettimeofday(r, size); + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (use_cycles) { + printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); + } else { + print_bps(result_bps); + } + break; + + case BENCH_FORMAT_SIMPLE: + if (use_cycles) { + printf("%lf\n", (double)result_cycles/size_total); + } else { + printf("%lf\n", result_bps); + } + break; + + default: + BUG_ON(1); + break; + } +} + +static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) +{ + int i; + size_t size; + double size_total; + + argc = parse_options(argc, argv, options, info->usage, 0); + + if (use_cycles) + init_cycles(); + + size = (size_t)perf_atoll((char *)size_str); + size_total = (double)size * nr_loops; + + if ((s64)size <= 0) { + fprintf(stderr, "Invalid size:%s\n", size_str); + return 1; + } + + if (!strncmp(function_str, "all", 3)) { + for (i = 0; info->functions[i].name; i++) + __bench_mem_function(info, i, size, size_total); + return 0; + } + + for (i = 0; info->functions[i].name; i++) { + if (!strcmp(info->functions[i].name, function_str)) + break; + } + if (!info->functions[i].name) { + if (strcmp(function_str, "help") && strcmp(function_str, "h")) + printf("Unknown function: %s\n", function_str); + printf("Available functions:\n"); + for (i = 0; info->functions[i].name; i++) { + printf("\t%s ... 
%s\n", + info->functions[i].name, info->functions[i].desc); + } + return 1; + } + + __bench_mem_function(info, i, size, size_total); + + return 0; +} + +static void memcpy_alloc_mem(void **dst, void **src, size_t size) +{ + *dst = zalloc(size); + if (!*dst) + die("memory allocation failed - maybe size is too large?\n"); + + *src = zalloc(size); + if (!*src) + die("memory allocation failed - maybe size is too large?\n"); + + /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ + memset(*src, 0, size); +} + +static u64 do_memcpy_cycles(const struct function *r, size_t size) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + void *src = NULL, *dst = NULL; + memcpy_t fn = r->fn.memcpy; + int i; + + memcpy_alloc_mem(&dst, &src, size); + + /* + * We prefault the freshly allocated memory range here, + * to not measure page fault overhead: + */ + fn(dst, src, size); + + cycle_start = get_cycles(); + for (i = 0; i < nr_loops; ++i) + fn(dst, src, size); + cycle_end = get_cycles(); + + free(src); + free(dst); + return cycle_end - cycle_start; +} + +static double do_memcpy_gettimeofday(const struct function *r, size_t size) +{ + struct timeval tv_start, tv_end, tv_diff; + memcpy_t fn = r->fn.memcpy; + void *src = NULL, *dst = NULL; + int i; + + memcpy_alloc_mem(&dst, &src, size); + + /* + * We prefault the freshly allocated memory range here, + * to not measure page fault overhead: + */ + fn(dst, src, size); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < nr_loops; ++i) + fn(dst, src, size); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(src); + free(dst); + + return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); +} + +struct function memcpy_functions[] = { + { .name = "default", + .desc = "Default memcpy() provided by glibc", + .fn.memcpy = memcpy }, + +#ifdef HAVE_ARCH_X86_64_SUPPORT +# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, +# 
include "mem-memcpy-x86-64-asm-def.h" +# undef MEMCPY_FN +#endif + + { .name = NULL, } +}; + +static const char * const bench_mem_memcpy_usage[] = { + "perf bench mem memcpy <options>", + NULL +}; + +int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) +{ + struct bench_mem_info info = { + .functions = memcpy_functions, + .do_cycles = do_memcpy_cycles, + .do_gettimeofday = do_memcpy_gettimeofday, + .usage = bench_mem_memcpy_usage, + }; + + return bench_mem_common(argc, argv, &info); +} + +static void memset_alloc_mem(void **dst, size_t size) +{ + *dst = zalloc(size); + if (!*dst) + die("memory allocation failed - maybe size is too large?\n"); +} + +static u64 do_memset_cycles(const struct function *r, size_t size) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + memset_t fn = r->fn.memset; + void *dst = NULL; + int i; + + memset_alloc_mem(&dst, size); + + /* + * We prefault the freshly allocated memory range here, + * to not measure page fault overhead: + */ + fn(dst, -1, size); + + cycle_start = get_cycles(); + for (i = 0; i < nr_loops; ++i) + fn(dst, i, size); + cycle_end = get_cycles(); + + free(dst); + return cycle_end - cycle_start; +} + +static double do_memset_gettimeofday(const struct function *r, size_t size) +{ + struct timeval tv_start, tv_end, tv_diff; + memset_t fn = r->fn.memset; + void *dst = NULL; + int i; + + memset_alloc_mem(&dst, size); + + /* + * We prefault the freshly allocated memory range here, + * to not measure page fault overhead: + */ + fn(dst, -1, size); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < nr_loops; ++i) + fn(dst, i, size); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(dst); + return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); +} + +static const char * const bench_mem_memset_usage[] = { + "perf bench mem memset <options>", + NULL +}; + +static const struct function memset_functions[] = { + { .name = "default", + 
.desc = "Default memset() provided by glibc", + .fn.memset = memset }, + +#ifdef HAVE_ARCH_X86_64_SUPPORT +# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, +# include "mem-memset-x86-64-asm-def.h" +# undef MEMSET_FN +#endif + + { .name = NULL, } +}; + +int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) +{ + struct bench_mem_info info = { + .functions = memset_functions, + .do_cycles = do_memset_cycles, + .do_gettimeofday = do_memset_gettimeofday, + .usage = bench_mem_memset_usage, + }; + + return bench_mem_common(argc, argv, &info); +} diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c deleted file mode 100644 index d3dfb7936dcd..000000000000 --- a/tools/perf/bench/mem-memcpy.c +++ /dev/null @@ -1,434 +0,0 @@ -/* - * mem-memcpy.c - * - * memcpy: Simple memory copy in various ways - * - * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> - */ - -#include "../perf.h" -#include "../util/util.h" -#include "../util/parse-options.h" -#include "../util/header.h" -#include "../util/cloexec.h" -#include "bench.h" -#include "mem-memcpy-arch.h" -#include "mem-memset-arch.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> -#include <errno.h> - -#define K 1024 - -static const char *length_str = "1MB"; -static const char *routine = "default"; -static int iterations = 1; -static bool use_cycle; -static int cycle_fd; -static bool only_prefault; -static bool no_prefault; - -static const struct option options[] = { - OPT_STRING('l', "length", &length_str, "1MB", - "Specify length of memory to copy. 
" - "Available units: B, KB, MB, GB and TB (upper and lower)"), - OPT_STRING('r', "routine", &routine, "default", - "Specify routine to copy, \"all\" runs all available routines"), - OPT_INTEGER('i', "iterations", &iterations, - "repeat memcpy() invocation this number of times"), - OPT_BOOLEAN('c', "cycle", &use_cycle, - "Use cycles event instead of gettimeofday() for measuring"), - OPT_BOOLEAN('o', "only-prefault", &only_prefault, - "Show only the result with page faults before memcpy()"), - OPT_BOOLEAN('n', "no-prefault", &no_prefault, - "Show only the result without page faults before memcpy()"), - OPT_END() -}; - -typedef void *(*memcpy_t)(void *, const void *, size_t); -typedef void *(*memset_t)(void *, int, size_t); - -struct routine { - const char *name; - const char *desc; - union { - memcpy_t memcpy; - memset_t memset; - } fn; -}; - -struct routine memcpy_routines[] = { - { .name = "default", - .desc = "Default memcpy() provided by glibc", - .fn.memcpy = memcpy }, -#ifdef HAVE_ARCH_X86_64_SUPPORT - -#define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, -#include "mem-memcpy-x86-64-asm-def.h" -#undef MEMCPY_FN - -#endif - - { NULL, - NULL, - {NULL} } -}; - -static const char * const bench_mem_memcpy_usage[] = { - "perf bench mem memcpy <options>", - NULL -}; - -static struct perf_event_attr cycle_attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES -}; - -static void init_cycle(void) -{ - cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, - perf_event_open_cloexec_flag()); - - if (cycle_fd < 0 && errno == ENOSYS) - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); - else - BUG_ON(cycle_fd < 0); -} - -static u64 get_cycle(void) -{ - int ret; - u64 clk; - - ret = read(cycle_fd, &clk, sizeof(u64)); - BUG_ON(ret != sizeof(u64)); - - return clk; -} - -static double timeval2double(struct timeval *ts) -{ - return (double)ts->tv_sec + - (double)ts->tv_usec / (double)1000000; -} - -#define 
pf (no_prefault ? 0 : 1) - -#define print_bps(x) do { \ - if (x < K) \ - printf(" %14lf B/Sec", x); \ - else if (x < K * K) \ - printf(" %14lfd KB/Sec", x / K); \ - else if (x < K * K * K) \ - printf(" %14lf MB/Sec", x / K / K); \ - else \ - printf(" %14lf GB/Sec", x / K / K / K); \ - } while (0) - -struct bench_mem_info { - const struct routine *routines; - u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault); - double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault); - const char *const *usage; -}; - -static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) -{ - const struct routine *r = &info->routines[r_idx]; - double result_bps[2]; - u64 result_cycle[2]; - - result_cycle[0] = result_cycle[1] = 0ULL; - result_bps[0] = result_bps[1] = 0.0; - - printf("Routine %s (%s)\n", r->name, r->desc); - - if (bench_format == BENCH_FORMAT_DEFAULT) - printf("# Copying %s Bytes ...\n\n", length_str); - - if (!only_prefault && !no_prefault) { - /* show both of results */ - if (use_cycle) { - result_cycle[0] = info->do_cycle(r, len, false); - result_cycle[1] = info->do_cycle(r, len, true); - } else { - result_bps[0] = info->do_gettimeofday(r, len, false); - result_bps[1] = info->do_gettimeofday(r, len, true); - } - } else { - if (use_cycle) - result_cycle[pf] = info->do_cycle(r, len, only_prefault); - else - result_bps[pf] = info->do_gettimeofday(r, len, only_prefault); - } - - switch (bench_format) { - case BENCH_FORMAT_DEFAULT: - if (!only_prefault && !no_prefault) { - if (use_cycle) { - printf(" %14lf Cycle/Byte\n", - (double)result_cycle[0] - / totallen); - printf(" %14lf Cycle/Byte (with prefault)\n", - (double)result_cycle[1] - / totallen); - } else { - print_bps(result_bps[0]); - printf("\n"); - print_bps(result_bps[1]); - printf(" (with prefault)\n"); - } - } else { - if (use_cycle) { - printf(" %14lf Cycle/Byte", - (double)result_cycle[pf] - / totallen); - } else - 
print_bps(result_bps[pf]); - - printf("%s\n", only_prefault ? " (with prefault)" : ""); - } - break; - case BENCH_FORMAT_SIMPLE: - if (!only_prefault && !no_prefault) { - if (use_cycle) { - printf("%lf %lf\n", - (double)result_cycle[0] / totallen, - (double)result_cycle[1] / totallen); - } else { - printf("%lf %lf\n", - result_bps[0], result_bps[1]); - } - } else { - if (use_cycle) { - printf("%lf\n", (double)result_cycle[pf] - / totallen); - } else - printf("%lf\n", result_bps[pf]); - } - break; - default: - /* reaching this means there's some disaster: */ - die("unknown format: %d\n", bench_format); - break; - } -} - -static int bench_mem_common(int argc, const char **argv, - const char *prefix __maybe_unused, - struct bench_mem_info *info) -{ - int i; - size_t len; - double totallen; - - argc = parse_options(argc, argv, options, - info->usage, 0); - - if (no_prefault && only_prefault) { - fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); - return 1; - } - - if (use_cycle) - init_cycle(); - - len = (size_t)perf_atoll((char *)length_str); - totallen = (double)len * iterations; - - if ((s64)len <= 0) { - fprintf(stderr, "Invalid length:%s\n", length_str); - return 1; - } - - /* same to without specifying either of prefault and no-prefault */ - if (only_prefault && no_prefault) - only_prefault = no_prefault = false; - - if (!strncmp(routine, "all", 3)) { - for (i = 0; info->routines[i].name; i++) - __bench_mem_routine(info, i, len, totallen); - return 0; - } - - for (i = 0; info->routines[i].name; i++) { - if (!strcmp(info->routines[i].name, routine)) - break; - } - if (!info->routines[i].name) { - printf("Unknown routine:%s\n", routine); - printf("Available routines...\n"); - for (i = 0; info->routines[i].name; i++) { - printf("\t%s ... 
%s\n", - info->routines[i].name, info->routines[i].desc); - } - return 1; - } - - __bench_mem_routine(info, i, len, totallen); - - return 0; -} - -static void memcpy_alloc_mem(void **dst, void **src, size_t length) -{ - *dst = zalloc(length); - if (!*dst) - die("memory allocation failed - maybe length is too large?\n"); - - *src = zalloc(length); - if (!*src) - die("memory allocation failed - maybe length is too large?\n"); - /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ - memset(*src, 0, length); -} - -static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) -{ - u64 cycle_start = 0ULL, cycle_end = 0ULL; - void *src = NULL, *dst = NULL; - memcpy_t fn = r->fn.memcpy; - int i; - - memcpy_alloc_mem(&dst, &src, len); - - if (prefault) - fn(dst, src, len); - - cycle_start = get_cycle(); - for (i = 0; i < iterations; ++i) - fn(dst, src, len); - cycle_end = get_cycle(); - - free(src); - free(dst); - return cycle_end - cycle_start; -} - -static double do_memcpy_gettimeofday(const struct routine *r, size_t len, - bool prefault) -{ - struct timeval tv_start, tv_end, tv_diff; - memcpy_t fn = r->fn.memcpy; - void *src = NULL, *dst = NULL; - int i; - - memcpy_alloc_mem(&dst, &src, len); - - if (prefault) - fn(dst, src, len); - - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < iterations; ++i) - fn(dst, src, len); - BUG_ON(gettimeofday(&tv_end, NULL)); - - timersub(&tv_end, &tv_start, &tv_diff); - - free(src); - free(dst); - return (double)(((double)len * iterations) / timeval2double(&tv_diff)); -} - -int bench_mem_memcpy(int argc, const char **argv, - const char *prefix __maybe_unused) -{ - struct bench_mem_info info = { - .routines = memcpy_routines, - .do_cycle = do_memcpy_cycle, - .do_gettimeofday = do_memcpy_gettimeofday, - .usage = bench_mem_memcpy_usage, - }; - - return bench_mem_common(argc, argv, prefix, &info); -} - -static void memset_alloc_mem(void **dst, size_t length) -{ - *dst = zalloc(length); - if 
(!*dst) - die("memory allocation failed - maybe length is too large?\n"); -} - -static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) -{ - u64 cycle_start = 0ULL, cycle_end = 0ULL; - memset_t fn = r->fn.memset; - void *dst = NULL; - int i; - - memset_alloc_mem(&dst, len); - - if (prefault) - fn(dst, -1, len); - - cycle_start = get_cycle(); - for (i = 0; i < iterations; ++i) - fn(dst, i, len); - cycle_end = get_cycle(); - - free(dst); - return cycle_end - cycle_start; -} - -static double do_memset_gettimeofday(const struct routine *r, size_t len, - bool prefault) -{ - struct timeval tv_start, tv_end, tv_diff; - memset_t fn = r->fn.memset; - void *dst = NULL; - int i; - - memset_alloc_mem(&dst, len); - - if (prefault) - fn(dst, -1, len); - - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < iterations; ++i) - fn(dst, i, len); - BUG_ON(gettimeofday(&tv_end, NULL)); - - timersub(&tv_end, &tv_start, &tv_diff); - - free(dst); - return (double)(((double)len * iterations) / timeval2double(&tv_diff)); -} - -static const char * const bench_mem_memset_usage[] = { - "perf bench mem memset <options>", - NULL -}; - -static const struct routine memset_routines[] = { - { .name ="default", - .desc = "Default memset() provided by glibc", - .fn.memset = memset }, -#ifdef HAVE_ARCH_X86_64_SUPPORT - -#define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, -#include "mem-memset-x86-64-asm-def.h" -#undef MEMSET_FN - -#endif - - { .name = NULL, - .desc = NULL, - .fn.memset = NULL } -}; - -int bench_mem_memset(int argc, const char **argv, - const char *prefix __maybe_unused) -{ - struct bench_mem_info info = { - .routines = memset_routines, - .do_cycle = do_memset_cycle, - .do_gettimeofday = do_memset_gettimeofday, - .usage = bench_mem_memset_usage, - }; - - return bench_mem_common(argc, argv, prefix, &info); -} diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 870b7e665a20..492df2752a2d 100644 --- 
a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -164,8 +164,8 @@ static const struct option options[] = { OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"), OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"), - OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run"), - OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run"), + OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run (default: unlimited)"), + OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"), OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"), OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"), diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index d7f281c2828d..d4ff1b539cfd 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -33,7 +33,7 @@ #define DATASIZE 100 static bool use_pipes = false; -static unsigned int loops = 100; +static unsigned int nr_loops = 100; static bool thread_mode = false; static unsigned int num_groups = 10; @@ -79,7 +79,7 @@ static void ready(int ready_out, int wakefd) err(EXIT_FAILURE, "poll"); } -/* Sender sprays loops messages down each file descriptor */ +/* Sender sprays nr_loops messages down each file descriptor */ static void *sender(struct sender_context *ctx) { char data[DATASIZE]; @@ -88,7 +88,7 @@ static void *sender(struct sender_context *ctx) ready(ctx->ready_out, ctx->wakefd); /* Now pump to every receiver. 
*/ - for (i = 0; i < loops; i++) { + for (i = 0; i < nr_loops; i++) { for (j = 0; j < ctx->num_fds; j++) { int ret, done = 0; @@ -213,7 +213,7 @@ static unsigned int group(pthread_t *pth, /* Create the pipe between client and server */ fdpair(fds); - ctx->num_packets = num_fds * loops; + ctx->num_packets = num_fds * nr_loops; ctx->in_fds[0] = fds[0]; ctx->in_fds[1] = fds[1]; ctx->ready_out = ready_out; @@ -250,7 +250,7 @@ static const struct option options[] = { OPT_BOOLEAN('t', "thread", &thread_mode, "Be multi thread instead of multi process"), OPT_UINTEGER('g', "group", &num_groups, "Specify number of groups"), - OPT_UINTEGER('l', "loop", &loops, "Specify number of loops"), + OPT_UINTEGER('l', "nr_loops", &nr_loops, "Specify the number of loops to run (default: 100)"), OPT_END() }; diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2c1bec39c30e..2bf9b3fd9e61 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -67,6 +67,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, rb_erase(&al->sym->rb_node, &al->map->dso->symbols[al->map->type]); symbol__delete(al->sym); + dso__reset_find_symbol_cache(al->map->dso); } return 0; } @@ -187,6 +188,7 @@ find_next: * symbol, free he->ms.sym->src to signal we already * processed this symbol. 
*/ + zfree(&notes->src->cycles_hist); zfree(&notes->src); } } @@ -209,7 +211,7 @@ static int __cmd_annotate(struct perf_annotate *ann) } if (!objdump_path) { - ret = perf_session_env__lookup_objdump(&session->header.env); + ret = perf_env__lookup_objdump(&session->header.env); if (ret) goto out; } @@ -238,6 +240,8 @@ static int __cmd_annotate(struct perf_annotate *ann) if (nr_samples > 0) { total_nr_samples += nr_samples; hists__collapse_resort(hists, NULL); + /* Don't sort callchain */ + perf_evsel__reset_sample_bit(pos, CALLCHAIN); hists__output_resort(hists, NULL); if (symbol_conf.event_group && diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index b5314e452ec7..b17aed36ca16 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -36,7 +36,7 @@ struct bench { #ifdef HAVE_LIBNUMA_SUPPORT static struct bench numa_benchmarks[] = { { "mem", "Benchmark for NUMA workloads", bench_numa }, - { "all", "Test all NUMA benchmarks", NULL }, + { "all", "Run all NUMA benchmarks", NULL }, { NULL, NULL, NULL } }; #endif @@ -44,14 +44,14 @@ static struct bench numa_benchmarks[] = { static struct bench sched_benchmarks[] = { { "messaging", "Benchmark for scheduling and IPC", bench_sched_messaging }, { "pipe", "Benchmark for pipe() between two processes", bench_sched_pipe }, - { "all", "Test all scheduler benchmarks", NULL }, + { "all", "Run all scheduler benchmarks", NULL }, { NULL, NULL, NULL } }; static struct bench mem_benchmarks[] = { - { "memcpy", "Benchmark for memcpy()", bench_mem_memcpy }, - { "memset", "Benchmark for memset() tests", bench_mem_memset }, - { "all", "Test all memory benchmarks", NULL }, + { "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy }, + { "memset", "Benchmark for memset() functions", bench_mem_memset }, + { "all", "Run all memory access benchmarks", NULL }, { NULL, NULL, NULL } }; @@ -60,7 +60,9 @@ static struct bench futex_benchmarks[] = { { "wake", "Benchmark for futex wake calls", bench_futex_wake 
}, { "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel }, { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue }, - { "all", "Test all futex benchmarks", NULL }, + /* pi-futexes */ + { "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi }, + { "all", "Run all futex benchmarks", NULL }, { NULL, NULL, NULL } }; @@ -108,7 +110,7 @@ int bench_format = BENCH_FORMAT_DEFAULT; unsigned int bench_repeat = 10; /* default number of times to repeat the run */ static const struct option bench_options[] = { - OPT_STRING('f', "format", &bench_format_str, "default", "Specify format style"), + OPT_STRING('f', "format", &bench_format_str, "default|simple", "Specify the output formatting style"), OPT_UINTEGER('r', "repeat", &bench_repeat, "Specify amount of times to repeat the run"), OPT_END() }; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index d47a0cdc71c9..7b8450cd33c2 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -25,8 +25,6 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) { char root_dir[PATH_MAX]; - char notes[PATH_MAX]; - u8 build_id[BUILD_ID_SIZE]; char *p; strlcpy(root_dir, proc_dir, sizeof(root_dir)); @@ -35,15 +33,7 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) if (!p) return -1; *p = '\0'; - - scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir); - - if (sysfs__read_build_id(notes, build_id, sizeof(build_id))) - return -1; - - build_id__sprintf(build_id, sizeof(build_id), sbuildid); - - return 0; + return sysfs__sprintf_build_id(root_dir, sbuildid); } static int build_id_cache__kcore_dir(char *dir, size_t sz) @@ -127,7 +117,7 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, static int build_id_cache__add_kcore(const char *filename, bool force) { - char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1]; + char 
dir[32], sbuildid[SBUILD_ID_SIZE]; char from_dir[PATH_MAX], to_dir[PATH_MAX]; char *p; @@ -138,7 +128,7 @@ static int build_id_cache__add_kcore(const char *filename, bool force) return -1; *p = '\0'; - if (build_id_cache__kcore_buildid(from_dir, sbuildid)) + if (build_id_cache__kcore_buildid(from_dir, sbuildid) < 0) return -1; scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", @@ -184,7 +174,7 @@ static int build_id_cache__add_kcore(const char *filename, bool force) static int build_id_cache__add_file(const char *filename) { - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; u8 build_id[BUILD_ID_SIZE]; int err; @@ -204,7 +194,7 @@ static int build_id_cache__add_file(const char *filename) static int build_id_cache__remove_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; int err; @@ -276,7 +266,7 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f static int build_id_cache__update_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; int err = 0; @@ -363,7 +353,7 @@ int cmd_buildid_cache(int argc, const char **argv, setup_pager(); if (add_name_list_str) { - list = strlist__new(true, add_name_list_str); + list = strlist__new(add_name_list_str, NULL); if (list) { strlist__for_each(pos, list) if (build_id_cache__add_file(pos->s)) { @@ -381,7 +371,7 @@ int cmd_buildid_cache(int argc, const char **argv, } if (remove_name_list_str) { - list = strlist__new(true, remove_name_list_str); + list = strlist__new(remove_name_list_str, NULL); if (list) { strlist__for_each(pos, list) if (build_id_cache__remove_file(pos->s)) { @@ -399,7 +389,7 @@ int cmd_buildid_cache(int argc, const char **argv, } if (purge_name_list_str) { - list = strlist__new(true, purge_name_list_str); + list = strlist__new(purge_name_list_str, NULL); if (list) { strlist__for_each(pos, list) 
if (build_id_cache__purge_path(pos->s)) { @@ -420,7 +410,7 @@ int cmd_buildid_cache(int argc, const char **argv, ret = build_id_cache__fprintf_missing(session, stdout); if (update_name_list_str) { - list = strlist__new(true, update_name_list_str); + list = strlist__new(update_name_list_str, NULL); if (list) { strlist__for_each(pos, list) if (build_id_cache__update_file(pos->s)) { diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 9fe93c8d4fcf..918b4de29de4 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -19,29 +19,25 @@ static int sysfs__fprintf_build_id(FILE *fp) { - u8 kallsyms_build_id[BUILD_ID_SIZE]; - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; + int ret; - if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id, - sizeof(kallsyms_build_id)) != 0) - return -1; + ret = sysfs__sprintf_build_id("/", sbuild_id); + if (ret != sizeof(sbuild_id)) + return ret < 0 ? ret : -EINVAL; - build_id__sprintf(kallsyms_build_id, sizeof(kallsyms_build_id), - sbuild_id); - fprintf(fp, "%s\n", sbuild_id); - return 0; + return fprintf(fp, "%s\n", sbuild_id); } static int filename__fprintf_build_id(const char *name, FILE *fp) { - u8 build_id[BUILD_ID_SIZE]; - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; + int ret; - if (filename__read_build_id(name, build_id, - sizeof(build_id)) != sizeof(build_id)) - return 0; + ret = filename__sprintf_build_id(name, sbuild_id); + if (ret != sizeof(sbuild_id)) + return ret < 0 ? 
ret : -EINVAL; - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); return fprintf(fp, "%s\n", sbuild_id); } @@ -63,7 +59,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) /* * See if this is an ELF file first: */ - if (filename__fprintf_build_id(input_name, stdout)) + if (filename__fprintf_build_id(input_name, stdout) > 0) goto out; session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index daaa7dca9c3b..0b180a885ba3 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -722,6 +722,9 @@ static void data_process(void) if (verbose || data__files_cnt > 2) data__fprintf(); + /* Don't sort callchain for perf diff */ + perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN); + hists__process(hists_base); } } diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 695ec5a50cf2..f4d62510acbb 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -61,8 +61,8 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(evlist_usage, options); if (details.event_group && (details.verbose || details.freq)) { - pr_err("--group option is not compatible with other options\n"); - usage_with_options(evlist_usage, options); + usage_with_options_msg(evlist_usage, options, + "--group option is not compatible with other options\n"); } return __cmd_evlist(input_name, &details); diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 36486eade1ef..a7d588bf3cdd 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -463,7 +463,7 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) builtin_help_subcommands, builtin_help_usage, 0); if (show_all) { - printf("\n usage: %s\n\n", perf_usage_string); + printf("\n Usage: %s\n\n", perf_usage_string); list_commands("perf commands", &main_cmds, &other_cmds); printf(" %s\n\n", 
perf_more_info_string); return 0; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 01b06492bd6a..0a945d2e8ca5 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -28,9 +28,11 @@ struct perf_inject { bool build_ids; bool sched_stat; bool have_auxtrace; + bool strip; const char *input_name; struct perf_data_file output; u64 bytes_written; + u64 aux_id; struct list_head samples; struct itrace_synth_opts itrace_synth_opts; }; @@ -176,6 +178,27 @@ static int perf_event__repipe(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +static int perf_event__drop(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +static int perf_event__drop_aux(struct perf_tool *tool, + union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct machine *machine __maybe_unused) +{ + struct perf_inject *inject = container_of(tool, struct perf_inject, tool); + + if (!inject->aux_id) + inject->aux_id = sample->id; + + return 0; +} + typedef int (*inject_handler)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -466,6 +489,78 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel, return 0; } +static int drop_sample(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +static void strip_init(struct perf_inject *inject) +{ + struct perf_evlist *evlist = inject->session->evlist; + struct perf_evsel *evsel; + + inject->tool.context_switch = perf_event__drop; + + evlist__for_each(evlist, evsel) + evsel->handler = drop_sample; +} + +static bool has_tracking(struct perf_evsel *evsel) +{ + return evsel->attr.mmap || evsel->attr.mmap2 || evsel->attr.comm || + 
evsel->attr.task; +} + +#define COMPAT_MASK (PERF_SAMPLE_ID | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER) + +/* + * In order that the perf.data file is parsable, tracking events like MMAP need + * their selected event to exist, except if there is only 1 selected event left + * and it has a compatible sample type. + */ +static bool ok_to_remove(struct perf_evlist *evlist, + struct perf_evsel *evsel_to_remove) +{ + struct perf_evsel *evsel; + int cnt = 0; + bool ok = false; + + if (!has_tracking(evsel_to_remove)) + return true; + + evlist__for_each(evlist, evsel) { + if (evsel->handler != drop_sample) { + cnt += 1; + if ((evsel->attr.sample_type & COMPAT_MASK) == + (evsel_to_remove->attr.sample_type & COMPAT_MASK)) + ok = true; + } + } + + return ok && cnt == 1; +} + +static void strip_fini(struct perf_inject *inject) +{ + struct perf_evlist *evlist = inject->session->evlist; + struct perf_evsel *evsel, *tmp; + + /* Remove non-synthesized evsels if possible */ + evlist__for_each_safe(evlist, tmp, evsel) { + if (evsel->handler == drop_sample && + ok_to_remove(evlist, evsel)) { + pr_debug("Deleting %s\n", perf_evsel__name(evsel)); + perf_evlist__remove(evlist, evsel); + perf_evsel__delete(evsel); + } + } +} + static int __cmd_inject(struct perf_inject *inject) { int ret = -EINVAL; @@ -512,10 +607,14 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.id_index = perf_event__repipe_id_index; inject->tool.auxtrace_info = perf_event__process_auxtrace_info; inject->tool.auxtrace = perf_event__process_auxtrace; + inject->tool.aux = perf_event__drop_aux; + inject->tool.itrace_start = perf_event__drop_aux, inject->tool.ordered_events = true; inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ output_data_offset = 4096; + if (inject->strip) + strip_init(inject); } if (!inject->itrace_synth_opts.set) @@ -535,11 +634,28 @@ static int __cmd_inject(struct 
perf_inject *inject) } /* * The AUX areas have been removed and replaced with - * synthesized hardware events, so clear the feature flag. + * synthesized hardware events, so clear the feature flag and + * remove the evsel. */ - if (inject->itrace_synth_opts.set) + if (inject->itrace_synth_opts.set) { + struct perf_evsel *evsel; + perf_header__clear_feat(&session->header, HEADER_AUXTRACE); + if (inject->itrace_synth_opts.last_branch) + perf_header__set_feat(&session->header, + HEADER_BRANCH_STACK); + evsel = perf_evlist__id2evsel_strict(session->evlist, + inject->aux_id); + if (evsel) { + pr_debug("Deleting %s\n", + perf_evsel__name(evsel)); + perf_evlist__remove(session->evlist, evsel); + perf_evsel__delete(evsel); + } + if (inject->strip) + strip_fini(inject); + } session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__write_header(session, session->evlist, fd, true); @@ -561,6 +677,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .lost = perf_event__repipe, .aux = perf_event__repipe, .itrace_start = perf_event__repipe, + .context_switch = perf_event__repipe, .read = perf_event__repipe_sample, .throttle = perf_event__repipe, .unthrottle = perf_event__repipe, @@ -603,6 +720,8 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), + OPT_BOOLEAN(0, "strip", &inject.strip, + "strip non-synthesized events (use with --itrace)"), OPT_END() }; const char * const inject_usage[] = { @@ -618,6 +737,11 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) if (argc) usage_with_options(inject_usage, options); + if (inject.strip && !inject.itrace_synth_opts.set) { + pr_err("--strip option requires --itrace option\n"); + return -1; + } + if (perf_data_file__open(&inject.output)) { perror("failed to create 
output file"); return -1; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 23b1faaaa4cc..93ce665f976f 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -329,7 +329,7 @@ static int build_alloc_func_list(void) return -EINVAL; } - kernel_map = machine->vmlinux_maps[MAP__FUNCTION]; + kernel_map = machine__kernel_map(machine); if (map__load(kernel_map, NULL) < 0) { pr_err("cannot load kernel map\n"); return -ENOENT; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index fc1cffb1b7a2..dd94b4ca2213 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -13,7 +13,6 @@ #include "util/parse-options.h" #include "util/trace-event.h" #include "util/debug.h" -#include <api/fs/debugfs.h> #include "util/tool.h" #include "util/stat.h" #include "util/top.h" diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index af5bd0514108..bf679e2c978b 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -36,7 +36,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) setup_pager(); - if (!raw_dump) + if (!raw_dump && pager_in_use()) printf("\nList of pre-defined events (to be used in -e):\n\n"); if (argc == 0) { @@ -45,6 +45,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) } for (i = 0; i < argc; ++i) { + char *sep, *s; + if (strcmp(argv[i], "tracepoint") == 0) print_tracepoint_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "hw") == 0 || @@ -60,8 +62,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, raw_dump); - else { - char *sep = strchr(argv[i], ':'), *s; + else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; if (sep == NULL) { @@ -76,6 +77,19 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) s[sep_idx] = '\0'; print_tracepoint_events(s, s + 
sep_idx + 1, raw_dump); free(s); + } else { + if (asprintf(&s, "*%s*", argv[i]) < 0) { + printf("Critical: Not enough memory! Trying to continue...\n"); + continue; + } + print_symbol_events(s, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); + print_symbol_events(s, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); + print_hwcache_events(s, raw_dump); + print_pmu_events(s, raw_dump); + print_tracepoint_events(NULL, s, raw_dump); + free(s); } } return 0; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 1272559fa22d..132afc97676c 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -37,10 +37,10 @@ #include "util/strfilter.h" #include "util/symbol.h" #include "util/debug.h" -#include <api/fs/debugfs.h> #include "util/parse-options.h" #include "util/probe-finder.h" #include "util/probe-event.h" +#include "util/probe-file.h" #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*" #define DEFAULT_FUNC_FILTER "!_*" @@ -182,10 +182,8 @@ static int opt_set_target(const struct option *opt, const char *str, if (str) { if (!strcmp(opt->long_name, "exec")) params.uprobes = true; -#ifdef HAVE_DWARF_SUPPORT else if (!strcmp(opt->long_name, "module")) params.uprobes = false; -#endif else return ret; @@ -297,8 +295,7 @@ static void cleanup_params(void) clear_perf_probe_event(params.events + i); line_range__clear(&params.line_range); free(params.target); - if (params.filter) - strfilter__delete(params.filter); + strfilter__delete(params.filter); memset(&params, 0, sizeof(params)); } @@ -312,6 +309,119 @@ static void pr_err_with_code(const char *msg, int err) pr_err("\n"); } +static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs) +{ + int ret; + int i, k; + const char *event = NULL, *group = NULL; + + ret = init_probe_symbol_maps(pevs->uprobes); + if (ret < 0) + return ret; + + ret = convert_perf_probe_events(pevs, npevs); + if (ret < 0) + goto out_cleanup; + + ret = 
apply_perf_probe_events(pevs, npevs); + if (ret < 0) + goto out_cleanup; + + for (i = k = 0; i < npevs; i++) + k += pevs[i].ntevs; + + pr_info("Added new event%s\n", (k > 1) ? "s:" : ":"); + for (i = 0; i < npevs; i++) { + struct perf_probe_event *pev = &pevs[i]; + + for (k = 0; k < pev->ntevs; k++) { + struct probe_trace_event *tev = &pev->tevs[k]; + + /* We use tev's name for showing new events */ + show_perf_probe_event(tev->group, tev->event, pev, + tev->point.module, false); + + /* Save the last valid name */ + event = tev->event; + group = tev->group; + } + } + + /* Note that it is possible to skip all events because of blacklist */ + if (event) { + /* Show how to use the event. */ + pr_info("\nYou can now use it in all perf tools, such as:\n\n"); + pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event); + } + +out_cleanup: + cleanup_perf_probe_events(pevs, npevs); + exit_probe_symbol_maps(); + return ret; +} + +static int perf_del_probe_events(struct strfilter *filter) +{ + int ret, ret2, ufd = -1, kfd = -1; + char *str = strfilter__string(filter); + struct strlist *klist = NULL, *ulist = NULL; + struct str_node *ent; + + if (!str) + return -EINVAL; + + pr_debug("Delete filter: \'%s\'\n", str); + + /* Get current event names */ + ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW); + if (ret < 0) + goto out; + + klist = strlist__new(NULL, NULL); + ulist = strlist__new(NULL, NULL); + if (!klist || !ulist) { + ret = -ENOMEM; + goto out; + } + + ret = probe_file__get_events(kfd, filter, klist); + if (ret == 0) { + strlist__for_each(ent, klist) + pr_info("Removed event: %s\n", ent->s); + + ret = probe_file__del_strlist(kfd, klist); + if (ret < 0) + goto error; + } + + ret2 = probe_file__get_events(ufd, filter, ulist); + if (ret2 == 0) { + strlist__for_each(ent, ulist) + pr_info("Removed event: %s\n", ent->s); + + ret2 = probe_file__del_strlist(ufd, ulist); + if (ret2 < 0) + goto error; + } + + if (ret == -ENOENT && ret2 == -ENOENT) + pr_debug("\"%s\" does 
not hit any event.\n", str); + /* Note that this is silently ignored */ + ret = 0; + +error: + if (kfd >= 0) + close(kfd); + if (ufd >= 0) + close(ufd); +out: + strlist__delete(klist); + strlist__delete(ulist); + free(str); + + return ret; +} + static int __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) { @@ -378,9 +488,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) "file", "vmlinux pathname"), OPT_STRING('s', "source", &symbol_conf.source_prefix, "directory", "path to kernel source"), - OPT_CALLBACK('m', "module", NULL, "modname|path", - "target module name (for online) or path (for offline)", - opt_set_target), OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines, "Don't search inlined functions"), #endif @@ -397,6 +504,9 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) opt_set_filter), OPT_CALLBACK('x', "exec", NULL, "executable|path", "target executable name or path", opt_set_target), + OPT_CALLBACK('m', "module", NULL, "modname|path", + "target module name (for online) or path (for offline)", + opt_set_target), OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, "Enable symbol demangling"), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, @@ -418,12 +528,12 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) PARSE_OPT_STOP_AT_NON_OPTION); if (argc > 0) { if (strcmp(argv[0], "-") == 0) { - pr_warning(" Error: '-' is not supported.\n"); - usage_with_options(probe_usage, options); + usage_with_options_msg(probe_usage, options, + "'-' is not supported.\n"); } if (params.command && params.command != 'a') { - pr_warning(" Error: another command except --add is set.\n"); - usage_with_options(probe_usage, options); + usage_with_options_msg(probe_usage, options, + "another command except --add is set.\n"); } ret = parse_probe_event_argv(argc, argv); if (ret < 0) { @@ -452,8 +562,10 @@ __cmd_probe(int argc, const char **argv, const char 
*prefix __maybe_unused) switch (params.command) { case 'l': if (params.uprobes) { - pr_warning(" Error: Don't use --list with --exec.\n"); - usage_with_options(probe_usage, options); + pr_err(" Error: Don't use --list with --exec.\n"); + parse_options_usage(probe_usage, options, "l", true); + parse_options_usage(NULL, options, "x", true); + return -EINVAL; } ret = show_perf_probe_events(params.filter); if (ret < 0) @@ -484,7 +596,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) return ret; #endif case 'd': - ret = del_perf_probe_events(params.filter); + ret = perf_del_probe_events(params.filter); if (ret < 0) { pr_err_with_code(" Error: Failed to delete events.", ret); return ret; @@ -493,11 +605,13 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) case 'a': /* Ensure the last given target is used */ if (params.target && !params.target_used) { - pr_warning(" Error: -x/-m must follow the probe definitions.\n"); - usage_with_options(probe_usage, options); + pr_err(" Error: -x/-m must follow the probe definitions.\n"); + parse_options_usage(probe_usage, options, "m", true); + parse_options_usage(NULL, options, "x", true); + return -EINVAL; } - ret = add_perf_probe_events(params.events, params.nevents); + ret = perf_add_probe_events(params.events, params.nevents); if (ret < 0) { pr_err_with_code(" Error: Failed to add events.", ret); return ret; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index de165a1b9240..199fc31e3919 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -27,8 +27,11 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/data.h" +#include "util/perf_regs.h" #include "util/auxtrace.h" #include "util/parse-branch-options.h" +#include "util/parse-regs-options.h" +#include "util/llvm-utils.h" #include <unistd.h> #include <sched.h> @@ -47,7 +50,7 @@ struct record { int realtime_prio; bool no_buildid; bool no_buildid_cache; - 
long samples; + unsigned long long samples; }; static int record__write(struct record *rec, void *bf, size_t size) @@ -279,7 +282,7 @@ static int record__open(struct record *rec) evlist__for_each(evlist, pos) { try_again: - if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) { + if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { if (verbose) ui__warning("%s\n", msg); @@ -521,6 +524,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + /* + * Normally perf_session__new would do this, but it doesn't have the + * evlist. + */ + if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) { + pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); + rec->tool.ordered_events = false; + } + if (!rec->evlist->nr_groups) perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); @@ -625,8 +637,29 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) /* * Let the child rip */ - if (forks) + if (forks) { + union perf_event *event; + + event = malloc(sizeof(event->comm) + machine->id_hdr_size); + if (event == NULL) { + err = -ENOMEM; + goto out_child; + } + + /* + * Some H/W events are generated before COMM event + * which is emitted during exec(), so perf script + * cannot see a correct process name for those events. + * Synthesize COMM event to prevent it. 
+ */ + perf_event__synthesize_comm(tool, event, + rec->evlist->workload.pid, + process_synthesized_event, + machine); + free(event); + perf_evlist__start_workload(rec->evlist); + } if (opts->initial_delay) { usleep(opts->initial_delay * 1000); @@ -635,7 +668,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) auxtrace_snapshot_enabled = 1; for (;;) { - int hits = rec->samples; + unsigned long long hits = rec->samples; if (record__mmap_read_all(rec) < 0) { auxtrace_snapshot_enabled = 0; @@ -762,12 +795,14 @@ static void callchain_debug(void) callchain_param.dump_size); } -int record_parse_callchain_opt(const struct option *opt __maybe_unused, +int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset) { int ret; + struct record_opts *record = (struct record_opts *)opt->value; + record->callgraph_set = true; callchain_param.enabled = !unset; /* --no-call-graph */ @@ -777,17 +812,20 @@ int record_parse_callchain_opt(const struct option *opt __maybe_unused, return 0; } - ret = parse_callchain_record_opt(arg); + ret = parse_callchain_record_opt(arg, &callchain_param); if (!ret) callchain_debug(); return ret; } -int record_callchain_opt(const struct option *opt __maybe_unused, +int record_callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unset __maybe_unused) { + struct record_opts *record = (struct record_opts *)opt->value; + + record->callgraph_set = true; callchain_param.enabled = true; if (callchain_param.record_mode == CALLCHAIN_NONE) @@ -965,19 +1003,16 @@ static struct record record = { .tool = { .sample = process_sample_event, .fork = perf_event__process_fork, + .exit = perf_event__process_exit, .comm = perf_event__process_comm, .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, + .ordered_events = true, }, }; -#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " - -#ifdef HAVE_DWARF_UNWIND_SUPPORT -const char 
record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr"; -#else -const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr"; -#endif +const char record_callchain_help[] = CALLCHAIN_RECORD_HELP + "\n\t\t\t\tDefault: fp"; /* * XXX Will stay a global variable till we fix builtin-script.c to stop messing @@ -992,6 +1027,9 @@ struct option __record_options[] = { parse_events_option), OPT_CALLBACK(0, "filter", &record.evlist, "filter", "event filter", parse_filter), + OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, + NULL, "don't record events from perf itself", + exclude_perf), OPT_STRING('p', "pid", &record.opts.target.pid, "pid", "record events on existing process id"), OPT_STRING('t', "tid", &record.opts.target.tid, "tid", @@ -1022,7 +1060,7 @@ struct option __record_options[] = { NULL, "enables call-graph recording" , &record_callchain_opt), OPT_CALLBACK(0, "call-graph", &record.opts, - "mode[,dump_size]", record_callchain_help, + "record_mode[,record_size]", record_callchain_help, &record_parse_callchain_opt), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), @@ -1030,7 +1068,9 @@ struct option __record_options[] = { OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, "per thread counts"), OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), - OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Record the sample timestamps"), + OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, + &record.opts.sample_time_set, + "Record the sample timestamps"), OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, "don't sample"), @@ -1059,8 +1099,9 @@ struct option __record_options[] = { "sample transaction flags (special events only)"), OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, "use per-thread mmaps"), - OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs, - "Sample machine registers on 
interrupt"), + OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", + "sample selected machine registers on interrupt," + " use -I ? to list register names", parse_regs), OPT_BOOLEAN(0, "running-time", &record.opts.running_time, "Record running/enabled time of read (:S) events"), OPT_CALLBACK('k', "clockid", &record.opts, @@ -1070,6 +1111,14 @@ struct option __record_options[] = { "opts", "AUX area tracing Snapshot Mode", ""), OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, "per thread proc mmap processing timeout in ms"), + OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, + "Record context switch events"), +#ifdef HAVE_LIBBPF_SUPPORT + OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", + "clang binary to use for compiling BPF scriptlets"), + OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", + "options passed to clang when compiling BPF scriptlets"), +#endif OPT_END() }; @@ -1093,9 +1142,15 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(record_usage, record_options); if (nr_cgroups && !rec->opts.target.system_wide) { - ui__error("cgroup monitoring only available in" - " system-wide mode\n"); - usage_with_options(record_usage, record_options); + usage_with_options_msg(record_usage, record_options, + "cgroup monitoring only available in system-wide mode"); + + } + if (rec->opts.record_switch_events && + !perf_can_record_switch_events()) { + ui__error("kernel does not support recording context switch events\n"); + parse_options_usage(record_usage, record_options, "switch-events", 0); + return -EINVAL; } if (!rec->itr) { diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 95a47719aec3..2853ad2bd435 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -53,6 +53,7 @@ struct report { bool mem_mode; bool header; bool header_only; + bool nonany_branch_mode; int 
max_stack; struct perf_read_values show_threads_values; const char *pretty_printing_style; @@ -61,6 +62,7 @@ struct report { float min_percent; u64 nr_entries; u64 queue_size; + int socket_filter; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; @@ -102,6 +104,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, if (!ui__has_annotation()) return 0; + hist__account_cycles(iter->sample->branch_stack, al, iter->sample, + rep->nonany_branch_mode); + if (sort__mode == SORT_MODE__BRANCH) { bi = he->branch_info; err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); @@ -158,14 +163,21 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) goto out_put; - if (sort__mode == SORT_MODE__BRANCH) + if (sort__mode == SORT_MODE__BRANCH) { + /* + * A non-synthesized event might not have a branch stack if + * branch stacks have been synthesized (using itrace options). + */ + if (!sample->branch_stack) + goto out_put; iter.ops = &hist_iter_branch; - else if (rep->mem_mode) + } else if (rep->mem_mode) { iter.ops = &hist_iter_mem; - else if (symbol_conf.cumulate_callchain) + } else if (symbol_conf.cumulate_callchain) { iter.ops = &hist_iter_cumulative; - else + } else { iter.ops = &hist_iter_normal; + } if (al.map != NULL) al.map->dso->hit = 1; @@ -209,6 +221,15 @@ static int report__setup_sample_type(struct report *rep) u64 sample_type = perf_evlist__combined_sample_type(session->evlist); bool is_pipe = perf_data_file__is_pipe(session->file); + if (session->itrace_synth_opts->callchain || + (!is_pipe && + perf_header__has_feat(&session->header, HEADER_AUXTRACE) && + !session->itrace_synth_opts->set)) + sample_type |= PERF_SAMPLE_CALLCHAIN; + + if (session->itrace_synth_opts->last_branch) + sample_type |= PERF_SAMPLE_BRANCH_STACK; + if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { ui__error("Selected --sort parent, but no " @@ -258,6 +279,12 @@ static int 
report__setup_sample_type(struct report *rep) else callchain_param.record_mode = CALLCHAIN_FP; } + + /* ??? handle more cases than just ANY? */ + if (!(perf_evlist__combined_branch_type(session->evlist) & + PERF_SAMPLE_BRANCH_ANY)) + rep->nonany_branch_mode = true; + return 0; } @@ -276,6 +303,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report struct perf_evsel *evsel = hists_to_evsel(hists); char buf[512]; size_t size = sizeof(buf); + int socked_id = hists->socket_filter; if (symbol_conf.filter_relative) { nr_samples = hists->stats.nr_non_filtered_samples; @@ -306,11 +334,20 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report if (evname != NULL) ret += fprintf(fp, " of event '%s'", evname); + if (symbol_conf.show_ref_callgraph && + strstr(evname, "call-graph=no")) { + ret += fprintf(fp, ", show reference callgraph"); + } + if (rep->mem_mode) { ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events); ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order); } else ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events); + + if (socked_id > -1) + ret += fprintf(fp, "\n# Processor Socket: %d", socked_id); + return ret + fprintf(fp, "\n#\n"); } @@ -350,7 +387,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, static void report__warn_kptr_restrict(const struct report *rep) { - struct map *kernel_map = rep->session->machines.host.vmlinux_maps[MAP__FUNCTION]; + struct map *kernel_map = machine__kernel_map(&rep->session->machines.host); struct kmap *kernel_kmap = kernel_map ? 
map__kmap(kernel_map) : NULL; if (kernel_map == NULL || @@ -435,6 +472,8 @@ static void report__collapse_hists(struct report *rep) if (pos->idx == 0) hists->symbol_filter_str = rep->symbol_filter_str; + hists->socket_filter = rep->socket_filter; + hists__collapse_resort(hists, &prog); /* Non-group events are considered as leader */ @@ -586,6 +625,12 @@ parse_percent_limit(const struct option *opt, const char *str, return 0; } +#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function" + +const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" + CALLCHAIN_REPORT_HELP + "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; + int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_session *session; @@ -594,7 +639,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) bool has_br_stack = false; int branch_mode = -1; bool branch_call_mode = false; - char callchain_default_opt[] = "fractal,0.5,callee"; + char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; const char * const report_usage[] = { "perf report [<options>]", NULL @@ -620,6 +665,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) }, .max_stack = PERF_MAX_STACK_DEPTH, .pretty_printing_style = "normal", + .socket_filter = -1, }; const struct option options[] = { OPT_STRING('i', "input", &input_name, "file", @@ -653,15 +699,18 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) " Please refer the man page for the complete list."), OPT_STRING('F', "fields", &field_order, "key[,keys...]", "output field(s): overhead, period, sample plus all of sort keys"), - OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, + OPT_BOOLEAN(0, "show-cpu-utilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), + OPT_BOOLEAN_FLAG(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, + "Show sample percentage for 
different cpu modes", PARSE_OPT_HIDDEN), OPT_STRING('p', "parent", &parent_pattern, "regex", "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, "Only display entries with parent-match"), - OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]", - "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. " - "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), + OPT_CALLBACK_DEFAULT('g', "call-graph", &report, + "print_type,threshold[,print_limit],order,sort_key[,branch]", + report_callchain_help, &report_parse_callchain_opt, + callchain_default_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &report.max_stack, @@ -728,6 +777,12 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), + OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename, + "Show full source file name path for source lines"), + OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph, + "Show callgraph from reference event"), + OPT_INTEGER(0, "socket-filter", &report.socket_filter, + "only show processor socket that match with this filter"), OPT_END() }; struct perf_data_file file = { @@ -762,6 +817,12 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) if (report.inverted_callchain) callchain_param.order = ORDER_CALLER; + if (symbol_conf.cumulate_callchain && !callchain_param.order_set) + callchain_param.order = ORDER_CALLER; + + if (itrace_synth_opts.callchain && + (int)itrace_synth_opts.callchain_sz > 
report.max_stack) + report.max_stack = itrace_synth_opts.callchain_sz; if (!input_name || !strlen(input_name)) { if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) @@ -790,6 +851,9 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); + if (itrace_synth_opts.last_branch) + has_br_stack = true; + /* * Branch mode is a tristate: * -1 means default, so decide based on the file having branch data. diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 33962612a5e9..0ee6d900e100 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1728,8 +1728,8 @@ static void setup_sorting(struct perf_sched *sched, const struct option *options for (tok = strtok_r(str, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { if (sort_dimension__add(tok, &sched->sort_list) < 0) { - error("Unknown --sort key: `%s'", tok); - usage_with_options(usage_msg, options); + usage_with_options_msg(usage_msg, options, + "Unknown --sort key: `%s'", tok); } } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 24809787369f..72b5deb4bd79 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -6,6 +6,7 @@ #include "util/exec_cmd.h" #include "util/header.h" #include "util/parse-options.h" +#include "util/perf_regs.h" #include "util/session.h" #include "util/tool.h" #include "util/symbol.h" @@ -28,9 +29,12 @@ static bool no_callchain; static bool latency_format; static bool system_wide; static bool print_flags; +static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; + enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, PERF_OUTPUT_TID = 1U << 1, @@ -46,6 +50,9 @@ enum perf_output_field { PERF_OUTPUT_SYMOFFSET = 1U << 11, PERF_OUTPUT_SRCLINE = 1U << 12, PERF_OUTPUT_PERIOD = 1U << 13, + PERF_OUTPUT_IREGS = 1U << 14, + PERF_OUTPUT_BRSTACK = 1U << 15, + PERF_OUTPUT_BRSTACKSYM = 1U << 
16, }; struct output_option { @@ -66,6 +73,9 @@ struct output_option { {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, {.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, {.str = "period", .field = PERF_OUTPUT_PERIOD}, + {.str = "iregs", .field = PERF_OUTPUT_IREGS}, + {.str = "brstack", .field = PERF_OUTPUT_BRSTACK}, + {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM}, }; /* default set to maintain compatibility with current format */ @@ -255,6 +265,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_PERIOD)) return -EINVAL; + if (PRINT_FIELD(IREGS) && + perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", + PERF_OUTPUT_IREGS)) + return -EINVAL; + return 0; } @@ -352,6 +367,24 @@ out: return 0; } +static void print_sample_iregs(union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct thread *thread __maybe_unused, + struct perf_event_attr *attr) +{ + struct regs_dump *regs = &sample->intr_regs; + uint64_t mask = attr->sample_regs_intr; + unsigned i = 0, r; + + if (!regs) + return; + + for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { + u64 val = regs->regs[i++]; + printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val); + } +} + static void print_sample_start(struct perf_sample *sample, struct thread *thread, struct perf_evsel *evsel) @@ -389,10 +422,84 @@ static void print_sample_start(struct perf_sample *sample, secs = nsecs / NSECS_PER_SEC; nsecs -= secs * NSECS_PER_SEC; usecs = nsecs / NSECS_PER_USEC; - printf("%5lu.%06lu: ", secs, usecs); + if (nanosecs) + printf("%5lu.%09llu: ", secs, nsecs); + else + printf("%5lu.%06lu: ", secs, usecs); } } +static inline char +mispred_str(struct branch_entry *br) +{ + if (!(br->flags.mispred || br->flags.predicted)) + return '-'; + + return br->flags.predicted ? 
'P' : 'M'; +} + +static void print_sample_brstack(union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct thread *thread __maybe_unused, + struct perf_event_attr *attr __maybe_unused) +{ + struct branch_stack *br = sample->branch_stack; + u64 i; + + if (!(br && br->nr)) + return; + + for (i = 0; i < br->nr; i++) { + printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ", + br->entries[i].from, + br->entries[i].to, + mispred_str( br->entries + i), + br->entries[i].flags.in_tx? 'X' : '-', + br->entries[i].flags.abort? 'A' : '-', + br->entries[i].flags.cycles); + } +} + +static void print_sample_brstacksym(union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct thread *thread __maybe_unused, + struct perf_event_attr *attr __maybe_unused) +{ + struct branch_stack *br = sample->branch_stack; + struct addr_location alf, alt; + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + u64 i, from, to; + + if (!(br && br->nr)) + return; + + for (i = 0; i < br->nr; i++) { + + memset(&alf, 0, sizeof(alf)); + memset(&alt, 0, sizeof(alt)); + from = br->entries[i].from; + to = br->entries[i].to; + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, from, &alf); + if (alf.map) + alf.sym = map__find_symbol(alf.map, alf.addr, NULL); + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, to, &alt); + if (alt.map) + alt.sym = map__find_symbol(alt.map, alt.addr, NULL); + + symbol__fprintf_symname_offs(alf.sym, &alf, stdout); + putchar('/'); + symbol__fprintf_symname_offs(alt.sym, &alt, stdout); + printf("/%c/%c/%c/%d ", + mispred_str( br->entries + i), + br->entries[i].flags.in_tx? 'X' : '-', + br->entries[i].flags.abort? 
'A' : '-', + br->entries[i].flags.cycles); + } +} + + static void print_sample_addr(union perf_event *event, struct perf_sample *sample, struct thread *thread, @@ -445,7 +552,7 @@ static void print_sample_bts(union perf_event *event, } } perf_evsel__print_ip(evsel, sample, al, print_opts, - PERF_MAX_STACK_DEPTH); + scripting_max_stack); } /* print branch_to information */ @@ -522,9 +629,17 @@ static void process_event(union perf_event *event, struct perf_sample *sample, perf_evsel__print_ip(evsel, sample, al, output[attr->type].print_ip_opts, - PERF_MAX_STACK_DEPTH); + scripting_max_stack); } + if (PRINT_FIELD(IREGS)) + print_sample_iregs(event, sample, thread, attr); + + if (PRINT_FIELD(BRSTACK)) + print_sample_brstack(event, sample, thread, attr); + else if (PRINT_FIELD(BRSTACKSYM)) + print_sample_brstacksym(event, sample, thread, attr); + printf("\n"); } @@ -623,6 +738,7 @@ struct perf_script { struct perf_session *session; bool show_task_events; bool show_mmap_events; + bool show_switch_events; }; static int process_attr(struct perf_tool *tool, union perf_event *event, @@ -650,7 +766,10 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, set_print_ip_opts(&evsel->attr); - return perf_evsel__check_attr(evsel, scr->session); + if (evsel->attr.sample_type) + err = perf_evsel__check_attr(evsel, scr->session); + + return err; } static int process_comm_event(struct perf_tool *tool, @@ -661,7 +780,7 @@ static int process_comm_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); int ret = -1; thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid); @@ -695,7 +814,7 @@ static int process_fork_event(struct perf_tool *tool, struct thread *thread; struct 
perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); if (perf_event__process_fork(tool, event, sample, machine) < 0) return -1; @@ -727,7 +846,7 @@ static int process_exit_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); if (thread == NULL) { @@ -738,8 +857,8 @@ static int process_exit_event(struct perf_tool *tool, if (!evsel->attr.sample_id_all) { sample->cpu = 0; sample->time = 0; - sample->tid = event->comm.tid; - sample->pid = event->comm.pid; + sample->tid = event->fork.tid; + sample->pid = event->fork.pid; } print_sample_start(sample, thread, evsel); perf_event__fprintf(event, stdout); @@ -759,7 +878,7 @@ static int process_mmap_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); if (perf_event__process_mmap(tool, event, sample, machine) < 0) return -1; @@ -790,7 +909,7 @@ static int process_mmap2_event(struct perf_tool *tool, struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; - struct perf_evsel *evsel = perf_evlist__first(session->evlist); + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); if 
(perf_event__process_mmap2(tool, event, sample, machine) < 0) return -1; @@ -813,6 +932,32 @@ static int process_mmap2_event(struct perf_tool *tool, return 0; } +static int process_switch_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + + if (perf_event__process_switch(tool, event, sample, machine) < 0) + return -1; + + thread = machine__findnew_thread(machine, sample->pid, + sample->tid); + if (thread == NULL) { + pr_debug("problem processing SWITCH event, skipping it.\n"); + return -1; + } + + print_sample_start(sample, thread, evsel); + perf_event__fprintf(event, stdout); + thread__put(thread); + return 0; +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -834,6 +979,8 @@ static int __cmd_script(struct perf_script *script) script->tool.mmap = process_mmap_event; script->tool.mmap2 = process_mmap2_event; } + if (script->show_switch_events) + script->tool.context_switch = process_switch_event; ret = perf_session__process_events(script->session); @@ -1532,6 +1679,22 @@ static int have_cmd(int argc, const char **argv) return 0; } +static void script__setup_sample_type(struct perf_script *script) +{ + struct perf_session *session = script->session; + u64 sample_type = perf_evlist__combined_sample_type(session->evlist); + + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { + if ((sample_type & PERF_SAMPLE_REGS_USER) && + (sample_type & PERF_SAMPLE_STACK_USER)) + callchain_param.record_mode = CALLCHAIN_DWARF; + else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + callchain_param.record_mode = CALLCHAIN_LBR; + else + callchain_param.record_mode = CALLCHAIN_FP; + } +} + int cmd_script(int argc, const char **argv, const char *prefix 
__maybe_unused) { bool show_full_info = false; @@ -1598,7 +1761,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "comma separated output fields prepend with 'type:'. " "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," - "addr,symoff,period,flags", parse_output_fields), + "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", @@ -1618,10 +1781,21 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Show the fork/comm/exit events"), OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events, "Show the mmap events"), + OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events, + "Show context switch events (if recorded)"), OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_BOOLEAN(0, "ns", &nanosecs, + "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), + OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename, + "Show full source file name path for source lines"), + OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, + "Enable symbol demangling"), + OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, + "Enable kernel symbol demangling"), + OPT_END() }; const char * const script_subcommands[] = { "record", "report", NULL }; @@ -1657,6 +1831,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) } } + if (itrace_synth_opts.callchain && + itrace_synth_opts.callchain_sz > scripting_max_stack) + scripting_max_stack = itrace_synth_opts.callchain_sz; + /* make sure PERF_EXEC_PATH is set for scripts */ perf_set_argv_exec_path(perf_exec_path()); @@ -1669,9 +1847,9 @@ int cmd_script(int argc, const char 
**argv, const char *prefix __maybe_unused) rep_script_path = get_script_path(argv[0], REPORT_SUFFIX); if (!rec_script_path && !rep_script_path) { - fprintf(stderr, " Couldn't find script %s\n\n See perf" + usage_with_options_msg(script_usage, options, + "Couldn't find script `%s'\n\n See perf" " script -l for available scripts.\n", argv[0]); - usage_with_options(script_usage, options); } if (is_top_script(argv[0])) { @@ -1682,10 +1860,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) rep_args = has_required_arg(rep_script_path); rec_args = (argc - 1) - rep_args; if (rec_args < 0) { - fprintf(stderr, " %s script requires options." + usage_with_options_msg(script_usage, options, + "`%s' script requires options." "\n\n See perf script -l for available " "scripts and options.\n", argv[0]); - usage_with_options(script_usage, options); } } @@ -1816,6 +1994,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) goto out_delete; script.session = session; + script__setup_sample_type(&script); session->itrace_synth_opts = &itrace_synth_opts; @@ -1830,6 +2009,14 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) else symbol_conf.use_callchain = false; + if (session->tevent.pevent && + pevent_set_function_resolver(session->tevent.pevent, + machine__resolve_kernel_addr, + &session->machines.host) < 0) { + pr_err("%s: failed to set libtraceevent function resolver\n", __func__); + return -1; + } + if (generate_script_lang) { struct stat perf_stat; int input; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d99d850e1444..2f438f76cceb 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -58,6 +58,7 @@ #include "util/cpumap.h" #include "util/thread.h" #include "util/thread_map.h" +#include "util/counts.h" #include <stdlib.h> #include <sys/prctl.h> @@ -99,10 +100,10 @@ static struct target target = { .uid = UINT_MAX, }; +typedef int 
(*aggr_get_id_t)(struct cpu_map *m, int cpu); + static int run_count = 1; static bool no_inherit = false; -static bool scale = true; -static enum aggr_mode aggr_mode = AGGR_GLOBAL; static volatile pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; @@ -112,20 +113,23 @@ static int big_num_opt = -1; static const char *csv_sep = NULL; static bool csv_output = false; static bool group = false; -static FILE *output = NULL; static const char *pre_cmd = NULL; static const char *post_cmd = NULL; static bool sync_run = false; -static unsigned int interval = 0; static unsigned int initial_delay = 0; static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static struct timespec ref_time; static struct cpu_map *aggr_map; -static int (*aggr_get_id)(struct cpu_map *m, int cpu); +static aggr_get_id_t aggr_get_id; static volatile int done = 0; +static struct perf_stat_config stat_config = { + .aggr_mode = AGGR_GLOBAL, + .scale = true, +}; + static inline void diff_timespec(struct timespec *r, struct timespec *a, struct timespec *b) { @@ -148,7 +152,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) { struct perf_event_attr *attr = &evsel->attr; - if (scale) + if (stat_config.scale) attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; @@ -178,142 +182,6 @@ static inline int nsec_counter(struct perf_evsel *evsel) return 0; } -static void zero_per_pkg(struct perf_evsel *counter) -{ - if (counter->per_pkg_mask) - memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); -} - -static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) -{ - unsigned long *mask = counter->per_pkg_mask; - struct cpu_map *cpus = perf_evsel__cpus(counter); - int s; - - *skip = false; - - if (!counter->per_pkg) - return 0; - - if (cpu_map__empty(cpus)) - return 0; - - if (!mask) { - mask = zalloc(MAX_NR_CPUS); - if (!mask) - return -ENOMEM; - - counter->per_pkg_mask = mask; - } - - s = 
cpu_map__get_socket(cpus, cpu); - if (s < 0) - return -1; - - *skip = test_and_set_bit(s, mask) == 1; - return 0; -} - -static int -process_counter_values(struct perf_evsel *evsel, int cpu, int thread, - struct perf_counts_values *count) -{ - struct perf_counts_values *aggr = &evsel->counts->aggr; - static struct perf_counts_values zero; - bool skip = false; - - if (check_per_pkg(evsel, cpu, &skip)) { - pr_err("failed to read per-pkg counter\n"); - return -1; - } - - if (skip) - count = &zero; - - switch (aggr_mode) { - case AGGR_THREAD: - case AGGR_CORE: - case AGGR_SOCKET: - case AGGR_NONE: - if (!evsel->snapshot) - perf_evsel__compute_deltas(evsel, cpu, thread, count); - perf_counts_values__scale(count, scale, NULL); - if (aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->values, cpu); - break; - case AGGR_GLOBAL: - aggr->val += count->val; - if (scale) { - aggr->ena += count->ena; - aggr->run += count->run; - } - default: - break; - } - - return 0; -} - -static int process_counter_maps(struct perf_evsel *counter) -{ - int nthreads = thread_map__nr(counter->threads); - int ncpus = perf_evsel__nr_cpus(counter); - int cpu, thread; - - if (counter->system_wide) - nthreads = 1; - - for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { - if (process_counter_values(counter, cpu, thread, - perf_counts(counter->counts, cpu, thread))) - return -1; - } - } - - return 0; -} - -static int process_counter(struct perf_evsel *counter) -{ - struct perf_counts_values *aggr = &counter->counts->aggr; - struct perf_stat *ps = counter->priv; - u64 *count = counter->counts->aggr.values; - int i, ret; - - aggr->val = aggr->ena = aggr->run = 0; - init_stats(ps->res_stats); - - if (counter->per_pkg) - zero_per_pkg(counter); - - ret = process_counter_maps(counter); - if (ret) - return ret; - - if (aggr_mode != AGGR_GLOBAL) - return 0; - - if (!counter->snapshot) - perf_evsel__compute_deltas(counter, -1, -1, aggr); - 
perf_counts_values__scale(aggr, scale, &counter->counts->scaled); - - for (i = 0; i < 3; i++) - update_stats(&ps->res_stats[i], count[i]); - - if (verbose) { - fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - perf_evsel__name(counter), count[0], count[1], count[2]); - } - - /* - * Save the full runtime - to allow normalization during printout: - */ - perf_stat__update_shadow_stats(counter, count, 0); - - return 0; -} - /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode @@ -349,9 +217,9 @@ static void read_counters(bool close_counters) evlist__for_each(evsel_list, counter) { if (read_counter(counter)) - pr_warning("failed to read counter %s\n", counter->name); + pr_debug("failed to read counter %s\n", counter->name); - if (process_counter(counter)) + if (perf_stat_process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); if (close_counters) { @@ -402,6 +270,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf static int __run_perf_stat(int argc, const char **argv) { + int interval = stat_config.interval; char msg[512]; unsigned long long t0, t1; struct perf_evsel *counter; @@ -545,13 +414,13 @@ static int run_perf_stat(int argc, const char **argv) static void print_running(u64 run, u64 ena) { if (csv_output) { - fprintf(output, "%s%" PRIu64 "%s%.2f", + fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", csv_sep, run, csv_sep, ena ? 
100.0 * run / ena : 100.0); } else if (run != ena) { - fprintf(output, " (%.2f%%)", 100.0 * run / ena); + fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); } } @@ -560,14 +429,14 @@ static void print_noise_pct(double total, double avg) double pct = rel_stddev_stats(total, avg); if (csv_output) - fprintf(output, "%s%.2f%%", csv_sep, pct); + fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); else if (pct) - fprintf(output, " ( +-%6.2f%% )", pct); + fprintf(stat_config.output, " ( +-%6.2f%% )", pct); } static void print_noise(struct perf_evsel *evsel, double avg) { - struct perf_stat *ps; + struct perf_stat_evsel *ps; if (run_count == 1) return; @@ -578,9 +447,9 @@ static void print_noise(struct perf_evsel *evsel, double avg) static void aggr_printout(struct perf_evsel *evsel, int id, int nr) { - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_CORE: - fprintf(output, "S%d-C%*d%s%*d%s", + fprintf(stat_config.output, "S%d-C%*d%s%*d%s", cpu_map__id_to_socket(id), csv_output ? 0 : -8, cpu_map__id_to_cpu(id), @@ -590,7 +459,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_sep); break; case AGGR_SOCKET: - fprintf(output, "S%*d%s%*d%s", + fprintf(stat_config.output, "S%*d%s%*d%s", csv_output ? 0 : -5, id, csv_sep, @@ -599,12 +468,12 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_sep); break; case AGGR_NONE: - fprintf(output, "CPU%*d%s", + fprintf(stat_config.output, "CPU%*d%s", csv_output ? 0 : -4, perf_evsel__cpus(evsel)->map[id], csv_sep); break; case AGGR_THREAD: - fprintf(output, "%*s-%*d%s", + fprintf(stat_config.output, "%*s-%*d%s", csv_output ? 0 : 16, thread_map__comm(evsel->threads, id), csv_output ? 
0 : -8, @@ -612,6 +481,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_sep); break; case AGGR_GLOBAL: + case AGGR_UNSET: default: break; } @@ -619,6 +489,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { + FILE *output = stat_config.output; double msecs = avg / 1e6; const char *fmt_v, *fmt_n; char name[25]; @@ -643,7 +514,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); - if (csv_output || interval) + if (csv_output || stat_config.interval) return; if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) @@ -655,6 +526,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { + FILE *output = stat_config.output; double sc = evsel->scale; const char *fmt; int cpu = cpu_map__id_to_cpu(id); @@ -670,7 +542,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) aggr_printout(evsel, id, nr); - if (aggr_mode == AGGR_GLOBAL) + if (stat_config.aggr_mode == AGGR_GLOBAL) cpu = 0; fprintf(output, fmt, avg, csv_sep); @@ -685,16 +557,18 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); - if (csv_output || interval) + if (csv_output || stat_config.interval) return; - perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); + perf_stat__print_shadow_stats(output, evsel, avg, cpu, + stat_config.aggr_mode); } static void print_aggr(char *prefix) { + FILE *output = stat_config.output; struct perf_evsel *counter; - int cpu, cpu2, s, s2, id, nr; + int cpu, s, s2, id, nr; double uval; u64 ena, run, val; @@ -707,8 +581,7 @@ static void print_aggr(char *prefix) val = ena = run = 0; nr = 0; for (cpu = 0; cpu < 
perf_evsel__nr_cpus(counter); cpu++) { - cpu2 = perf_evsel__cpus(counter)->map[cpu]; - s2 = aggr_get_id(evsel_list->cpus, cpu2); + s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); if (s2 != id) continue; val += perf_counts(counter->counts, cpu, 0)->val; @@ -761,6 +634,7 @@ static void print_aggr(char *prefix) static void print_aggr_thread(struct perf_evsel *counter, char *prefix) { + FILE *output = stat_config.output; int nthreads = thread_map__nr(counter->threads); int ncpus = cpu_map__nr(counter->cpus); int cpu, thread; @@ -799,7 +673,8 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) */ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) { - struct perf_stat *ps = counter->priv; + FILE *output = stat_config.output; + struct perf_stat_evsel *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; double uval; @@ -850,6 +725,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) */ static void print_counter(struct perf_evsel *counter, char *prefix) { + FILE *output = stat_config.output; u64 ena, run, val; double uval; int cpu; @@ -904,12 +780,13 @@ static void print_counter(struct perf_evsel *counter, char *prefix) static void print_interval(char *prefix, struct timespec *ts) { + FILE *output = stat_config.output; static int num_print_interval; sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); if (num_print_interval == 0 && !csv_output) { - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); break; @@ -925,6 +802,8 @@ static void print_interval(char *prefix, struct timespec *ts) case AGGR_GLOBAL: default: fprintf(output, "# time counts %*s events\n", unit_width, "unit"); + case AGGR_UNSET: + break; } } @@ -934,6 +813,7 @@ static void print_interval(char *prefix, struct timespec *ts) static void print_header(int argc, const char 
**argv) { + FILE *output = stat_config.output; int i; fflush(stdout); @@ -963,6 +843,8 @@ static void print_header(int argc, const char **argv) static void print_footer(void) { + FILE *output = stat_config.output; + if (!null_run) fprintf(output, "\n"); fprintf(output, " %17.9f seconds time elapsed", @@ -977,6 +859,7 @@ static void print_footer(void) static void print_counters(struct timespec *ts, int argc, const char **argv) { + int interval = stat_config.interval; struct perf_evsel *counter; char buf[64], *prefix = NULL; @@ -985,7 +868,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) else print_header(argc, argv); - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: print_aggr(prefix); @@ -1002,6 +885,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) evlist__for_each(evsel_list, counter) print_counter(counter, prefix); break; + case AGGR_UNSET: default: break; } @@ -1009,14 +893,14 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (!interval && !csv_output) print_footer(); - fflush(output); + fflush(stat_config.output); } static volatile int signr = -1; static void skip_signal(int signo) { - if ((child_pid == -1) || interval) + if ((child_pid == -1) || stat_config.interval) done = 1; signr = signo; @@ -1062,30 +946,90 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, return 0; } +static int perf_stat__get_socket(struct cpu_map *map, int cpu) +{ + return cpu_map__get_socket(map, cpu, NULL); +} + +static int perf_stat__get_core(struct cpu_map *map, int cpu) +{ + return cpu_map__get_core(map, cpu, NULL); +} + +static int cpu_map__get_max(struct cpu_map *map) +{ + int i, max = -1; + + for (i = 0; i < map->nr; i++) { + if (map->map[i] > max) + max = map->map[i]; + } + + return max; +} + +static struct cpu_map *cpus_aggr_map; + +static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) 
+{ + int cpu; + + if (idx >= map->nr) + return -1; + + cpu = map->map[idx]; + + if (cpus_aggr_map->map[cpu] == -1) + cpus_aggr_map->map[cpu] = get_id(map, idx); + + return cpus_aggr_map->map[cpu]; +} + +static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) +{ + return perf_stat__get_aggr(perf_stat__get_socket, map, idx); +} + +static int perf_stat__get_core_cached(struct cpu_map *map, int idx) +{ + return perf_stat__get_aggr(perf_stat__get_core, map, idx); +} + static int perf_stat_init_aggr_mode(void) { - switch (aggr_mode) { + int nr; + + switch (stat_config.aggr_mode) { case AGGR_SOCKET: if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { perror("cannot build socket map"); return -1; } - aggr_get_id = cpu_map__get_socket; + aggr_get_id = perf_stat__get_socket_cached; break; case AGGR_CORE: if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { perror("cannot build core map"); return -1; } - aggr_get_id = cpu_map__get_core; + aggr_get_id = perf_stat__get_core_cached; break; case AGGR_NONE: case AGGR_GLOBAL: case AGGR_THREAD: + case AGGR_UNSET: default: break; } - return 0; + + /* + * The evsel_list->cpus is the base we operate on, + * taking the highest cpu number to be the size of + * the aggregation translate cpumap. + */ + nr = cpu_map__get_max(evsel_list->cpus); + cpus_aggr_map = cpu_map__empty_new(nr + 1); + return cpus_aggr_map ? 
0 : -ENOMEM; } /* @@ -1270,7 +1214,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "system-wide collection from all CPUs"), OPT_BOOLEAN('g', "group", &group, "put the counters into a counter group"), - OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), + OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &run_count, @@ -1286,7 +1230,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) stat__set_big_num), OPT_STRING('C', "cpu", &target.cpu_list, "cpu", "list of cpus to monitor in system-wide"), - OPT_SET_UINT('A', "no-aggr", &aggr_mode, + OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, "disable CPU count aggregation", AGGR_NONE), OPT_STRING('x', "field-separator", &csv_sep, "separator", "print counts with custom separator"), @@ -1300,13 +1244,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "command to run prior to the measured command"), OPT_STRING(0, "post", &post_cmd, "command", "command to run after to the measured command"), - OPT_UINTEGER('I', "interval-print", &interval, - "print counts at regular interval in ms (>= 100)"), - OPT_SET_UINT(0, "per-socket", &aggr_mode, + OPT_UINTEGER('I', "interval-print", &stat_config.interval, + "print counts at regular interval in ms (>= 10)"), + OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), - OPT_SET_UINT(0, "per-core", &aggr_mode, + OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), - OPT_SET_UINT(0, "per-thread", &aggr_mode, + OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), @@ -1318,6 +1262,8 @@ int 
cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) }; int status = -EINVAL, run_idx; const char *mode; + FILE *output = stderr; + unsigned int interval; setlocale(LC_ALL, ""); @@ -1328,7 +1274,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, options, stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); - output = stderr; + interval = stat_config.interval; + if (output_name && strcmp(output_name, "-")) output = NULL; @@ -1365,6 +1312,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } } + stat_config.output = output; + if (csv_sep) { csv_output = true; if (!strcmp(csv_sep, "\\t")) @@ -1399,7 +1348,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) run_count = 1; } - if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { + if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { fprintf(stderr, "The --per-thread option is only available " "when monitoring via -p -t options.\n"); parse_options_usage(NULL, options, "p", 1); @@ -1411,7 +1360,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) * no_aggr, cgroup are for system-wide only * --per-thread is aggregated per thread, we dont mix it with cpu mode */ - if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) && + if (((stat_config.aggr_mode != AGGR_GLOBAL && + stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && !target__has_cpu(&target)) { fprintf(stderr, "both cgroup and no-aggregation " "modes only available in system-wide mode\n"); @@ -1444,13 +1394,18 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) * Initialize thread_map with comm names, * so we could print it out on output. 
*/ - if (aggr_mode == AGGR_THREAD) + if (stat_config.aggr_mode == AGGR_THREAD) thread_map__read_comms(evsel_list->threads); if (interval && interval < 100) { - pr_err("print interval must be >= 100ms\n"); - parse_options_usage(stat_usage, options, "I", 1); - goto out; + if (interval < 10) { + pr_err("print interval must be >= 10ms\n"); + parse_options_usage(stat_usage, options, "I", 1); + goto out; + } else + pr_warning("print interval < 100ms. " + "The overhead percentage could be high in some cases. " + "Please proceed with caution.\n"); } if (perf_evlist__alloc_stats(evsel_list, interval)) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ecf319728f25..7e2e72e6d9d1 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -40,6 +40,7 @@ #include "util/xyarray.h" #include "util/sort.h" #include "util/intlist.h" +#include "util/parse-branch-options.h" #include "arch/common.h" #include "util/debug.h" @@ -601,8 +602,8 @@ static void display_sig(int sig __maybe_unused) static void display_setup_sig(void) { - signal(SIGSEGV, display_sig); - signal(SIGFPE, display_sig); + signal(SIGSEGV, sighandler_dump_stack); + signal(SIGFPE, sighandler_dump_stack); signal(SIGINT, display_sig); signal(SIGQUIT, display_sig); signal(SIGTERM, display_sig); @@ -654,7 +655,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) { const char *name = sym->name; - if (!map->dso->kernel) + if (!__map__is_kernel(map)) return 0; /* * ppc64 uses function descriptors and appends a '.' to the @@ -695,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, perf_top__record_precise_ip(top, he, evsel->idx, ip); } + hist__account_cycles(iter->sample->branch_stack, al, iter->sample, + !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY)); return 0; } @@ -854,9 +857,12 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) * TODO: we don't process guest user from host side * except simple counting. 
*/ - /* Fall thru */ - default: goto next_event; + default: + if (event->header.type == PERF_RECORD_SAMPLE) + goto next_event; + machine = &session->machines.host; + break; } @@ -949,7 +955,7 @@ static int __cmd_top(struct perf_top *top) machines__set_symbol_filter(&top->session->machines, symbol_filter); if (!objdump_path) { - ret = perf_session_env__lookup_objdump(&top->session->header.env); + ret = perf_env__lookup_objdump(&top->session->header.env); if (ret) goto out_delete; } @@ -958,8 +964,18 @@ static int __cmd_top(struct perf_top *top) if (ret) goto out_delete; + if (perf_session__register_idle_thread(top->session) == NULL) + goto out_delete; + machine__synthesize_threads(&top->session->machines.host, &opts->target, top->evlist->threads, false, opts->proc_map_timeout); + + if (sort__has_socket) { + ret = perf_env__read_cpu_topology_map(&perf_env); + if (ret < 0) + goto out_err_cpu_topo; + } + ret = perf_top__start_counters(top); if (ret) goto out_delete; @@ -1017,6 +1033,14 @@ out_delete: top->session = NULL; return ret; + +out_err_cpu_topo: { + char errbuf[BUFSIZ]; + const char *err = strerror_r(-ret, errbuf, sizeof(errbuf)); + + ui__error("Could not read the CPU topology map: %s\n", err); + goto out_delete; +} } static int @@ -1029,8 +1053,22 @@ callchain_opt(const struct option *opt, const char *arg, int unset) static int parse_callchain_opt(const struct option *opt, const char *arg, int unset) { - symbol_conf.use_callchain = true; - return record_parse_callchain_opt(opt, arg, unset); + struct record_opts *record = (struct record_opts *)opt->value; + + record->callgraph_set = true; + callchain_param.enabled = !unset; + callchain_param.record_mode = CALLCHAIN_FP; + + /* + * --no-call-graph + */ + if (unset) { + symbol_conf.use_callchain = false; + callchain_param.record_mode = CALLCHAIN_NONE; + return 0; + } + + return parse_callchain_top_opt(arg); } static int perf_top_config(const char *var, const char *value, void *cb) @@ -1055,6 +1093,9 @@ 
parse_percent_limit(const struct option *opt, const char *arg, return 0; } +const char top_callchain_help[] = CALLCHAIN_RECORD_HELP CALLCHAIN_REPORT_HELP + "\n\t\t\t\tDefault: fp,graph,0.5,caller,function"; + int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) { char errbuf[BUFSIZ]; @@ -1130,11 +1171,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts, - NULL, "enables call-graph recording", + NULL, "enables call-graph recording and display", &callchain_opt), OPT_CALLBACK(0, "call-graph", &top.record_opts, - "mode[,dump_size]", record_callchain_help, - &parse_callchain_opt), + "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]", + top_callchain_help, &parse_callchain_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &top.max_stack, @@ -1171,6 +1212,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "don't try to adjust column width, use these fixed values"), OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout, "per thread proc mmap processing timeout in ms"), + OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack, + "branch any", "sample any taken branches", + parse_branch_stack), + OPT_CALLBACK('j', "branch-filter", &opts->branch_stack, + "branch filter mask", "branch stack filter modes", + parse_branch_stack), OPT_END() }; const char * const top_usage[] = { @@ -1258,6 +1305,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) perf_hpp__cancel_cumulate(); } + if (symbol_conf.cumulate_callchain && !callchain_param.order_set) + callchain_param.order = ORDER_CALLER; + symbol_conf.priv_size = sizeof(struct annotation); 
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 39ad4d0ca884..c783d8fd3a80 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1,8 +1,28 @@ +/* + * builtin-trace.c + * + * Builtin 'trace' command: + * + * Display a continuously updated trace of any workload, CPU, specific PID, + * system wide, etc. Default format is loosely strace like, but any other + * event may be specified using --event. + * + * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + * + * Initially based on the 'trace' prototype by Thomas Gleixner: + * + * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'") + * + * Released under the GPL v2. (and only v2, not any later version) + */ + #include <traceevent/event-parse.h> +#include <api/fs/tracing_path.h> #include "builtin.h" #include "util/color.h" #include "util/debug.h" #include "util/evlist.h" +#include "util/exec_cmd.h" #include "util/machine.h" #include "util/session.h" #include "util/thread.h" @@ -18,6 +38,7 @@ #include <stdlib.h> #include <sys/mman.h> #include <linux/futex.h> +#include <linux/err.h> /* For older distros: */ #ifndef MAP_STACK @@ -26,6 +47,7 @@ #ifndef MADV_HWPOISON # define MADV_HWPOISON 100 + #endif #ifndef MADV_MERGEABLE @@ -224,13 +246,14 @@ static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ - if (evsel == NULL) + if (IS_ERR(evsel)) evsel = perf_evsel__newtp("syscalls", direction); - if (evsel) { - if (perf_evsel__init_syscall_tp(evsel, handler)) - goto out_delete; - } + if (IS_ERR(evsel)) + return NULL; + + if (perf_evsel__init_syscall_tp(evsel, handler)) + goto out_delete; return evsel; @@ -247,42 +270,6 @@ out_delete: ({ struct syscall_tp *fields = evsel->priv; \ 
fields->name.pointer(&fields->name, sample); }) -static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist, - void *sys_enter_handler, - void *sys_exit_handler) -{ - int ret = -1; - struct perf_evsel *sys_enter, *sys_exit; - - sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler); - if (sys_enter == NULL) - goto out; - - if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) - goto out_delete_sys_enter; - - sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler); - if (sys_exit == NULL) - goto out_delete_sys_enter; - - if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) - goto out_delete_sys_exit; - - perf_evlist__add(evlist, sys_enter); - perf_evlist__add(evlist, sys_exit); - - ret = 0; -out: - return ret; - -out_delete_sys_exit: - perf_evsel__delete_priv(sys_exit); -out_delete_sys_enter: - perf_evsel__delete_priv(sys_enter); - goto out; -} - - struct syscall_arg { unsigned long val; struct thread *thread; @@ -598,12 +585,27 @@ static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct sysc #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op +static const char *bpf_cmd[] = { + "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM", + "MAP_GET_NEXT_KEY", "PROG_LOAD", +}; +static DEFINE_STRARRAY(bpf_cmd); + static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; static DEFINE_STRARRAY(itimers); +static const char *keyctl_options[] = { + "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN", + "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ", + "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT", + "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT", + "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT", +}; +static DEFINE_STRARRAY(keyctl_options); + static const char *whences[] = { "SET", "CUR", "END", #ifdef SEEK_DATA 
"DATA", @@ -634,7 +636,8 @@ static DEFINE_STRARRAY(sighow); static const char *clockid[] = { "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", - "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", + "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME", + "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI" }; static DEFINE_STRARRAY(clockid); @@ -779,6 +782,11 @@ static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, #define SCA_ACCMODE syscall_arg__scnprintf_access_mode +static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, + struct syscall_arg *arg); + +#define SCA_FILENAME syscall_arg__scnprintf_filename + static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -1006,14 +1014,24 @@ static struct syscall_fmt { bool hexret; } syscall_fmts[] = { { .name = "access", .errmsg = true, - .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ + [1] = SCA_ACCMODE, /* mode */ }, }, { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, + { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), }, { .name = "brk", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, + { .name = "chdir", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + { .name = "chmod", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + { .name = "chroot", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, { .name = "close", .errmsg = true, .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, { .name = "connect", .errmsg = true, }, + { .name = "creat", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "dup", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "dup2", .errmsg = true, @@ -1024,7 
+1042,8 @@ static struct syscall_fmt { { .name = "eventfd2", .errmsg = true, .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, { .name = "faccessat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "fadvise64", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fallocate", .errmsg = true, @@ -1034,11 +1053,13 @@ static struct syscall_fmt { { .name = "fchmod", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchmodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "fchown", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchownat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "fcntl", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [1] = SCA_STRARRAY, /* cmd */ }, @@ -1053,7 +1074,8 @@ static struct syscall_fmt { { .name = "fstat", .errmsg = true, .alias = "newfstat", .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fstatat", .errmsg = true, .alias = "newfstatat", - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "fstatfs", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fsync", .errmsg = true, @@ -1063,13 +1085,18 @@ static struct syscall_fmt { { .name = "futex", .errmsg = true, .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, { .name = "futimesat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "getdents", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, 
/* fd */ }, }, { .name = "getdents64", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, + { .name = "getxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "inotify_add_watch", .errmsg = true, + .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, }, { .name = "ioctl", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ #if defined(__i386__) || defined(__x86_64__) @@ -1082,22 +1109,44 @@ static struct syscall_fmt { #else [2] = SCA_HEX, /* arg */ }, }, #endif + { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), }, { .name = "kill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, + { .name = "lchown", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + { .name = "lgetxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "linkat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + { .name = "listxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "llistxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "lremovexattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "lseek", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [2] = SCA_STRARRAY, /* whence */ }, .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, - { .name = "lstat", .errmsg = true, .alias = "newlstat", }, + { .name = "lsetxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "lstat", .errmsg = true, .alias = "newlstat", + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + { .name = "lsxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* 
pathname */ }, }, { .name = "madvise", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MADV_BHV, /* behavior */ }, }, + { .name = "mkdir", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "mkdirat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ + [1] = SCA_FILENAME, /* pathname */ }, }, + { .name = "mknod", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, { .name = "mknodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "mlock", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mlockall", .errmsg = true, @@ -1110,6 +1159,8 @@ static struct syscall_fmt { { .name = "mprotect", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MMAP_PROT, /* prot */ }, }, + { .name = "mq_unlink", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, }, { .name = "mremap", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ [3] = SCA_MREMAP_FLAGS, /* flags */ @@ -1121,14 +1172,17 @@ static struct syscall_fmt { { .name = "name_to_handle_at", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "newfstatat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [1] = SCA_FILENAME, /* filename */ }, }, { .name = "open", .errmsg = true, - .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ + [1] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "open_by_handle_at", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ [2] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "openat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [1] = SCA_FILENAME, /* filename */ [2] = 
SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "perf_event_open", .errmsg = true, .arg_scnprintf = { [1] = SCA_INT, /* pid */ @@ -1150,18 +1204,28 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "read", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "readlink", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, { .name = "readlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ + [1] = SCA_FILENAME, /* pathname */ }, }, { .name = "readv", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "recvfrom", .errmsg = true, - .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmmsg", .errmsg = true, - .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmsg", .errmsg = true, - .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [2] = SCA_MSG_FLAGS, /* flags */ }, }, + { .name = "removexattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "renameat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + { .name = "rmdir", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "rt_sigaction", .errmsg = true, .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, @@ -1171,13 +1235,18 @@ static struct syscall_fmt { .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "select", .errmsg = true, .timeout = true, }, { .name = "sendmmsg", .errmsg = true, - .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [3] = 
SCA_MSG_FLAGS, /* flags */ }, }, { .name = "sendmsg", .errmsg = true, - .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [2] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "sendto", .errmsg = true, - .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ + [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, + { .name = "setxattr", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "shutdown", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "socket", .errmsg = true, @@ -1188,18 +1257,35 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, - { .name = "stat", .errmsg = true, .alias = "newstat", }, + { .name = "stat", .errmsg = true, .alias = "newstat", + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "statfs", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "swapoff", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, + { .name = "swapon", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, { .name = "symlinkat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "tgkill", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "tkill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, + { .name = "truncate", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, { .name = "uname", .errmsg = true, .alias = "newuname", }, { .name = "unlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { 
[0] = SCA_FDAT, /* dfd */ + [1] = SCA_FILENAME, /* pathname */ }, }, + { .name = "utime", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, { .name = "utimensat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ + [1] = SCA_FILENAME, /* filename */ }, }, + { .name = "utimes", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + { .name = "vmsplice", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "write", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "writev", .errmsg = true, @@ -1223,7 +1309,6 @@ struct syscall { int nr_args; struct format_field *args; const char *name; - bool filtered; bool is_exit; struct syscall_fmt *fmt; size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); @@ -1244,6 +1329,11 @@ static size_t fprintf_duration(unsigned long t, FILE *fp) return printed + fprintf(fp, "): "); } +/** + * filename.ptr: The filename char pointer that will be vfs_getname'd + * filename.entry_str_pos: Where to insert the string translated from + * filename.ptr by the vfs_getname tracepoint/kprobe. 
+ */ struct thread_trace { u64 entry_time; u64 exit_time; @@ -1252,6 +1342,13 @@ struct thread_trace { unsigned long pfmaj, pfmin; char *entry_str; double runtime_ms; + struct { + unsigned long ptr; + short int entry_str_pos; + bool pending_open; + unsigned int namelen; + char *name; + } filename; struct { int max; char **table; @@ -1298,6 +1395,8 @@ fail: #define TRACE_PFMAJ (1 << 0) #define TRACE_PFMIN (1 << 1) +static const size_t trace__entry_str_size = 2048; + struct trace { struct perf_tool tool; struct { @@ -1307,6 +1406,10 @@ struct trace { struct { int max; struct syscall *table; + struct { + struct perf_evsel *sys_enter, + *sys_exit; + } events; } syscalls; struct record_opts opts; struct perf_evlist *evlist; @@ -1316,7 +1419,10 @@ struct trace { FILE *output; unsigned long nr_events; struct strlist *ev_qualifier; - const char *last_vfs_getname; + struct { + size_t nr; + int *entries; + } ev_qualifier_ids; struct intlist *tid_list; struct intlist *pid_list; struct { @@ -1340,6 +1446,7 @@ struct trace { bool show_tool_stats; bool trace_syscalls; bool force; + bool vfs_getname; int trace_pgfaults; }; @@ -1443,6 +1550,27 @@ static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, return printed; } +static void thread__set_filename_pos(struct thread *thread, const char *bf, + unsigned long ptr) +{ + struct thread_trace *ttrace = thread__priv(thread); + + ttrace->filename.ptr = ptr; + ttrace->filename.entry_str_pos = bf - ttrace->entry_str; +} + +static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, + struct syscall_arg *arg) +{ + unsigned long ptr = arg->val; + + if (!arg->trace->vfs_getname) + return scnprintf(bf, size, "%#x", ptr); + + thread__set_filename_pos(arg->thread, bf, ptr); + return 0; +} + static bool trace__filter_duration(struct trace *trace, double t) { return t < (trace->duration_filter * NSEC_PER_MSEC); @@ -1517,6 +1645,9 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) if 
(trace->host == NULL) return -ENOMEM; + if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0) + return -errno; + err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, evlist->threads, trace__tool_process, false, trace->opts.proc_map_timeout); @@ -1578,30 +1709,17 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc = trace->syscalls.table + id; sc->name = name; - if (trace->ev_qualifier) { - bool in = strlist__find(trace->ev_qualifier, name) != NULL; - - if (!(in ^ trace->not_ev_qualifier)) { - sc->filtered = true; - /* - * No need to do read tracepoint information since this will be - * filtered out. - */ - return 0; - } - } - sc->fmt = syscall_fmt__find(sc->name); snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); sc->tp_format = trace_event__tp_format("syscalls", tp_name); - if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { + if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) { snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); sc->tp_format = trace_event__tp_format("syscalls", tp_name); } - if (sc->tp_format == NULL) + if (IS_ERR(sc->tp_format)) return -1; sc->args = sc->tp_format->format.fields; @@ -1619,13 +1737,27 @@ static int trace__read_syscall_info(struct trace *trace, int id) static int trace__validate_ev_qualifier(struct trace *trace) { - int err = 0; + int err = 0, i; struct str_node *pos; + trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); + trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr * + sizeof(trace->ev_qualifier_ids.entries[0])); + + if (trace->ev_qualifier_ids.entries == NULL) { + fputs("Error:\tNot enough memory for allocating events qualifier ids\n", + trace->output); + err = -EINVAL; + goto out; + } + + i = 0; + strlist__for_each(pos, trace->ev_qualifier) { const char *sc = pos->s; + int id = audit_name_to_syscall(sc, trace->audit.machine); - if (audit_name_to_syscall(sc, 
trace->audit.machine) < 0) { + if (id < 0) { if (err == 0) { fputs("Error:\tInvalid syscall ", trace->output); err = -EINVAL; @@ -1635,13 +1767,17 @@ static int trace__validate_ev_qualifier(struct trace *trace) fputs(sc, trace->output); } + + trace->ev_qualifier_ids.entries[i++] = id; } if (err < 0) { fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" "\nHint:\tand: 'man syscalls'\n", trace->output); + zfree(&trace->ev_qualifier_ids.entries); + trace->ev_qualifier_ids.nr = 0; } - +out: return err; } @@ -1833,9 +1969,6 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (sc == NULL) return -1; - if (sc->filtered) - return 0; - thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) @@ -1844,7 +1977,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, args = perf_evsel__sc_tp_ptr(evsel, args, sample); if (ttrace->entry_str == NULL) { - ttrace->entry_str = malloc(1024); + ttrace->entry_str = malloc(trace__entry_str_size); if (!ttrace->entry_str) goto out_put; } @@ -1854,9 +1987,9 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, ttrace->entry_time = sample->time; msg = ttrace->entry_str; - printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); + printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); - printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, + printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed, args, trace, thread); if (sc->is_exit) { @@ -1864,8 +1997,11 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); fprintf(trace->output, "%-70s\n", ttrace->entry_str); } - } else + } else { ttrace->entry_pending = true; + /* See trace__vfs_getname & trace__sys_exit */ + ttrace->filename.pending_open = 
false; + } if (trace->current != thread) { thread__put(trace->current); @@ -1891,9 +2027,6 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (sc == NULL) return -1; - if (sc->filtered) - return 0; - thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) @@ -1904,9 +2037,9 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, ret = perf_evsel__sc_tp_uint(evsel, ret, sample); - if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { - trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); - trace->last_vfs_getname = NULL; + if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) { + trace__set_fd_pathname(thread, ret, ttrace->filename.name); + ttrace->filename.pending_open = false; ++trace->stats.vfs_getname; } @@ -1961,7 +2094,56 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) { - trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); + struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); + struct thread_trace *ttrace; + size_t filename_len, entry_str_len, to_move; + ssize_t remaining_space; + char *pos; + const char *filename = perf_evsel__rawptr(evsel, sample, "pathname"); + + if (!thread) + goto out; + + ttrace = thread__priv(thread); + if (!ttrace) + goto out; + + filename_len = strlen(filename); + + if (ttrace->filename.namelen < filename_len) { + char *f = realloc(ttrace->filename.name, filename_len + 1); + + if (f == NULL) + goto out; + + ttrace->filename.namelen = filename_len; + ttrace->filename.name = f; + } + + strcpy(ttrace->filename.name, filename); + ttrace->filename.pending_open = true; + + if (!ttrace->filename.ptr) + goto out; + + entry_str_len = strlen(ttrace->entry_str); + remaining_space = trace__entry_str_size - 
entry_str_len - 1; /* \0 */ + if (remaining_space <= 0) + goto out; + + if (filename_len > (size_t)remaining_space) { + filename += filename_len - remaining_space; + filename_len = remaining_space; + } + + to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */ + pos = ttrace->entry_str + ttrace->filename.entry_str_pos; + memmove(pos + filename_len, pos, to_move); + memcpy(pos, filename, filename_len); + + ttrace->filename.ptr = 0; + ttrace->filename.entry_str_pos = 0; +out: return 0; } @@ -2214,19 +2396,21 @@ static int trace__record(struct trace *trace, int argc, const char **argv) static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); -static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) +static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) { struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); - if (evsel == NULL) - return; + + if (IS_ERR(evsel)) + return false; if (perf_evsel__field(evsel, "pathname") == NULL) { perf_evsel__delete(evsel); - return; + return false; } evsel->handler = trace__vfs_getname; perf_evlist__add(evlist, evsel); + return true; } static int perf_evlist__add_pgfault(struct perf_evlist *evlist, @@ -2283,9 +2467,68 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st } } +static int trace__add_syscall_newtp(struct trace *trace) +{ + int ret = -1; + struct perf_evlist *evlist = trace->evlist; + struct perf_evsel *sys_enter, *sys_exit; + + sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter); + if (sys_enter == NULL) + goto out; + + if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) + goto out_delete_sys_enter; + + sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit); + if (sys_exit == NULL) + goto out_delete_sys_enter; + + if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) + goto out_delete_sys_exit; + + perf_evlist__add(evlist, sys_enter); + perf_evlist__add(evlist, sys_exit); + + 
trace->syscalls.events.sys_enter = sys_enter; + trace->syscalls.events.sys_exit = sys_exit; + + ret = 0; +out: + return ret; + +out_delete_sys_exit: + perf_evsel__delete_priv(sys_exit); +out_delete_sys_enter: + perf_evsel__delete_priv(sys_enter); + goto out; +} + +static int trace__set_ev_qualifier_filter(struct trace *trace) +{ + int err = -1; + char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier, + trace->ev_qualifier_ids.nr, + trace->ev_qualifier_ids.entries); + + if (filter == NULL) + goto out_enomem; + + if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter)) + err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter); + + free(filter); +out: + return err; +out_enomem: + errno = ENOMEM; + goto out; +} + static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = trace->evlist; + struct perf_evsel *evsel; int err = -1, i; unsigned long before; const bool forks = argc > 0; @@ -2293,13 +2536,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->live = true; - if (trace->trace_syscalls && - perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, - trace__sys_exit)) + if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) goto out_error_raw_syscalls; if (trace->trace_syscalls) - perf_evlist__add_vfs_getname(evlist); + trace->vfs_getname = perf_evlist__add_vfs_getname(evlist); if ((trace->trace_pgfaults & TRACE_PFMAJ) && perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { @@ -2356,11 +2597,22 @@ static int trace__run(struct trace *trace, int argc, const char **argv) else if (thread_map__pid(evlist->threads, 0) == -1) err = perf_evlist__set_filter_pid(evlist, getpid()); - if (err < 0) { - printf("err=%d,%s\n", -err, strerror(-err)); - exit(1); + if (err < 0) + goto out_error_mem; + + if (trace->ev_qualifier_ids.nr > 0) { + err = trace__set_ev_qualifier_filter(trace); + if (err < 0) + goto out_errno; + + 
pr_debug("event qualifier tracepoint filter: %s\n", + trace->syscalls.events.sys_exit->filter); } + err = perf_evlist__apply_filters(evlist, &evsel); + if (err < 0) + goto out_error_apply_filters; + err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); if (err < 0) goto out_error_mmap; @@ -2445,11 +2697,11 @@ out_delete_evlist: char errbuf[BUFSIZ]; out_error_sched_stat_runtime: - debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); + tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); goto out_error; out_error_raw_syscalls: - debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); + tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); goto out_error; out_error_mmap: @@ -2462,10 +2714,21 @@ out_error_open: out_error: fprintf(trace->output, "%s\n", errbuf); goto out_delete_evlist; + +out_error_apply_filters: + fprintf(trace->output, + "Failed to set filter \"%s\" on event %s with %d (%s)\n", + evsel->filter, perf_evsel__name(evsel), errno, + strerror_r(errno, errbuf, sizeof(errbuf))); + goto out_delete_evlist; } out_error_mem: fprintf(trace->output, "Not enough memory to run!\n"); goto out_delete_evlist; + +out_errno: + fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno)); + goto out_delete_evlist; } static int trace__replay(struct trace *trace) @@ -2586,9 +2849,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, "\n"); - printed += fprintf(fp, " syscall calls min avg max stddev\n"); - printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n"); - printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n"); + printed += fprintf(fp, " syscall calls total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- --------- --------- --------- 
--------- ------\n"); /* each int_node is a syscall */ while (inode) { @@ -2605,8 +2868,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, sc = &trace->syscalls.table[inode->i]; printed += fprintf(fp, " %-15s", sc->name); - printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f", - n, min, avg); + printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f", + n, avg * n, min, avg); printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); } @@ -2778,7 +3041,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .mmap_pages = UINT_MAX, .proc_map_timeout = 500, }, - .output = stdout, + .output = stderr, .show_comm = true, .trace_syscalls = true, }; @@ -2879,11 +3142,14 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) if (ev_qualifier_str != NULL) { const char *s = ev_qualifier_str; + struct strlist_config slist_config = { + .dirname = system_path(STRACE_GROUPS_DIR), + }; trace.not_ev_qualifier = *s == '!'; if (trace.not_ev_qualifier) ++s; - trace.ev_qualifier = strlist__new(true, s); + trace.ev_qualifier = strlist__new(s, &slist_config); if (trace.ev_qualifier == NULL) { fputs("Not enough memory to parse event qualifier", trace.output); diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 094ddaee104c..de89ec574361 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -11,7 +11,7 @@ ifneq ($(obj-perf),) obj-perf := $(abspath $(obj-perf))/ endif -$(shell echo -n > $(OUTPUT).config-detected) +$(shell printf "" > $(OUTPUT).config-detected) detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) @@ -106,9 +106,14 @@ ifdef LIBBABELTRACE FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf endif +FEATURE_CHECK_CFLAGS-bpf = -I. 
-I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi # include ARCH specific config -include $(src-perf)/arch/$(ARCH)/Makefile +ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET + CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET +endif + include $(src-perf)/config/utilities.mak ifeq ($(call get-executable,$(FLEX)),) @@ -233,6 +238,7 @@ ifdef NO_LIBELF NO_DEMANGLE := 1 NO_LIBUNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1 + NO_LIBBPF := 1 else ifeq ($(feature-libelf), 0) ifeq ($(feature-glibc), 1) @@ -242,13 +248,14 @@ else LIBC_SUPPORT := 1 endif ifeq ($(LIBC_SUPPORT),1) - msg := $(warning No libelf found, disables 'probe' tool, please install elfutils-libelf-devel/libelf-dev); + msg := $(warning No libelf found, disables 'probe' tool and BPF support in 'perf record', please install elfutils-libelf-devel/libelf-dev); NO_LIBELF := 1 NO_DWARF := 1 NO_DEMANGLE := 1 NO_LIBUNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1 + NO_LIBBPF := 1 else ifneq ($(filter s% -static%,$(LDFLAGS),),) msg := $(error No static glibc found, please install glibc-static); @@ -297,10 +304,21 @@ ifndef NO_LIBELF else CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) LDFLAGS += $(LIBDW_LDFLAGS) - EXTLIBS += -ldw + DWARFLIBS := -ldw + ifeq ($(findstring -static,${LDFLAGS}),-static) + DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 + endif + EXTLIBS += ${DWARFLIBS} $(call detected,CONFIG_DWARF) endif # PERF_HAVE_DWARF_REGS endif # NO_DWARF + + ifndef NO_LIBBPF + ifeq ($(feature-bpf), 1) + CFLAGS += -DHAVE_LIBBPF_SUPPORT + $(call detected,CONFIG_LIBBPF) + endif + endif # NO_LIBBPF endif # NO_LIBELF ifeq ($(ARCH),powerpc) @@ -316,6 +334,13 @@ ifndef NO_LIBUNWIND endif endif +ifndef NO_LIBBPF + ifneq ($(feature-bpf), 1) + msg := $(warning BPF API too old. Please install recent kernel headers. BPF support in 'perf record' is disabled.) 
+ NO_LIBBPF := 1 + endif +endif + dwarf-post-unwind := 1 dwarf-post-unwind-text := BUG @@ -569,9 +594,14 @@ ifndef NO_LIBNUMA msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev); NO_LIBNUMA := 1 else - CFLAGS += -DHAVE_LIBNUMA_SUPPORT - EXTLIBS += -lnuma - $(call detected,CONFIG_NUMA) + ifeq ($(feature-numa_num_possible_cpus), 0) + msg := $(warning Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8); + NO_LIBNUMA := 1 + else + CFLAGS += -DHAVE_LIBNUMA_SUPPORT + EXTLIBS += -lnuma + $(call detected,CONFIG_NUMA) + endif endif endif @@ -617,8 +647,13 @@ ifdef LIBBABELTRACE endif ifndef NO_AUXTRACE - $(call detected,CONFIG_AUXTRACE) - CFLAGS += -DHAVE_AUXTRACE_SUPPORT + ifeq ($(feature-get_cpuid), 0) + msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); + NO_AUXTRACE := 1 + else + $(call detected,CONFIG_AUXTRACE) + CFLAGS += -DHAVE_AUXTRACE_SUPPORT + endif endif # Among the variables below, these: @@ -638,12 +673,13 @@ ifndef DESTDIR prefix ?= $(HOME) endif bindir_relative = bin -bindir = $(prefix)/$(bindir_relative) +bindir = $(abspath $(prefix)/$(bindir_relative)) mandir = share/man infodir = share/info perfexecdir = libexec/perf-core sharedir = $(prefix)/share template_dir = share/perf-core/templates +STRACE_GROUPS_DIR = share/perf-core/strace/groups htmldir = share/doc/perf-doc ifeq ($(prefix),/usr) sysconfdir = /etc @@ -663,6 +699,7 @@ libdir = $(prefix)/$(lib) # Shell quote (do not use $(call) to accommodate ancient setups); ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) +STRACE_GROUPS_DIR_SQ = $(subst ','\'',$(STRACE_GROUPS_DIR)) DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) bindir_SQ = $(subst ','\'',$(bindir)) mandir_SQ = $(subst ','\'',$(mandir)) @@ -676,10 +713,13 @@ libdir_SQ = $(subst ','\'',$(libdir)) ifneq ($(filter 
/%,$(firstword $(perfexecdir))),) perfexec_instdir = $(perfexecdir) +STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR) else perfexec_instdir = $(prefix)/$(perfexecdir) +STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR) endif perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) +STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR)) # If we install to $(HOME) we keep the traceevent default: # $(HOME)/.traceevent/plugins @@ -713,6 +753,7 @@ $(call detected_var,htmldir_SQ) $(call detected_var,infodir_SQ) $(call detected_var,mandir_SQ) $(call detected_var,ETC_PERFCONFIG_SQ) +$(call detected_var,STRACE_GROUPS_DIR_SQ) $(call detected_var,prefix_SQ) $(call detected_var,perfexecdir_SQ) $(call detected_var,LIBDIR) diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh index c7ff90a90e4e..7e47a7cbc195 100644 --- a/tools/perf/perf-with-kcore.sh +++ b/tools/perf/perf-with-kcore.sh @@ -50,7 +50,7 @@ copy_kcore() fi rm -f perf.data.junk - ("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null & + ("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null & PERF_PID=$! 
# Need to make sure that perf has started @@ -160,18 +160,18 @@ record() echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2 fi - if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then + if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2 fi - if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then + if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2 fi - if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then + if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then true - elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then + elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then true elif [ ! -r /sys/kernel/debug -o ! 
-x /sys/kernel/debug ] ; then echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2 @@ -193,8 +193,8 @@ record() mkdir "$PERF_DATA_DIR" - echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*" - "$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true + echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@" + "$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then exit 1 @@ -209,8 +209,8 @@ subcommand() { find_perf check_buildid_cache_permissions - echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*" - "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $* + echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@" + "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@" } if [ "$1" = "fix_buildid_cache_permissions" ] ; then @@ -234,7 +234,7 @@ fi case "$PERF_SUB_COMMAND" in "record") while [ "$1" != "--" ] ; do - PERF_OPTIONS+="$1 " + PERF_OPTIONS+=("$1") shift || break done if [ "$1" != "--" ] ; then @@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in usage fi shift - record $* + record "$@" ;; "script") - subcommand $* + subcommand "$@" ;; "report") - subcommand $* + subcommand "$@" ;; "inject") - subcommand $* + subcommand "$@" ;; *) usage diff --git a/tools/perf/perf.c b/tools/perf/perf.c index b857fcbd00cf..3d4c7c09adea 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -8,14 +8,16 @@ */ #include "builtin.h" +#include "util/env.h" #include "util/exec_cmd.h" #include "util/cache.h" #include "util/quote.h" #include "util/run-command.h" #include "util/parse-events.h" #include "util/parse-options.h" +#include "util/bpf-loader.h" #include "util/debug.h" -#include <api/fs/debugfs.h> 
+#include <api/fs/tracing_path.h> #include <pthread.h> const char perf_usage_string[] = @@ -161,6 +163,20 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) break; /* + * Shortcut for '-h' and '-v' options to invoke help + * and version command. + */ + if (!strcmp(cmd, "-h")) { + (*argv)[0] = "--help"; + break; + } + + if (!strcmp(cmd, "-v")) { + (*argv)[0] = "--version"; + break; + } + + /* * Check remaining flags. */ if (!prefixcmp(cmd, CMD_EXEC_PATH)) { @@ -214,7 +230,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) fprintf(stderr, "No directory given for --debugfs-dir.\n"); usage(perf_usage_string); } - perf_debugfs_set_path((*argv)[1]); + tracing_path_set((*argv)[1]); if (envchanged) *envchanged = 1; (*argv)++; @@ -230,8 +246,8 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) (*argv)++; (*argc)--; } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { - perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR)); - fprintf(stderr, "dir: %s\n", debugfs_mountpoint); + tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR)); + fprintf(stderr, "dir: %s\n", tracing_path); if (envchanged) *envchanged = 1; } else if (!strcmp(cmd, "--list-cmds")) { @@ -369,6 +385,8 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) status = p->fn(argc, argv, prefix); exit_browser(status); + perf_env__exit(&perf_env); + bpf__clear(); if (status) return status & 0xff; @@ -517,8 +535,10 @@ int main(int argc, const char **argv) cmd = perf_extract_argv0_path(argv[0]); if (!cmd) cmd = "perf-help"; - /* get debugfs mount point from /proc/mounts */ - perf_debugfs_mount(NULL); + + /* get debugfs/tracefs mount point from /proc/mounts */ + tracing_path_mount(); + /* * "perf-xxxx" is the same as "perf xxxx", but we obviously: * diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 4a5827fff799..90129accffbe 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -51,16 +51,19 @@ struct record_opts { bool 
sample_address; bool sample_weight; bool sample_time; + bool sample_time_set; + bool callgraph_set; bool period; - bool sample_intr_regs; bool running_time; bool full_auxtrace; bool auxtrace_snapshot_mode; + bool record_switch_events; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; unsigned int user_freq; u64 branch_stack; + u64 sample_intr_regs; u64 default_interval; u64 user_interval; size_t auxtrace_snapshot_size; diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index 2225162ee1fc..c235c22b107a 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py @@ -15,13 +15,23 @@ import perf -def main(): +def main(context_switch = 0, thread = -1): cpus = perf.cpu_map() - threads = perf.thread_map() - evsel = perf.evsel(task = 1, comm = 1, mmap = 0, + threads = perf.thread_map(thread) + evsel = perf.evsel(type = perf.TYPE_SOFTWARE, + config = perf.COUNT_SW_DUMMY, + task = 1, comm = 1, mmap = 0, freq = 0, wakeup_events = 1, watermark = 1, - sample_id_all = 1, + sample_id_all = 1, context_switch = context_switch, sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU) + + """What we want are just the PERF_RECORD_ lifetime events for threads. + Using the default, PERF_TYPE_HARDWARE + PERF_COUNT_HW_CYCLES & freq=1 + (the default), makes perf reenable irq_vectors:local_timer_entry when + disabling nohz, which is not good for some use cases where all we want is to + see threads come and go... So use (perf.TYPE_SOFTWARE, perf.COUNT_SW_DUMMY, + freq=0) instead.""" + evsel.open(cpus = cpus, threads = threads); evlist = perf.evlist(cpus, threads) evlist.add(evsel) @@ -38,4 +48,21 @@ def main(): print event if __name__ == '__main__': + """ + To test the PERF_RECORD_SWITCH record, pick a pid and replace + in the following line. 
+ + Example output: + +cpu: 3, pid: 31463, tid: 31593 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31593, switch_out: 1 } +cpu: 1, pid: 31463, tid: 31489 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31489, switch_out: 1 } +cpu: 2, pid: 31463, tid: 31496 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31496, switch_out: 1 } +cpu: 3, pid: 31463, tid: 31491 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31491, switch_out: 0 } + + It is possible as well to use event.misc & perf.PERF_RECORD_MISC_SWITCH_OUT + to figure out if this is a context switch in or out of the monitored threads. + + If bored, please add command line option parsing support for these options :-) + """ + # main(context_switch = 1, thread = 31463) main() diff --git a/tools/perf/scripts/python/bin/compaction-times-record b/tools/perf/scripts/python/bin/compaction-times-record new file mode 100644 index 000000000000..6edcd40e14e8 --- /dev/null +++ b/tools/perf/scripts/python/bin/compaction-times-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -e compaction:mm_compaction_begin -e compaction:mm_compaction_end -e compaction:mm_compaction_migratepages -e compaction:mm_compaction_isolate_migratepages -e compaction:mm_compaction_isolate_freepages $@ diff --git a/tools/perf/scripts/python/bin/compaction-times-report b/tools/perf/scripts/python/bin/compaction-times-report new file mode 100644 index 000000000000..3dc13897cfde --- /dev/null +++ b/tools/perf/scripts/python/bin/compaction-times-report @@ -0,0 +1,4 @@ +#!/bin/bash +#description: display time taken by mm compaction +#args: [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex] +perf script -s "$PERF_EXEC_PATH"/scripts/python/compaction-times.py $@ diff --git a/tools/perf/scripts/python/call-graph-from-postgresql.py b/tools/perf/scripts/python/call-graph-from-postgresql.py new file mode 100644 index 000000000000..e78fdc2a5a9d --- /dev/null +++ 
b/tools/perf/scripts/python/call-graph-from-postgresql.py @@ -0,0 +1,327 @@ +#!/usr/bin/python2 +# call-graph-from-postgresql.py: create call-graph from postgresql database +# Copyright (c) 2014, Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. + +# To use this script you will need to have exported data using the +# export-to-postgresql.py script. Refer to that script for details. +# +# Following on from the example in the export-to-postgresql.py script, a +# call-graph can be displayed for the pt_example database like this: +# +# python tools/perf/scripts/python/call-graph-from-postgresql.py pt_example +# +# Note this script supports connecting to remote databases by setting hostname, +# port, username, password, and dbname e.g. +# +# python tools/perf/scripts/python/call-graph-from-postgresql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example" +# +# The result is a GUI window with a tree representing a context-sensitive +# call-graph. 
Expanding a couple of levels of the tree and adjusting column +# widths to suit will display something like: +# +# Call Graph: pt_example +# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%) +# v- ls +# v- 2638:2638 +# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0 +# |- unknown unknown 1 13198 0.1 1 0.0 +# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3 +# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3 +# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4 +# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1 +# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0 +# >- __libc_csu_init ls 1 10354 0.1 10 0.0 +# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0 +# v- main ls 1 8182043 99.6 180254 99.9 +# +# Points to note: +# The top level is a command name (comm) +# The next level is a thread (pid:tid) +# Subsequent levels are functions +# 'Count' is the number of calls +# 'Time' is the elapsed time until the function returns +# Percentages are relative to the level above +# 'Branch Count' is the total number of branches for that function and all +# functions that it calls + +import sys +from PySide.QtCore import * +from PySide.QtGui import * +from PySide.QtSql import * +from decimal import * + +class TreeItem(): + + def __init__(self, db, row, parent_item): + self.db = db + self.row = row + self.parent_item = parent_item + self.query_done = False; + self.child_count = 0 + self.child_items = [] + self.data = ["", "", "", "", "", "", ""] + self.comm_id = 0 + self.thread_id = 0 + self.call_path_id = 1 + self.branch_count = 0 + self.time = 0 + if not parent_item: + self.setUpRoot() + + def setUpRoot(self): + self.query_done = True + query = QSqlQuery(self.db) + ret = query.exec_('SELECT id, comm FROM comms') + if not ret: + raise Exception("Query failed: " + query.lastError().text()) + while query.next(): + if not query.value(0): + continue + child_item = TreeItem(self.db, self.child_count, self) + self.child_items.append(child_item) + self.child_count += 1 + 
child_item.setUpLevel1(query.value(0), query.value(1)) + + def setUpLevel1(self, comm_id, comm): + self.query_done = True; + self.comm_id = comm_id + self.data[0] = comm + self.child_items = [] + self.child_count = 0 + query = QSqlQuery(self.db) + ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id)) + if not ret: + raise Exception("Query failed: " + query.lastError().text()) + while query.next(): + child_item = TreeItem(self.db, self.child_count, self) + self.child_items.append(child_item) + self.child_count += 1 + child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2)) + + def setUpLevel2(self, comm_id, thread_id, pid, tid): + self.comm_id = comm_id + self.thread_id = thread_id + self.data[0] = str(pid) + ":" + str(tid) + + def getChildItem(self, row): + return self.child_items[row] + + def getParentItem(self): + return self.parent_item + + def getRow(self): + return self.row + + def timePercent(self, b): + if not self.time: + return "0.0" + x = (b * Decimal(100)) / self.time + return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP)) + + def branchPercent(self, b): + if not self.branch_count: + return "0.0" + x = (b * Decimal(100)) / self.branch_count + return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP)) + + def addChild(self, call_path_id, name, dso, count, time, branch_count): + child_item = TreeItem(self.db, self.child_count, self) + child_item.comm_id = self.comm_id + child_item.thread_id = self.thread_id + child_item.call_path_id = call_path_id + child_item.branch_count = branch_count + child_item.time = time + child_item.data[0] = name + if dso == "[kernel.kallsyms]": + dso = "[kernel]" + child_item.data[1] = dso + child_item.data[2] = str(count) + child_item.data[3] = str(time) + child_item.data[4] = self.timePercent(time) + child_item.data[5] = str(branch_count) + child_item.data[6] = 
self.branchPercent(branch_count) + self.child_items.append(child_item) + self.child_count += 1 + + def selectCalls(self): + self.query_done = True; + query = QSqlQuery(self.db) + ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, ' + '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), ' + '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), ' + '( SELECT ip FROM call_paths where id = call_path_id ) ' + 'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) + + 'ORDER BY call_path_id') + if not ret: + raise Exception("Query failed: " + query.lastError().text()) + last_call_path_id = 0 + name = "" + dso = "" + count = 0 + branch_count = 0 + total_branch_count = 0 + time = 0 + total_time = 0 + while query.next(): + if query.value(1) == last_call_path_id: + count += 1 + branch_count += query.value(2) + time += query.value(4) - query.value(3) + else: + if count: + self.addChild(last_call_path_id, name, dso, count, time, branch_count) + last_call_path_id = query.value(1) + name = query.value(5) + dso = query.value(6) + count = 1 + total_branch_count += branch_count + total_time += time + branch_count = query.value(2) + time = query.value(4) - query.value(3) + if count: + self.addChild(last_call_path_id, name, dso, count, time, branch_count) + total_branch_count += branch_count + total_time += time + # Top level does not have time or branch count, so fix that here + if total_branch_count > self.branch_count: + self.branch_count = total_branch_count + if self.branch_count: + for child_item in self.child_items: + child_item.data[6] = self.branchPercent(child_item.branch_count) + if total_time > self.time: + self.time = total_time + if self.time: + for child_item in self.child_items: + child_item.data[4] = 
self.timePercent(child_item.time) + + def childCount(self): + if not self.query_done: + self.selectCalls() + return self.child_count + + def columnCount(self): + return 7 + + def columnHeader(self, column): + headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] + return headers[column] + + def getData(self, column): + return self.data[column] + +class TreeModel(QAbstractItemModel): + + def __init__(self, db, parent=None): + super(TreeModel, self).__init__(parent) + self.db = db + self.root = TreeItem(db, 0, None) + + def columnCount(self, parent): + return self.root.columnCount() + + def rowCount(self, parent): + if parent.isValid(): + parent_item = parent.internalPointer() + else: + parent_item = self.root + return parent_item.childCount() + + def headerData(self, section, orientation, role): + if role == Qt.TextAlignmentRole: + if section > 1: + return Qt.AlignRight + if role != Qt.DisplayRole: + return None + if orientation != Qt.Horizontal: + return None + return self.root.columnHeader(section) + + def parent(self, child): + child_item = child.internalPointer() + if child_item is self.root: + return QModelIndex() + parent_item = child_item.getParentItem() + return self.createIndex(parent_item.getRow(), 0, parent_item) + + def index(self, row, column, parent): + if parent.isValid(): + parent_item = parent.internalPointer() + else: + parent_item = self.root + child_item = parent_item.getChildItem(row) + return self.createIndex(row, column, child_item) + + def data(self, index, role): + if role == Qt.TextAlignmentRole: + if index.column() > 1: + return Qt.AlignRight + if role != Qt.DisplayRole: + return None + index_item = index.internalPointer() + return index_item.getData(index.column()) + +class MainWindow(QMainWindow): + + def __init__(self, db, dbname, parent=None): + super(MainWindow, self).__init__(parent) + + self.setObjectName("MainWindow") + self.setWindowTitle("Call Graph: " + dbname) + 
self.move(100, 100) + self.resize(800, 600) + style = self.style() + icon = style.standardIcon(QStyle.SP_MessageBoxInformation) + self.setWindowIcon(icon); + + self.model = TreeModel(db) + + self.view = QTreeView() + self.view.setModel(self.model) + + self.setCentralWidget(self.view) + +if __name__ == '__main__': + if (len(sys.argv) < 2): + print >> sys.stderr, "Usage is: call-graph-from-postgresql.py <database name>" + raise Exception("Too few arguments") + + dbname = sys.argv[1] + + db = QSqlDatabase.addDatabase('QPSQL') + + opts = dbname.split() + for opt in opts: + if '=' in opt: + opt = opt.split('=') + if opt[0] == 'hostname': + db.setHostName(opt[1]) + elif opt[0] == 'port': + db.setPort(int(opt[1])) + elif opt[0] == 'username': + db.setUserName(opt[1]) + elif opt[0] == 'password': + db.setPassword(opt[1]) + elif opt[0] == 'dbname': + dbname = opt[1] + else: + dbname = opt + + db.setDatabaseName(dbname) + if not db.open(): + raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) + + app = QApplication(sys.argv) + window = MainWindow(db, dbname) + window.show() + err = app.exec_() + db.close() + sys.exit(err) diff --git a/tools/perf/scripts/python/compaction-times.py b/tools/perf/scripts/python/compaction-times.py new file mode 100644 index 000000000000..239cb0568ec3 --- /dev/null +++ b/tools/perf/scripts/python/compaction-times.py @@ -0,0 +1,311 @@ +# report time spent in compaction +# Licensed under the terms of the GNU GPL License version 2 + +# testing: +# 'echo 1 > /proc/sys/vm/compact_memory' to force compaction of all zones + +import os +import sys +import re + +import signal +signal.signal(signal.SIGPIPE, signal.SIG_DFL) + +usage = "usage: perf script report compaction-times.py -- [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex]\n" + +class popt: + DISP_DFL = 0 + DISP_PROC = 1 + DISP_PROC_VERBOSE=2 + +class topt: + DISP_TIME = 0 + DISP_MIG = 1 + DISP_ISOLFREE = 2 + DISP_ISOLMIG = 4 + DISP_ALL = 7 
+ +class comm_filter: + def __init__(self, re): + self.re = re + + def filter(self, pid, comm): + m = self.re.search(comm) + return m == None or m.group() == "" + +class pid_filter: + def __init__(self, low, high): + self.low = (0 if low == "" else int(low)) + self.high = (0 if high == "" else int(high)) + + def filter(self, pid, comm): + return not (pid >= self.low and (self.high == 0 or pid <= self.high)) + +def set_type(t): + global opt_disp + opt_disp = (t if opt_disp == topt.DISP_ALL else opt_disp|t) + +def ns(sec, nsec): + return (sec * 1000000000) + nsec + +def time(ns): + return "%dns" % ns if opt_ns else "%dus" % (round(ns, -3) / 1000) + +class pair: + def __init__(self, aval, bval, alabel = None, blabel = None): + self.alabel = alabel + self.blabel = blabel + self.aval = aval + self.bval = bval + + def __add__(self, rhs): + self.aval += rhs.aval + self.bval += rhs.bval + return self + + def __str__(self): + return "%s=%d %s=%d" % (self.alabel, self.aval, self.blabel, self.bval) + +class cnode: + def __init__(self, ns): + self.ns = ns + self.migrated = pair(0, 0, "moved", "failed") + self.fscan = pair(0,0, "scanned", "isolated") + self.mscan = pair(0,0, "scanned", "isolated") + + def __add__(self, rhs): + self.ns += rhs.ns + self.migrated += rhs.migrated + self.fscan += rhs.fscan + self.mscan += rhs.mscan + return self + + def __str__(self): + prev = 0 + s = "%s " % time(self.ns) + if (opt_disp & topt.DISP_MIG): + s += "migration: %s" % self.migrated + prev = 1 + if (opt_disp & topt.DISP_ISOLFREE): + s += "%sfree_scanner: %s" % (" " if prev else "", self.fscan) + prev = 1 + if (opt_disp & topt.DISP_ISOLMIG): + s += "%smigration_scanner: %s" % (" " if prev else "", self.mscan) + return s + + def complete(self, secs, nsecs): + self.ns = ns(secs, nsecs) - self.ns + + def increment(self, migrated, fscan, mscan): + if (migrated != None): + self.migrated += migrated + if (fscan != None): + self.fscan += fscan + if (mscan != None): + self.mscan += mscan + + 
+class chead: + heads = {} + val = cnode(0); + fobj = None + + @classmethod + def add_filter(cls, filter): + cls.fobj = filter + + @classmethod + def create_pending(cls, pid, comm, start_secs, start_nsecs): + filtered = 0 + try: + head = cls.heads[pid] + filtered = head.is_filtered() + except KeyError: + if cls.fobj != None: + filtered = cls.fobj.filter(pid, comm) + head = cls.heads[pid] = chead(comm, pid, filtered) + + if not filtered: + head.mark_pending(start_secs, start_nsecs) + + @classmethod + def increment_pending(cls, pid, migrated, fscan, mscan): + head = cls.heads[pid] + if not head.is_filtered(): + if head.is_pending(): + head.do_increment(migrated, fscan, mscan) + else: + sys.stderr.write("missing start compaction event for pid %d\n" % pid) + + @classmethod + def complete_pending(cls, pid, secs, nsecs): + head = cls.heads[pid] + if not head.is_filtered(): + if head.is_pending(): + head.make_complete(secs, nsecs) + else: + sys.stderr.write("missing start compaction event for pid %d\n" % pid) + + @classmethod + def gen(cls): + if opt_proc != popt.DISP_DFL: + for i in cls.heads: + yield cls.heads[i] + + @classmethod + def str(cls): + return cls.val + + def __init__(self, comm, pid, filtered): + self.comm = comm + self.pid = pid + self.val = cnode(0) + self.pending = None + self.filtered = filtered + self.list = [] + + def __add__(self, rhs): + self.ns += rhs.ns + self.val += rhs.val + return self + + def mark_pending(self, secs, nsecs): + self.pending = cnode(ns(secs, nsecs)) + + def do_increment(self, migrated, fscan, mscan): + self.pending.increment(migrated, fscan, mscan) + + def make_complete(self, secs, nsecs): + self.pending.complete(secs, nsecs) + chead.val += self.pending + + if opt_proc != popt.DISP_DFL: + self.val += self.pending + + if opt_proc == popt.DISP_PROC_VERBOSE: + self.list.append(self.pending) + self.pending = None + + def enumerate(self): + if opt_proc == popt.DISP_PROC_VERBOSE and not self.is_filtered(): + for i, pelem in 
enumerate(self.list): + sys.stdout.write("%d[%s].%d: %s\n" % (self.pid, self.comm, i+1, pelem)) + + def is_pending(self): + return self.pending != None + + def is_filtered(self): + return self.filtered + + def display(self): + if not self.is_filtered(): + sys.stdout.write("%d[%s]: %s\n" % (self.pid, self.comm, self.val)) + + +def trace_end(): + sys.stdout.write("total: %s\n" % chead.str()) + for i in chead.gen(): + i.display(), + i.enumerate() + +def compaction__mm_compaction_migratepages(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, nr_migrated, nr_failed): + + chead.increment_pending(common_pid, + pair(nr_migrated, nr_failed), None, None) + +def compaction__mm_compaction_isolate_freepages(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken): + + chead.increment_pending(common_pid, + None, pair(nr_scanned, nr_taken), None) + +def compaction__mm_compaction_isolate_migratepages(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken): + + chead.increment_pending(common_pid, + None, None, pair(nr_scanned, nr_taken)) + +def compaction__mm_compaction_end(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, zone_start, migrate_start, free_start, zone_end, + sync, status): + + chead.complete_pending(common_pid, common_secs, common_nsecs) + +def compaction__mm_compaction_begin(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, zone_start, migrate_start, free_start, zone_end, + sync): + + chead.create_pending(common_pid, common_comm, common_secs, common_nsecs) + +def pr_help(): + global usage + + sys.stdout.write(usage) + sys.stdout.write("\n") + sys.stdout.write("-h display this help\n") + sys.stdout.write("-p display 
by process\n") + sys.stdout.write("-pv display by process (verbose)\n") + sys.stdout.write("-t display stall times only\n") + sys.stdout.write("-m display stats for migration\n") + sys.stdout.write("-fs display stats for free scanner\n") + sys.stdout.write("-ms display stats for migration scanner\n") + sys.stdout.write("-u display results in microseconds (default nanoseconds)\n") + + +comm_re = None +pid_re = None +pid_regex = "^(\d*)-(\d*)$|^(\d*)$" + +opt_proc = popt.DISP_DFL +opt_disp = topt.DISP_ALL + +opt_ns = True + +argc = len(sys.argv) - 1 +if argc >= 1: + pid_re = re.compile(pid_regex) + + for i, opt in enumerate(sys.argv[1:]): + if opt[0] == "-": + if opt == "-h": + pr_help() + exit(0); + elif opt == "-p": + opt_proc = popt.DISP_PROC + elif opt == "-pv": + opt_proc = popt.DISP_PROC_VERBOSE + elif opt == '-u': + opt_ns = False + elif opt == "-t": + set_type(topt.DISP_TIME) + elif opt == "-m": + set_type(topt.DISP_MIG) + elif opt == "-fs": + set_type(topt.DISP_ISOLFREE) + elif opt == "-ms": + set_type(topt.DISP_ISOLMIG) + else: + sys.exit(usage) + + elif i == argc - 1: + m = pid_re.search(opt) + if m != None and m.group() != "": + if m.group(3) != None: + f = pid_filter(m.group(3), m.group(3)) + else: + f = pid_filter(m.group(1), m.group(2)) + else: + try: + comm_re=re.compile(opt) + except: + sys.stderr.write("invalid regex '%s'" % opt) + sys.exit(usage) + f = comm_filter(comm_re) + + chead.add_filter(f) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 4cdafd880074..1b02cdc0cab6 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -15,6 +15,189 @@ import sys import struct import datetime +# To use this script you will need to have installed package python-pyside which +# provides LGPL-licensed Python bindings for Qt. You will also need the package +# libqt4-sql-psql for Qt postgresql support. 
+# +# The script assumes postgresql is running on the local machine and that the +# user has postgresql permissions to create databases. Examples of installing +# postgresql and adding such a user are: +# +# fedora: +# +# $ sudo yum install postgresql postgresql-server python-pyside qt-postgresql +# $ sudo su - postgres -c initdb +# $ sudo service postgresql start +# $ sudo su - postgres +# $ createuser <your user id here> +# Shall the new role be a superuser? (y/n) y +# +# ubuntu: +# +# $ sudo apt-get install postgresql +# $ sudo su - postgres +# $ createuser <your user id here> +# Shall the new role be a superuser? (y/n) y +# +# An example of using this script with Intel PT: +# +# $ perf record -e intel_pt//u ls +# $ perf script -s ~/libexec/perf-core/scripts/python/export-to-postgresql.py pt_example branches calls +# 2015-05-29 12:49:23.464364 Creating database... +# 2015-05-29 12:49:26.281717 Writing to intermediate files... +# 2015-05-29 12:49:27.190383 Copying to database... +# 2015-05-29 12:49:28.140451 Removing intermediate files... +# 2015-05-29 12:49:28.147451 Adding primary keys +# 2015-05-29 12:49:28.655683 Adding foreign keys +# 2015-05-29 12:49:29.365350 Done +# +# To browse the database, psql can be used e.g. +# +# $ psql pt_example +# pt_example=# select * from samples_view where id < 100; +# pt_example=# \d+ +# pt_example=# \d+ samples_view +# pt_example=# \q +# +# An example of using the database is provided by the script +# call-graph-from-postgresql.py. Refer to that script for details. +# +# Tables: +# +# The tables largely correspond to perf tools' data structures. They are largely self-explanatory. +# +# samples +# +# 'samples' is the main table. It represents what instruction was executing at a point in time +# when something (a selected event) happened. The memory address is the instruction pointer or 'ip'. +# +# calls +# +# 'calls' represents function calls and is related to 'samples' by 'call_id' and 'return_id'. 
+# 'calls' is only created when the 'calls' option to this script is specified. +# +# call_paths +# +# 'call_paths' represents all the call stacks. Each 'call' has an associated record in 'call_paths'. +# 'call_paths' is only created when the 'calls' option to this script is specified. +# +# branch_types +# +# 'branch_types' provides descriptions for each type of branch. +# +# comm_threads +# +# 'comm_threads' shows how 'comms' relates to 'threads'. +# +# comms +# +# 'comms' contains a record for each 'comm' - the name given to the executable that is running. +# +# dsos +# +# 'dsos' contains a record for each executable file or library. +# +# machines +# +# 'machines' can be used to distinguish virtual machines if virtualization is supported. +# +# selected_events +# +# 'selected_events' contains a record for each kind of event that has been sampled. +# +# symbols +# +# 'symbols' contains a record for each symbol. Only symbols that have samples are present. +# +# threads +# +# 'threads' contains a record for each thread. +# +# Views: +# +# Most of the tables have views for more friendly display. The views are: +# +# calls_view +# call_paths_view +# comm_threads_view +# dsos_view +# machines_view +# samples_view +# symbols_view +# threads_view +# +# More examples of browsing the database with psql: +# Note that some of the example queries are not the most efficient SQL. +# Note that call information is only available if the script's 'calls' option has been used. 
+# +# Top 10 function calls (not aggregated by symbol): +# +# SELECT * FROM calls_view ORDER BY elapsed_time DESC LIMIT 10; +# +# Top 10 function calls (aggregated by symbol): +# +# SELECT symbol_id,(SELECT name FROM symbols WHERE id = symbol_id) AS symbol, +# SUM(elapsed_time) AS tot_elapsed_time,SUM(branch_count) AS tot_branch_count +# FROM calls_view GROUP BY symbol_id ORDER BY tot_elapsed_time DESC LIMIT 10; +# +# Note that the branch count gives a rough estimation of cpu usage, so functions +# that took a long time but have a relatively low branch count must have spent time +# waiting. +# +# Find symbols by pattern matching on part of the name (e.g. names containing 'alloc'): +# +# SELECT * FROM symbols_view WHERE name LIKE '%alloc%'; +# +# Top 10 function calls for a specific symbol (e.g. whose symbol_id is 187): +# +# SELECT * FROM calls_view WHERE symbol_id = 187 ORDER BY elapsed_time DESC LIMIT 10; +# +# Show function calls made by a function in the same context (i.e. same call path) (e.g. one with call_path_id 254): +# +# SELECT * FROM calls_view WHERE parent_call_path_id = 254; +# +# Show branches made during a function call (e.g. where call_id is 29357 and return_id is 29370 and tid is 29670): +# +# SELECT * FROM samples_view WHERE id >= 29357 AND id <= 29370 AND tid = 29670 AND event LIKE 'branches%'; +# +# Show transactions: +# +# SELECT * FROM samples_view WHERE event = 'transactions'; +# +# Note that a transaction start has 'in_tx' true, whereas a transaction end has 'in_tx' false. +# Transaction aborts have branch_type_name 'transaction abort'. +# +# Show transaction aborts: +# +# SELECT * FROM samples_view WHERE event = 'transactions' AND branch_type_name = 'transaction abort'; +# +# Printing a call stack requires walking the call_paths table.
For example this python script: +# #!/usr/bin/python2 +# +# import sys +# from PySide.QtSql import * +# +# if __name__ == '__main__': +# if (len(sys.argv) < 3): +# print >> sys.stderr, "Usage is: printcallstack.py <database name> <call_path_id>" +# raise Exception("Too few arguments") +# dbname = sys.argv[1] +# call_path_id = sys.argv[2] +# db = QSqlDatabase.addDatabase('QPSQL') +# db.setDatabaseName(dbname) +# if not db.open(): +# raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) +# query = QSqlQuery(db) +# print " id ip symbol_id symbol dso_id dso_short_name" +# while call_path_id != 0 and call_path_id != 1: +# ret = query.exec_('SELECT * FROM call_paths_view WHERE id = ' + str(call_path_id)) +# if not ret: +# raise Exception("Query failed: " + query.lastError().text()) +# if not query.next(): +# raise Exception("Query failed") +# print "{0:>6} {1:>10} {2:>9} {3:<30} {4:>6} {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5)) +# call_path_id = query.value(6) + from PySide.QtSql import * # Need to access PostgreSQL C library directly to use COPY FROM STDIN @@ -197,6 +380,91 @@ if perf_db_export_calls: 'parent_call_path_id bigint,' 'flags integer)') +do_query(query, 'CREATE VIEW machines_view AS ' + 'SELECT ' + 'id,' + 'pid,' + 'root_dir,' + 'CASE WHEN id=0 THEN \'unknown\' WHEN pid=-1 THEN \'host\' ELSE \'guest\' END AS host_or_guest' + ' FROM machines') + +do_query(query, 'CREATE VIEW dsos_view AS ' + 'SELECT ' + 'id,' + 'machine_id,' + '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,' + 'short_name,' + 'long_name,' + 'build_id' + ' FROM dsos') + +do_query(query, 'CREATE VIEW symbols_view AS ' + 'SELECT ' + 'id,' + 'name,' + '(SELECT short_name FROM dsos WHERE id=dso_id) AS dso,' + 'dso_id,' + 'sym_start,' + 'sym_end,' + 'CASE WHEN binding=0 THEN \'local\' WHEN binding=1 THEN \'global\' ELSE \'weak\' END AS binding' + ' FROM 
symbols') + +do_query(query, 'CREATE VIEW threads_view AS ' + 'SELECT ' + 'id,' + 'machine_id,' + '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,' + 'process_id,' + 'pid,' + 'tid' + ' FROM threads') + +do_query(query, 'CREATE VIEW comm_threads_view AS ' + 'SELECT ' + 'comm_id,' + '(SELECT comm FROM comms WHERE id = comm_id) AS command,' + 'thread_id,' + '(SELECT pid FROM threads WHERE id = thread_id) AS pid,' + '(SELECT tid FROM threads WHERE id = thread_id) AS tid' + ' FROM comm_threads') + +if perf_db_export_calls: + do_query(query, 'CREATE VIEW call_paths_view AS ' + 'SELECT ' + 'c.id,' + 'to_hex(c.ip) AS ip,' + 'c.symbol_id,' + '(SELECT name FROM symbols WHERE id = c.symbol_id) AS symbol,' + '(SELECT dso_id FROM symbols WHERE id = c.symbol_id) AS dso_id,' + '(SELECT dso FROM symbols_view WHERE id = c.symbol_id) AS dso_short_name,' + 'c.parent_id,' + 'to_hex(p.ip) AS parent_ip,' + 'p.symbol_id AS parent_symbol_id,' + '(SELECT name FROM symbols WHERE id = p.symbol_id) AS parent_symbol,' + '(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,' + '(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name' + ' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id') + do_query(query, 'CREATE VIEW calls_view AS ' + 'SELECT ' + 'calls.id,' + 'thread_id,' + '(SELECT pid FROM threads WHERE id = thread_id) AS pid,' + '(SELECT tid FROM threads WHERE id = thread_id) AS tid,' + '(SELECT comm FROM comms WHERE id = comm_id) AS command,' + 'call_path_id,' + 'to_hex(ip) AS ip,' + 'symbol_id,' + '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,' + 'call_time,' + 'return_time,' + 'return_time - call_time AS elapsed_time,' + 'branch_count,' + 'call_id,' + 'return_id,' + 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' + 'parent_call_path_id' + ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') 
+ do_query(query, 'CREATE VIEW samples_view AS ' 'SELECT ' 'id,' diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index d20d6e6ab65b..50de2253cff6 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -8,7 +8,6 @@ perf-y += openat-syscall-all-cpus.o perf-y += openat-syscall-tp-fields.o perf-y += mmap-basic.o perf-y += perf-record.o -perf-y += rdpmc.o perf-y += evsel-roundtrip-name.o perf-y += evsel-tp-sched.o perf-y += fdarray.o @@ -32,8 +31,8 @@ perf-y += sample-parsing.o perf-y += parse-no-sample-id-all.o perf-y += kmod-path.o perf-y += thread-map.o - -perf-$(CONFIG_X86) += perf-time-to-tsc.o +perf-y += llvm.o +perf-y += topology.o ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c new file mode 100644 index 000000000000..410a70b93b93 --- /dev/null +++ b/tools/perf/tests/bpf-script-example.c @@ -0,0 +1,44 @@ +#ifndef LINUX_VERSION_CODE +# error Need LINUX_VERSION_CODE +# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' +#endif +#define BPF_ANY 0 +#define BPF_MAP_TYPE_ARRAY 2 +#define BPF_FUNC_map_lookup_elem 1 +#define BPF_FUNC_map_update_elem 2 + +static void *(*bpf_map_lookup_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_lookup_elem; +static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) = + (void *) BPF_FUNC_map_update_elem; + +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; +}; + +#define SEC(NAME) __attribute__((section(NAME), used)) +struct bpf_map_def SEC("maps") flip_table = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1, +}; + +SEC("func=sys_epoll_pwait") +int bpf_func__sys_epoll_pwait(void *ctx) +{ + int ind =0; + int *flag = bpf_map_lookup_elem(&flip_table, &ind); + 
int new_flag; + if (!flag) + return 0; + /* flip flag and store back */ + new_flag = !*flag; + bpf_map_update_elem(&flip_table, &ind, &new_flag, BPF_ANY); + return new_flag; +} +char _license[] SEC("license") = "GPL"; +int _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index c1dde733c3a6..66f72d3d6677 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -14,10 +14,13 @@ #include "parse-options.h" #include "symbol.h" -static struct test { - const char *desc; - int (*func)(void); -} tests[] = { +struct test __weak arch_tests[] = { + { + .func = NULL, + }, +}; + +static struct test generic_tests[] = { { .desc = "vmlinux symtab matches kallsyms", .func = test__vmlinux_matches_kallsyms, @@ -38,12 +41,6 @@ static struct test { .desc = "parse events tests", .func = test__parse_events, }, -#if defined(__x86_64__) || defined(__i386__) - { - .desc = "x86 rdpmc test", - .func = test__rdpmc, - }, -#endif { .desc = "Validate PERF_RECORD_* events & perf_sample fields", .func = test__PERF_RECORD, @@ -104,12 +101,6 @@ static struct test { .desc = "Test software clock events have valid period values", .func = test__sw_clock_freq, }, -#if defined(__x86_64__) || defined(__i386__) - { - .desc = "Test converting perf time to TSC", - .func = test__perf_time_to_tsc, - }, -#endif { .desc = "Test object code reading", .func = test__code_reading, @@ -126,14 +117,6 @@ static struct test { .desc = "Test parsing with no sample_id_all bit set", .func = test__parse_no_sample_id_all, }, -#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) -#ifdef HAVE_DWARF_UNWIND_SUPPORT - { - .desc = "Test dwarf unwind", - .func = test__dwarf_unwind, - }, -#endif -#endif { .desc = "Test filtering hist entries", .func = test__hists_filter, @@ -175,11 +158,24 @@ static struct test { .func = test__thread_map, }, { + .desc = "Test LLVM searching and compiling", + .func 
= test__llvm, + }, + { + .desc = "Test topology in session", + .func = test_session_topology, + }, + { .func = NULL, }, }; -static bool perf_test__matches(int curr, int argc, const char *argv[]) +static struct test *tests[] = { + generic_tests, + arch_tests, +}; + +static bool perf_test__matches(struct test *test, int curr, int argc, const char *argv[]) { int i; @@ -196,7 +192,7 @@ static bool perf_test__matches(int curr, int argc, const char *argv[]) continue; } - if (strstr(tests[curr].desc, argv[i])) + if (strstr(test->desc, argv[i])) return true; } @@ -233,27 +229,31 @@ static int run_test(struct test *test) return err; } +#define for_each_test(j, t) \ + for (j = 0; j < ARRAY_SIZE(tests); j++) \ + for (t = &tests[j][0]; t->func; t++) + static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) { + struct test *t; + unsigned int j; int i = 0; int width = 0; - while (tests[i].func) { - int len = strlen(tests[i].desc); + for_each_test(j, t) { + int len = strlen(t->desc); if (width < len) width = len; - ++i; } - i = 0; - while (tests[i].func) { + for_each_test(j, t) { int curr = i++, err; - if (!perf_test__matches(curr, argc, argv)) + if (!perf_test__matches(t, curr, argc, argv)) continue; - pr_info("%2d: %-*s:", i, width, tests[curr].desc); + pr_info("%2d: %-*s:", i, width, t->desc); if (intlist__find(skiplist, i)) { color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); @@ -261,8 +261,8 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) } pr_debug("\n--- start ---\n"); - err = run_test(&tests[curr]); - pr_debug("---- end ----\n%s:", tests[curr].desc); + err = run_test(t); + pr_debug("---- end ----\n%s:", t->desc); switch (err) { case TEST_OK: @@ -283,15 +283,15 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) static int perf_test__list(int argc, const char **argv) { + unsigned int j; + struct test *t; int i = 0; - while (tests[i].func) { - int curr = i++; - - if (argc > 
1 && !strstr(tests[curr].desc, argv[1])) + for_each_test(j, t) { + if (argc > 1 && !strstr(t->desc, argv[1])) continue; - pr_info("%2d: %s\n", i, tests[curr].desc); + pr_info("%2d: %s\n", ++i, t->desc); } return 0; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 39c784a100a9..49b1959dda41 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -33,20 +33,20 @@ static unsigned int hex(char c) return c - 'A' + 10; } -static void read_objdump_line(const char *line, size_t line_len, void **buf, - size_t *len) +static size_t read_objdump_line(const char *line, size_t line_len, void *buf, + size_t len) { const char *p; - size_t i; + size_t i, j = 0; /* Skip to a colon */ p = strchr(line, ':'); if (!p) - return; + return 0; i = p + 1 - line; /* Read bytes */ - while (*len) { + while (j < len) { char c1, c2; /* Skip spaces */ @@ -65,20 +65,26 @@ static void read_objdump_line(const char *line, size_t line_len, void **buf, if (i < line_len && line[i] && !isspace(line[i])) break; /* Store byte */ - *(unsigned char *)*buf = (hex(c1) << 4) | hex(c2); - *buf += 1; - *len -= 1; + *(unsigned char *)buf = (hex(c1) << 4) | hex(c2); + buf += 1; + j++; } + /* return number of successfully read bytes */ + return j; } -static int read_objdump_output(FILE *f, void **buf, size_t *len) +static int read_objdump_output(FILE *f, void *buf, size_t *len, u64 start_addr) { char *line = NULL; - size_t line_len; + size_t line_len, off_last = 0; ssize_t ret; int err = 0; + u64 addr, last_addr = start_addr; + + while (off_last < *len) { + size_t off, read_bytes, written_bytes; + unsigned char tmp[BUFSZ]; - while (1) { ret = getline(&line, &line_len, f); if (feof(f)) break; @@ -87,9 +93,33 @@ static int read_objdump_output(FILE *f, void **buf, size_t *len) err = -1; break; } - read_objdump_line(line, ret, buf, len); + + /* read objdump data into temporary buffer */ + read_bytes = read_objdump_line(line, ret, tmp, sizeof(tmp)); + if 
(!read_bytes) + continue; + + if (sscanf(line, "%"PRIx64, &addr) != 1) + continue; + if (addr < last_addr) { + pr_debug("addr going backwards, read beyond section?\n"); + break; + } + last_addr = addr; + + /* copy it from temporary buffer to 'buf' according + * to address on current objdump line */ + off = addr - start_addr; + if (off >= *len) + break; + written_bytes = MIN(read_bytes, *len - off); + memcpy(buf + off, tmp, written_bytes); + off_last = off + written_bytes; } + /* len returns number of bytes that could not be read */ + *len -= off_last; + free(line); return err; @@ -103,7 +133,7 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf, FILE *f; int ret; - fmt = "%s -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s"; + fmt = "%s -z -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s"; ret = snprintf(cmd, sizeof(cmd), fmt, "objdump", addr, addr + len, filename); if (ret <= 0 || (size_t)ret >= sizeof(cmd)) @@ -120,7 +150,7 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf, return -1; } - ret = read_objdump_output(f, &buf, &len); + ret = read_objdump_output(f, buf, &len, addr); if (len) { pr_debug("objdump read too few bytes\n"); if (!ret) @@ -132,6 +162,18 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf, return ret; } +static void dump_buf(unsigned char *buf, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + pr_debug("0x%02x ", buf[i]); + if (i % 16 == 15) + pr_debug("\n"); + } + pr_debug("\n"); +} + static int read_object_code(u64 addr, size_t len, u8 cpumode, struct thread *thread, struct state *state) { @@ -234,6 +276,10 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, /* The results should be identical */ if (memcmp(buf1, buf2, len)) { pr_debug("Bytes read differ from those read by objdump\n"); + pr_debug("buf1 (dso):\n"); + dump_buf(buf1, len); + pr_debug("buf2 (objdump):\n"); + dump_buf(buf2, len); return -1; } pr_debug("Bytes read match 
those read by objdump\n"); @@ -427,7 +473,7 @@ static int do_test_code_reading(bool try_kcore) symbol_conf.kallsyms_name = "/proc/kallsyms"; /* Load kernel map */ - map = machine->vmlinux_maps[MAP__FUNCTION]; + map = machine__kernel_map(machine); ret = map__load(map, NULL); if (ret < 0) { pr_debug("map__load failed\n"); diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 40b36c462427..07221793a3ac 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -11,6 +11,10 @@ #include "thread.h" #include "callchain.h" +#if defined (__x86_64__) || defined (__i386__) +#include "arch-tests.h" +#endif + /* For bsearch. We try to unwind functions in shared object. */ #include <stdlib.h> diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 52162425c969..790e413d9a1f 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -1,3 +1,4 @@ +#include <linux/err.h> #include <traceevent/event-parse.h> #include "evsel.h" #include "tests.h" @@ -36,8 +37,8 @@ int test__perf_evsel__tp_sched_test(void) struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch"); int ret = 0; - if (evsel == NULL) { - pr_debug("perf_evsel__new\n"); + if (IS_ERR(evsel)) { + pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel)); return -1; } @@ -66,6 +67,11 @@ int test__perf_evsel__tp_sched_test(void) evsel = perf_evsel__newtp("sched", "sched_wakeup"); + if (IS_ERR(evsel)) { + pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel)); + return -1; + } + if (perf_evsel__test_field(evsel, "comm", 16, true)) ret = -1; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 7d82c8be5e36..7ed737019de7 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -279,6 +279,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; 
symbol_conf.cumulate_callchain = false; + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); @@ -425,6 +426,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = false; + perf_evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); @@ -482,6 +484,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = true; + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); @@ -665,6 +668,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = true; + perf_evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index ce48775e6ada..818acf875dd0 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -16,30 +16,31 @@ struct sample { struct thread *thread; struct map *map; struct symbol *sym; + int socket; }; /* For the numbers, see hists_common.c */ static struct sample fake_samples[] = { /* perf [kernel] schedule() */ - { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, .socket = 0 }, /* perf [perf] main() */ - { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, .socket = 0 }, /* perf [libc] malloc() */ - { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, .socket = 0 }, /* perf [perf] main() */ - { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */ + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, .socket = 0 }, /* will be 
merged */ /* perf [perf] cmd_record() */ - { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, .socket = 1 }, /* perf [kernel] page_fault() */ - { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, .socket = 1 }, /* bash [bash] main() */ - { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, .socket = 2 }, /* bash [bash] xmalloc() */ - { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, .socket = 2 }, /* bash [libc] malloc() */ - { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, .socket = 3 }, /* bash [kernel] page_fault() */ - { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, .socket = 3 }, }; static int add_hist_entries(struct perf_evlist *evlist, @@ -83,6 +84,7 @@ static int add_hist_entries(struct perf_evlist *evlist, &sample) < 0) goto out; + al.socket = fake_samples[i].socket; if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, NULL) < 0) { addr_location__put(&al); @@ -253,6 +255,39 @@ int test__hists_filter(void) TEST_ASSERT_VAL("Unmatched total period for symbol filter", hists->stats.total_non_filtered_period == 300); + /* remove symbol filter first */ + hists->symbol_filter_str = NULL; + hists__filter_by_symbol(hists); + + /* now applying socket filters */ + hists->socket_filter = 2; + hists__filter_by_socket(hists); + + if (verbose > 2) { + pr_info("Histogram for socket filters\n"); + print_hists_out(hists); + } + + /* normal stats should be invariant */ + TEST_ASSERT_VAL("Invalid nr samples", + hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + TEST_ASSERT_VAL("Invalid nr hist entries", + hists->nr_entries == 9); + TEST_ASSERT_VAL("Invalid total period", + hists->stats.total_period == 
1000); + + /* but filter stats are changed */ + TEST_ASSERT_VAL("Unmatched nr samples for socket filter", + hists->stats.nr_non_filtered_samples == 2); + TEST_ASSERT_VAL("Unmatched nr hist entries for socket filter", + hists->nr_non_filtered_entries == 2); + TEST_ASSERT_VAL("Unmatched total period for socket filter", + hists->stats.total_non_filtered_period == 200); + + /* remove socket filter first */ + hists->socket_filter = -1; + hists__filter_by_socket(hists); + /* now applying all filters at once. */ hists->thread_filter = fake_samples[1].thread; hists->dso_filter = fake_samples[1].map->dso; diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c new file mode 100644 index 000000000000..52d55971f66f --- /dev/null +++ b/tools/perf/tests/llvm.c @@ -0,0 +1,98 @@ +#include <stdio.h> +#include <bpf/libbpf.h> +#include <util/llvm-utils.h> +#include <util/cache.h> +#include "tests.h" +#include "debug.h" + +static int perf_config_cb(const char *var, const char *val, + void *arg __maybe_unused) +{ + return perf_default_config(var, val, arg); +} + +/* + * Randomly give it a "version" section since we don't really load it + * into kernel + */ +static const char test_bpf_prog[] = + "__attribute__((section(\"do_fork\"), used)) " + "int fork(void *ctx) {return 0;} " + "char _license[] __attribute__((section(\"license\"), used)) = \"GPL\";" + "int _version __attribute__((section(\"version\"), used)) = 0x40100;"; + +#ifdef HAVE_LIBBPF_SUPPORT +static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) +{ + struct bpf_object *obj; + + obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL); + if (!obj) + return -1; + bpf_object__close(obj); + return 0; +} +#else +static int test__bpf_parsing(void *obj_buf __maybe_unused, + size_t obj_buf_sz __maybe_unused) +{ + fprintf(stderr, " (skip bpf parsing)"); + return 0; +} +#endif + +int test__llvm(void) +{ + char *tmpl_new, *clang_opt_new; + void *obj_buf; + size_t obj_buf_sz; + int err, old_verbose; + + 
perf_config(perf_config_cb, NULL); + + /* + * Skip this test if user's .perfconfig doesn't set [llvm] section + * and clang is not found in $PATH, and this is not perf test -v + */ + if (verbose == 0 && !llvm_param.user_set_param && llvm__search_clang()) { + fprintf(stderr, " (no clang, try 'perf test -v LLVM')"); + return TEST_SKIP; + } + + old_verbose = verbose; + /* + * llvm is verbosity when error. Suppress all error output if + * not 'perf test -v'. + */ + if (verbose == 0) + verbose = -1; + + if (!llvm_param.clang_bpf_cmd_template) + return -1; + + if (!llvm_param.clang_opt) + llvm_param.clang_opt = strdup(""); + + err = asprintf(&tmpl_new, "echo '%s' | %s", test_bpf_prog, + llvm_param.clang_bpf_cmd_template); + if (err < 0) + return -1; + err = asprintf(&clang_opt_new, "-xc %s", llvm_param.clang_opt); + if (err < 0) + return -1; + + llvm_param.clang_bpf_cmd_template = tmpl_new; + llvm_param.clang_opt = clang_opt_new; + err = llvm__compile_bpf("-", &obj_buf, &obj_buf_sz); + + verbose = old_verbose; + if (err) { + if (!verbose) + fprintf(stderr, " (use -v to see error message)"); + return -1; + } + + err = test__bpf_parsing(obj_buf, obj_buf_sz); + free(obj_buf); + return err; +} diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 729112f4cfaa..2cbd0c6901e3 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -44,6 +44,7 @@ make_no_libnuma := NO_LIBNUMA=1 make_no_libaudit := NO_LIBAUDIT=1 make_no_libbionic := NO_LIBBIONIC=1 make_no_auxtrace := NO_AUXTRACE=1 +make_no_libbpf := NO_LIBBPF=1 make_tags := tags make_cscope := cscope make_help := help @@ -58,14 +59,15 @@ make_install_man := install-man make_install_html := install-html make_install_info := install-info make_install_pdf := install-pdf -make_install_prefix := install prefix=/tmp/krava +make_install_prefix := install prefix=/tmp/krava +make_install_prefix_slash := install prefix=/tmp/krava/ make_static := LDFLAGS=-static # all the NO_* variable combined make_minimal := 
NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 -make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 +make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 # $(run) contains all available tests run := make_pure @@ -93,6 +95,7 @@ run += make_no_libnuma run += make_no_libaudit run += make_no_libbionic run += make_no_auxtrace +run += make_no_libbpf run += make_help run += make_doc run += make_perf_o @@ -101,6 +104,7 @@ run += make_util_pmu_bison_o run += make_install run += make_install_bin run += make_install_prefix +run += make_install_prefix_slash # FIXME 'install-*' commented out till they're fixed # run += make_install_doc # run += make_install_man @@ -175,11 +179,14 @@ test_make_install_O := $(call test_dest_files,$(installed_files_all)) test_make_install_bin := $(call test_dest_files,$(installed_files_bin)) test_make_install_bin_O := $(call test_dest_files,$(installed_files_bin)) -# We prefix all installed files for make_install_prefix +# We prefix all installed files for make_install_prefix(_slash) # with '/tmp/krava' to match installed/prefix-ed files. 
installed_files_all_prefix := $(addprefix /tmp/krava/,$(installed_files_all)) -test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix)) -test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix)) +test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix)) +test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix)) + +test_make_install_prefix_slash := $(test_make_install_prefix) +test_make_install_prefix_slash_O := $(test_make_install_prefix_O) # FIXME nothing gets installed test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1 diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 666b67a4df9d..4495493c9431 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -3,6 +3,7 @@ #include "thread_map.h" #include "cpumap.h" #include "tests.h" +#include <linux/err.h> /* * This test will generate random numbers of calls to some getpid syscalls, @@ -65,7 +66,7 @@ int test__basic_mmap(void) snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]); evsels[i] = perf_evsel__newtp("syscalls", name); - if (evsels[i] == NULL) { + if (IS_ERR(evsels[i])) { pr_debug("perf_evsel__new\n"); goto out_delete_evlist; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index a572f87e9c8d..2006485a2859 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -1,3 +1,5 @@ +#include <api/fs/fs.h> +#include <linux/err.h> #include "evsel.h" #include "tests.h" #include "thread_map.h" @@ -14,6 +16,7 @@ int test__openat_syscall_event_on_all_cpus(void) cpu_set_t cpu_set; struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX); char sbuf[STRERR_BUFSIZE]; + char errbuf[BUFSIZ]; if (threads == NULL) { pr_debug("thread_map__new\n"); @@ -29,13 +32,9 @@ int test__openat_syscall_event_on_all_cpus(void) CPU_ZERO(&cpu_set); evsel 
= perf_evsel__newtp("syscalls", "sys_enter_openat"); - if (evsel == NULL) { - if (tracefs_configured()) - pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); - else if (debugfs_configured()) - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); - else - pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); + if (IS_ERR(evsel)) { + tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); + pr_debug("%s\n", errbuf); goto out_thread_map_delete; } diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 01a19626c846..5e811cd8f1c3 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -1,3 +1,4 @@ +#include <linux/err.h> #include "perf.h" #include "evlist.h" #include "evsel.h" @@ -30,7 +31,7 @@ int test__syscall_openat_tp_fields(void) } evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); - if (evsel == NULL) { + if (IS_ERR(evsel)) { pr_debug("%s: perf_evsel__newtp\n", __func__); goto out_delete_evlist; } @@ -88,7 +89,7 @@ int test__syscall_openat_tp_fields(void) err = perf_evsel__parse_sample(evsel, event, &sample); if (err) { - pr_err("Can't parse sample, err = %d\n", err); + pr_debug("Can't parse sample, err = %d\n", err); goto out_delete_evlist; } diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index c9a37bc6b33a..033b54797b8a 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -1,3 +1,5 @@ +#include <api/fs/tracing_path.h> +#include <linux/err.h> #include "thread_map.h" #include "evsel.h" #include "debug.h" @@ -10,6 +12,7 @@ int test__openat_syscall_event(void) unsigned int nr_openat_calls = 111, i; struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX); char sbuf[STRERR_BUFSIZE]; + char errbuf[BUFSIZ]; if (threads == NULL) { pr_debug("thread_map__new\n"); @@ -17,13 +20,9 @@ int 
test__openat_syscall_event(void) } evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); - if (evsel == NULL) { - if (tracefs_configured()) - pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); - else if (debugfs_configured()) - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); - else - pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); + if (IS_ERR(evsel)) { + tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); + pr_debug("%s\n", errbuf); goto out_thread_map_delete; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index d76963f7ad3d..636d7b42d844 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -3,11 +3,11 @@ #include "evsel.h" #include "evlist.h" #include <api/fs/fs.h> -#include <api/fs/tracefs.h> -#include <api/fs/debugfs.h> #include "tests.h" #include "debug.h" +#include "util.h" #include <linux/hw_breakpoint.h> +#include <api/fs/fs.h> #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) @@ -82,8 +82,12 @@ static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + /* + * The period value gets configured within perf_evlist__config, + * while this test executes only parse events method. 
+ */ TEST_ASSERT_VAL("wrong period", - 100000 == evsel->attr.sample_period); + 0 == evsel->attr.sample_period); TEST_ASSERT_VAL("wrong config1", 0 == evsel->attr.config1); TEST_ASSERT_VAL("wrong config2", @@ -406,7 +410,11 @@ static int test__checkevent_pmu(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong config", 10 == evsel->attr.config); TEST_ASSERT_VAL("wrong config1", 1 == evsel->attr.config1); TEST_ASSERT_VAL("wrong config2", 3 == evsel->attr.config2); - TEST_ASSERT_VAL("wrong period", 1000 == evsel->attr.sample_period); + /* + * The period value gets configured within perf_evlist__config, + * while this test executes only parse events method. + */ + TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); return 0; } @@ -471,6 +479,39 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist) return 0; } +static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + /* cpu/config=1,call-graph=fp,time,period=100000/ */ + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); + /* + * The period, time and callgraph value gets configured + * within perf_evlist__config, + * while this test executes only parse events method. 
+ */ + TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); + TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type)); + TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type)); + + /* cpu/config=2,call-graph=no,time=0,period=2000/ */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config); + /* + * The period, time and callgraph value gets configured + * within perf_evlist__config, + * while this test executes only parse events method. + */ + TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); + TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type)); + TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type)); + + return 0; +} + static int test__checkevent_pmu_events(struct perf_evlist *evlist) { struct perf_evsel *evsel = perf_evlist__first(evlist); @@ -1219,25 +1260,24 @@ test__checkevent_breakpoint_len_rw_modifier(struct perf_evlist *evlist) return test__checkevent_breakpoint_rw(evlist); } +static int test__checkevent_precise_max_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_SW_TASK_CLOCK == evsel->attr.config); + return 0; +} + static int count_tracepoints(void) { - char events_path[PATH_MAX]; struct dirent *events_ent; - const char *mountpoint; DIR *events_dir; int cnt = 0; - mountpoint = tracefs_find_mountpoint(); - if (mountpoint) { - scnprintf(events_path, PATH_MAX, "%s/events", - mountpoint); - } else { - mountpoint = debugfs_find_mountpoint(); - scnprintf(events_path, PATH_MAX, "%s/tracing/events", - mountpoint); - } - - events_dir = opendir(events_path); + events_dir = 
opendir(tracing_events_path); TEST_ASSERT_VAL("Can't open events dir", events_dir); @@ -1254,7 +1294,7 @@ static int count_tracepoints(void) continue; scnprintf(sys_path, PATH_MAX, "%s/%s", - events_path, events_ent->d_name); + tracing_events_path, events_ent->d_name); sys_dir = opendir(sys_path); TEST_ASSERT_VAL("Can't open sys dir", sys_dir); @@ -1534,6 +1574,11 @@ static struct evlist_test test__events[] = { .check = test__checkevent_exclude_idle_modifier_1, .id = 46, }, + { + .name = "task-clock:P,cycles", + .check = test__checkevent_precise_max_modifier, + .id = 47, + }, }; static struct evlist_test test__events_pmu[] = { @@ -1547,6 +1592,11 @@ static struct evlist_test test__events_pmu[] = { .check = test__checkevent_pmu_name, .id = 1, }, + { + .name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/", + .check = test__checkevent_pmu_partial_time_callgraph, + .id = 2, + }, }; struct terms_test { @@ -1704,6 +1754,17 @@ static int test_pmu_events(void) return ret; } +static void debug_warn(const char *warn, va_list params) +{ + char msg[1024]; + + if (!verbose) + return; + + vsnprintf(msg, sizeof(msg), warn, params); + fprintf(stderr, " Warning: %s\n", msg); +} + int test__parse_events(void) { int ret1, ret2 = 0; @@ -1715,6 +1776,8 @@ do { \ ret2 = ret1; \ } while (0) + set_warning_routine(debug_warn); + TEST_EVENTS(test__events); if (test_pmu()) diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 1aa21c90731b..5b83f56a3b6f 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -34,6 +34,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) .disabled = 1, .freq = 1, }; + struct cpu_map *cpus; + struct thread_map *threads; attr.sample_freq = 500; @@ -50,14 +52,19 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) } perf_evlist__add(evlist, evsel); - evlist->cpus = cpu_map__dummy_new(); - evlist->threads = thread_map__new_by_tid(getpid()); - if 
(!evlist->cpus || !evlist->threads) { + cpus = cpu_map__dummy_new(); + threads = thread_map__new_by_tid(getpid()); + if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_delete_evlist; + goto out_free_maps; } + perf_evlist__set_maps(evlist, cpus, threads); + + cpus = NULL; + threads = NULL; + if (perf_evlist__open(evlist)) { const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate"; @@ -107,6 +114,9 @@ next_event: err = -1; } +out_free_maps: + cpu_map__put(cpus); + thread_map__put(threads); out_delete_evlist: perf_evlist__delete(evlist); return err; diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 3a8fedef83bc..add16385f13e 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -43,6 +43,8 @@ int test__task_exit(void) }; const char *argv[] = { "true", NULL }; char sbuf[STRERR_BUFSIZE]; + struct cpu_map *cpus; + struct thread_map *threads; signal(SIGCHLD, sig_handler); @@ -58,14 +60,19 @@ int test__task_exit(void) * perf_evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. 
*/ - evlist->cpus = cpu_map__dummy_new(); - evlist->threads = thread_map__new_by_tid(-1); - if (!evlist->cpus || !evlist->threads) { + cpus = cpu_map__dummy_new(); + threads = thread_map__new_by_tid(-1); + if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_delete_evlist; + goto out_free_maps; } + perf_evlist__set_maps(evlist, cpus, threads); + + cpus = NULL; + threads = NULL; + err = perf_evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal); if (err < 0) { @@ -114,6 +121,9 @@ retry: err = -1; } +out_free_maps: + cpu_map__put(cpus); + thread_map__put(threads); out_delete_evlist: perf_evlist__delete(evlist); return err; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index ebb47d96bc0b..c80486969f83 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -24,13 +24,17 @@ enum { TEST_SKIP = -2, }; +struct test { + const char *desc; + int (*func)(void); +}; + /* Tests */ int test__vmlinux_matches_kallsyms(void); int test__openat_syscall_event(void); int test__openat_syscall_event_on_all_cpus(void); int test__basic_mmap(void); int test__PERF_RECORD(void); -int test__rdpmc(void); int test__perf_evsel__roundtrip_name_test(void); int test__perf_evsel__tp_sched_test(void); int test__syscall_openat_tp_fields(void); @@ -46,7 +50,6 @@ int test__bp_signal(void); int test__bp_signal_overflow(void); int test__task_exit(void); int test__sw_clock_freq(void); -int test__perf_time_to_tsc(void); int test__code_reading(void); int test__sample_parsing(void); int test__keep_tracking(void); @@ -62,8 +65,10 @@ int test__fdarray__filter(void); int test__fdarray__add(void); int test__kmod_path__parse(void); int test__thread_map(void); +int test__llvm(void); +int test_session_topology(void); -#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) +#if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; 
struct perf_sample; diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 5acf000939ea..138a0e3431fa 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -20,6 +20,8 @@ int test__thread_map(void) TEST_ASSERT_VAL("wrong comm", thread_map__comm(map, 0) && !strcmp(thread_map__comm(map, 0), "perf")); + TEST_ASSERT_VAL("wrong refcnt", + atomic_read(&map->refcnt) == 1); thread_map__put(map); /* test dummy pid */ @@ -33,6 +35,8 @@ int test__thread_map(void) TEST_ASSERT_VAL("wrong comm", thread_map__comm(map, 0) && !strcmp(thread_map__comm(map, 0), "dummy")); + TEST_ASSERT_VAL("wrong refcnt", + atomic_read(&map->refcnt) == 1); thread_map__put(map); return 0; } diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c new file mode 100644 index 000000000000..f5bb096c3bd9 --- /dev/null +++ b/tools/perf/tests/topology.c @@ -0,0 +1,115 @@ +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include "tests.h" +#include "util.h" +#include "session.h" +#include "evlist.h" +#include "debug.h" + +#define TEMPL "/tmp/perf-test-XXXXXX" +#define DATA_SIZE 10 + +static int get_temp(char *path) +{ + int fd; + + strcpy(path, TEMPL); + + fd = mkstemp(path); + if (fd < 0) { + perror("mkstemp failed"); + return -1; + } + + close(fd); + return 0; +} + +static int session_write_header(char *path) +{ + struct perf_session *session; + struct perf_data_file file = { + .path = path, + .mode = PERF_DATA_MODE_WRITE, + }; + + session = perf_session__new(&file, false, NULL); + TEST_ASSERT_VAL("can't get session", session); + + session->evlist = perf_evlist__new_default(); + TEST_ASSERT_VAL("can't get evlist", session->evlist); + + perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); + perf_header__set_feat(&session->header, HEADER_NRCPUS); + + session->header.data_size += DATA_SIZE; + + TEST_ASSERT_VAL("failed to write header", + !perf_session__write_header(session, session->evlist, file.fd, true)); + + 
perf_session__delete(session); + + return 0; +} + +static int check_cpu_topology(char *path, struct cpu_map *map) +{ + struct perf_session *session; + struct perf_data_file file = { + .path = path, + .mode = PERF_DATA_MODE_READ, + }; + int i; + + session = perf_session__new(&file, false, NULL); + TEST_ASSERT_VAL("can't get session", session); + + for (i = 0; i < session->header.env.nr_cpus_online; i++) { + pr_debug("CPU %d, core %d, socket %d\n", i, + session->header.env.cpu[i].core_id, + session->header.env.cpu[i].socket_id); + } + + for (i = 0; i < map->nr; i++) { + TEST_ASSERT_VAL("Core ID doesn't match", + (session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i, NULL) & 0xffff))); + + TEST_ASSERT_VAL("Socket ID doesn't match", + (session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i, NULL))); + } + + perf_session__delete(session); + + return 0; +} + +int test_session_topology(void) +{ + char path[PATH_MAX]; + struct cpu_map *map; + int ret = -1; + + TEST_ASSERT_VAL("can't get templ file", !get_temp(path)); + + pr_debug("templ file: %s\n", path); + + if (session_write_header(path)) + goto free_path; + + map = cpu_map__new(NULL); + if (map == NULL) { + pr_debug("failed to get system cpumap\n"); + goto free_path; + } + + if (check_cpu_topology(path, map)) + goto free_map; + ret = 0; + +free_map: + cpu_map__put(map); +free_path: + unlink(path); + return ret; +} diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index b34c5fc829ae..d677e018e504 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -68,7 +68,7 @@ int test__vmlinux_matches_kallsyms(void) * to see if the running kernel was relocated by checking if it has the * same value in the vmlinux file we load. 
*/ - kallsyms_map = machine__kernel_map(&kallsyms, type); + kallsyms_map = machine__kernel_map(&kallsyms); /* * Step 5: @@ -80,7 +80,7 @@ int test__vmlinux_matches_kallsyms(void) goto out; } - vmlinux_map = machine__kernel_map(&vmlinux, type); + vmlinux_map = machine__kernel_map(&vmlinux); /* * Step 6: diff --git a/tools/perf/trace/strace/groups/file b/tools/perf/trace/strace/groups/file new file mode 100644 index 000000000000..722e25d200bf --- /dev/null +++ b/tools/perf/trace/strace/groups/file @@ -0,0 +1,20 @@ +access +chmod +creat +execve +faccessat +getcwd +lstat +mkdir +open +openat +quotactl +read +readlink +rename +rmdir +stat +statfs +symlink +unlink +write diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 6680fa5cb9dd..e9703c0829f1 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -46,6 +46,21 @@ void ui_browser__gotorc(struct ui_browser *browser, int y, int x) SLsmg_gotorc(browser->y + y, browser->x + x); } +void ui_browser__write_nstring(struct ui_browser *browser __maybe_unused, const char *msg, + unsigned int width) +{ + slsmg_write_nstring(msg, width); +} + +void ui_browser__printf(struct ui_browser *browser __maybe_unused, const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + slsmg_vprintf(fmt, args); + va_end(args); +} + static struct list_head * ui_browser__list_head_filter_entries(struct ui_browser *browser, struct list_head *pos) @@ -234,7 +249,7 @@ void __ui_browser__show_title(struct ui_browser *browser, const char *title) { SLsmg_gotorc(0, 0); ui_browser__set_color(browser, HE_COLORSET_ROOT); - slsmg_write_nstring(title, browser->width + 1); + ui_browser__write_nstring(browser, title, browser->width + 1); } void ui_browser__show_title(struct ui_browser *browser, const char *title) @@ -378,6 +393,7 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs) if (browser->use_navkeypressed && !browser->navkeypressed) { if (key == K_DOWN || key == K_UP || + (browser->columns && (key == K_LEFT || key == K_RIGHT)) || key == K_PGDN || key == K_PGUP || key == K_HOME || key == K_END || key == ' ') { @@ -406,6 +422,18 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs) browser->seek(browser, -1, SEEK_CUR); } break; + case K_RIGHT: + if (!browser->columns) + goto out; + if (browser->horiz_scroll < browser->columns - 1) + ++browser->horiz_scroll; + break; + case K_LEFT: + if (!browser->columns) + goto out; + if (browser->horiz_scroll != 0) + --browser->horiz_scroll; + break; case K_PGDN: case ' ': if (browser->top_idx + browser->rows > browser->nr_entries - 1) @@ -444,6 +472,7 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs) browser->seek(browser, -offset, SEEK_END); break; default: + out: return key; } } diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 92ae72113965..01781de59532 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -14,7 +14,7 @@ struct ui_browser { u64 index, top_idx; void *top, *entries; - u16 y, x, width, height, rows; + u16 y, x, width, height, rows, columns, horiz_scroll; int current_color; void *priv; const char *title; @@ -37,6 +37,9 @@ void ui_browser__refresh_dimensions(struct ui_browser 
*browser); void ui_browser__reset_index(struct ui_browser *browser); void ui_browser__gotorc(struct ui_browser *browser, int y, int x); +void ui_browser__write_nstring(struct ui_browser *browser, const char *msg, + unsigned int width); +void ui_browser__printf(struct ui_browser *browser, const char *fmt, ...); void ui_browser__write_graph(struct ui_browser *browser, int graph); void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, u64 start, u64 end); @@ -58,8 +61,8 @@ int ui_browser__help_window(struct ui_browser *browser, const char *text); bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text); int ui_browser__input_window(const char *title, const char *text, char *input, const char *exit_msg, int delay_sec); -struct perf_session_env; -int tui__header_window(struct perf_session_env *env); +struct perf_env; +int tui__header_window(struct perf_env *env); void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence); unsigned int ui_browser__argv_refresh(struct ui_browser *browser); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5995a8bd7c69..d4d7cc27252f 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1,7 +1,6 @@ #include "../../util/util.h" #include "../browser.h" #include "../helpline.h" -#include "../libslang.h" #include "../ui.h" #include "../util.h" #include "../../util/annotate.h" @@ -16,6 +15,9 @@ struct disasm_line_samples { u64 nr; }; +#define IPC_WIDTH 6 +#define CYCLES_WIDTH 6 + struct browser_disasm_line { struct rb_node rb_node; u32 idx; @@ -53,6 +55,7 @@ struct annotate_browser { int max_jump_sources; int nr_jumps; bool searching_backwards; + bool have_cycles; u8 addr_width; u8 jumps_width; u8 target_width; @@ -96,6 +99,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br return ui_browser__set_color(&browser->b, color); } +static int annotate_browser__pcnt_width(struct 
annotate_browser *ab) +{ + int w = 7 * ab->nr_events; + + if (ab->have_cycles) + w += IPC_WIDTH + CYCLES_WIDTH; + return w; +} + static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); @@ -106,7 +118,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int (!current_entry || (browser->use_navkeypressed && !browser->navkeypressed))); int width = browser->width, printed; - int i, pcnt_width = 7 * ab->nr_events; + int i, pcnt_width = annotate_browser__pcnt_width(ab); double percent_max = 0.0; char bf[256]; @@ -116,19 +128,36 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } if (dl->offset != -1 && percent_max != 0.0) { - for (i = 0; i < ab->nr_events; i++) { - ui_browser__set_percent_color(browser, - bdl->samples[i].percent, - current_entry); - if (annotate_browser__opts.show_total_period) - slsmg_printf("%6" PRIu64 " ", - bdl->samples[i].nr); - else - slsmg_printf("%6.2f ", bdl->samples[i].percent); + if (percent_max != 0.0) { + for (i = 0; i < ab->nr_events; i++) { + ui_browser__set_percent_color(browser, + bdl->samples[i].percent, + current_entry); + if (annotate_browser__opts.show_total_period) { + ui_browser__printf(browser, "%6" PRIu64 " ", + bdl->samples[i].nr); + } else { + ui_browser__printf(browser, "%6.2f ", + bdl->samples[i].percent); + } + } + } else { + ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); } } else { ui_browser__set_percent_color(browser, 0, current_entry); - slsmg_write_nstring(" ", pcnt_width); + ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); + } + if (ab->have_cycles) { + if (dl->ipc) + ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc); + else + ui_browser__write_nstring(browser, " ", IPC_WIDTH); + if (dl->cycles) + ui_browser__printf(browser, "%*" PRIu64 " ", + CYCLES_WIDTH - 1, dl->cycles); + else + 
ui_browser__write_nstring(browser, " ", CYCLES_WIDTH); } SLsmg_write_char(' '); @@ -138,7 +167,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int width += 1; if (!*dl->line) - slsmg_write_nstring(" ", width - pcnt_width); + ui_browser__write_nstring(browser, " ", width - pcnt_width); else if (dl->offset == -1) { if (dl->line_nr && annotate_browser__opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", @@ -146,8 +175,8 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int else printed = scnprintf(bf, sizeof(bf), "%*s ", ab->addr_width, " "); - slsmg_write_nstring(bf, printed); - slsmg_write_nstring(dl->line, width - printed - pcnt_width + 1); + ui_browser__write_nstring(browser, bf, printed); + ui_browser__write_nstring(browser, dl->line, width - printed - pcnt_width + 1); } else { u64 addr = dl->offset; int color = -1; @@ -166,7 +195,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int bdl->jump_sources); prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources, current_entry); - slsmg_write_nstring(bf, printed); + ui_browser__write_nstring(browser, bf, printed); ui_browser__set_color(browser, prev); } @@ -180,7 +209,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (change_color) color = ui_browser__set_color(browser, HE_COLORSET_ADDR); - slsmg_write_nstring(bf, printed); + ui_browser__write_nstring(browser, bf, printed); if (change_color) ui_browser__set_color(browser, color); if (dl->ins && dl->ins->ops->scnprintf) { @@ -194,11 +223,11 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); SLsmg_write_char(' '); } else { - slsmg_write_nstring(" ", 2); + ui_browser__write_nstring(browser, " ", 2); } } else { if (strcmp(dl->name, "retq")) { - slsmg_write_nstring(" ", 2); + ui_browser__write_nstring(browser, " ", 2); } else { 
ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); SLsmg_write_char(' '); @@ -206,7 +235,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset); - slsmg_write_nstring(bf, width - pcnt_width - 3 - printed); + ui_browser__write_nstring(browser, bf, width - pcnt_width - 3 - printed); } if (current_entry) @@ -231,7 +260,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) unsigned int from, to; struct map_symbol *ms = ab->b.priv; struct symbol *sym = ms->sym; - u8 pcnt_width = 7; + u8 pcnt_width = annotate_browser__pcnt_width(ab); /* PLT symbols contain external offsets */ if (strstr(sym->name, "@plt")) @@ -255,8 +284,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) to = (u64)btarget->idx; } - pcnt_width *= ab->nr_events; - ui_browser__set_color(browser, HE_COLORSET_CODE); __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, from, to); @@ -266,9 +293,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); int ret = ui_browser__list_head_refresh(browser); - int pcnt_width; - - pcnt_width = 7 * ab->nr_events; + int pcnt_width = annotate_browser__pcnt_width(ab); if (annotate_browser__opts.jump_arrows) annotate_browser__draw_current_jump(browser); @@ -390,7 +415,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, max_percent = bpos->samples[i].percent; } - if (max_percent < 0.01) { + if (max_percent < 0.01 && pos->ipc == 0) { RB_CLEAR_NODE(&bpos->rb_node); continue; } @@ -743,8 +768,8 @@ static int annotate_browser__run(struct annotate_browser *browser, "UP/DOWN/PGUP\n" "PGDN/SPACE Navigate\n" "q/ESC/CTRL+C Exit\n\n" - "-> Go to target\n" - "<- Exit\n" + "ENTER Go to target\n" + "ESC Exit\n" "H Cycle thru hottest instructions\n" "j Toggle showing jump to 
target arrows\n" "J Toggle showing number of jump sources on targets\n" @@ -869,6 +894,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, return map_symbol__tui_annotate(&he->ms, evsel, hbt); } + +static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end) +{ + unsigned n_insn = 0; + u64 offset; + + for (offset = start; offset <= end; offset++) { + if (browser->offsets[offset]) + n_insn++; + } + return n_insn; +} + +static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, + struct cyc_hist *ch) +{ + unsigned n_insn; + u64 offset; + + n_insn = count_insn(browser, start, end); + if (n_insn && ch->num && ch->cycles) { + float ipc = n_insn / ((double)ch->cycles / (double)ch->num); + + /* Hide data when there are too many overlaps. */ + if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2) + return; + + for (offset = start; offset <= end; offset++) { + struct disasm_line *dl = browser->offsets[offset]; + + if (dl) + dl->ipc = ipc; + } + } +} + +/* + * This should probably be in util/annotate.c to share with the tty + * annotate, but right now we need the per byte offsets arrays, + * which are only here. 
+ */ +static void annotate__compute_ipc(struct annotate_browser *browser, size_t size, + struct symbol *sym) +{ + u64 offset; + struct annotation *notes = symbol__annotation(sym); + + if (!notes->src || !notes->src->cycles_hist) + return; + + pthread_mutex_lock(&notes->lock); + for (offset = 0; offset < size; ++offset) { + struct cyc_hist *ch; + + ch = &notes->src->cycles_hist[offset]; + if (ch && ch->cycles) { + struct disasm_line *dl; + + if (ch->have_start) + count_and_fill(browser, ch->start, offset, ch); + dl = browser->offsets[offset]; + if (dl && ch->num_aggr) + dl->cycles = ch->cycles_aggr / ch->num_aggr; + browser->have_cycles = true; + } + } + pthread_mutex_unlock(&notes->lock); +} + static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, size_t size) { @@ -962,7 +1056,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, goto out_free_offsets; } - ui_helpline__push("Press <- or ESC to exit"); + ui_helpline__push("Press ESC to exit"); notes = symbol__annotation(sym); browser.start = map__rip_2objdump(map, sym->start); @@ -991,6 +1085,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, } annotate_browser__mark_jump_targets(&browser, size); + annotate__compute_ipc(&browser, size, sym); browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size); browser.max_addr_width = hex_width(sym->end); @@ -1030,8 +1125,8 @@ static struct annotate_config { ANNOTATE_CFG(jump_arrows), ANNOTATE_CFG(show_linenr), ANNOTATE_CFG(show_nr_jumps), - ANNOTATE_CFG(use_offset), ANNOTATE_CFG(show_total_period), + ANNOTATE_CFG(use_offset), }; #undef ANNOTATE_CFG @@ -1057,9 +1152,9 @@ static int annotate__config(const char *var, const char *value, sizeof(struct annotate_config), annotate_config__cmp); if (cfg == NULL) - return -1; - - *cfg->value = perf_config_bool(name, value); + ui__warning("%s variable unknown, ignoring...", var); + else + *cfg->value = perf_config_bool(name, value); return 0; } diff --git 
a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c index e8278c558d4a..edbeaaf31ace 100644 --- a/tools/perf/ui/browsers/header.c +++ b/tools/perf/ui/browsers/header.c @@ -25,7 +25,7 @@ static void ui_browser__argv_write(struct ui_browser *browser, ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : HE_COLORSET_NORMAL); - slsmg_write_nstring(str, browser->width); + ui_browser__write_nstring(browser, str, browser->width); } static int list_menu__run(struct ui_browser *menu) @@ -91,7 +91,7 @@ static int ui__list_menu(int argc, char * const argv[]) return list_menu__run(&menu); } -int tui__header_window(struct perf_session_env *env) +int tui__header_window(struct perf_env *env) { int i, argc = 0; char **argv; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index fa67613976a8..e5afb8936040 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1,5 +1,4 @@ #include <stdio.h> -#include "../libslang.h" #include <stdlib.h> #include <string.h> #include <linux/rbtree.h> @@ -27,7 +26,7 @@ struct hist_browser { struct map_symbol *selection; struct hist_browser_timer *hbt; struct pstack *pstack; - struct perf_session_env *env; + struct perf_env *env; int print_seq; bool show_dso; bool show_headers; @@ -540,10 +539,10 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser, ui_browser__set_color(&browser->b, color); hist_browser__gotorc(browser, row, 0); - slsmg_write_nstring(" ", offset); - slsmg_printf("%c", folded_sign); + ui_browser__write_nstring(&browser->b, " ", offset); + ui_browser__printf(&browser->b, "%c", folded_sign); ui_browser__write_graph(&browser->b, show_annotated ? 
SLSMG_RARROW_CHAR : ' '); - slsmg_write_nstring(str, width); + ui_browser__write_nstring(&browser->b, str, width); } static void hist_browser__fprintf_callchain_entry(struct hist_browser *b __maybe_unused, @@ -680,7 +679,7 @@ static int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...) ui_browser__set_percent_color(arg->b, percent, arg->current_entry); ret = scnprintf(hpp->buf, hpp->size, fmt, len, percent); - slsmg_printf("%s", hpp->buf); + ui_browser__printf(arg->b, "%s", hpp->buf); advance_hpp(hpp, ret); return ret; @@ -713,10 +712,11 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt, \ struct hist_entry *he) \ { \ if (!symbol_conf.cumulate_callchain) { \ + struct hpp_arg *arg = hpp->ptr; \ int len = fmt->user_len ?: fmt->len; \ int ret = scnprintf(hpp->buf, hpp->size, \ "%*s", len, "N/A"); \ - slsmg_printf("%s", hpp->buf); \ + ui_browser__printf(arg->b, "%s", hpp->buf); \ \ return ret; \ } \ @@ -784,11 +784,12 @@ static int hist_browser__show_entry(struct hist_browser *browser, .size = sizeof(s), .ptr = &arg, }; + int column = 0; hist_browser__gotorc(browser, row, 0); perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt)) + if (perf_hpp__should_skip(fmt) || column++ < browser->b.horiz_scroll) continue; if (current_entry && browser->b.navkeypressed) { @@ -801,12 +802,12 @@ static int hist_browser__show_entry(struct hist_browser *browser, if (first) { if (symbol_conf.use_callchain) { - slsmg_printf("%c ", folded_sign); + ui_browser__printf(&browser->b, "%c ", folded_sign); width -= 2; } first = false; } else { - slsmg_printf(" "); + ui_browser__printf(&browser->b, " "); width -= 2; } @@ -814,7 +815,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, width -= fmt->color(fmt, &hpp, entry); } else { width -= fmt->entry(fmt, &hpp, entry); - slsmg_printf("%s", s); + ui_browser__printf(&browser->b, "%s", s); } } @@ -822,7 +823,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, if 
(!browser->b.navkeypressed) width += 1; - slsmg_write_nstring("", width); + ui_browser__write_nstring(&browser->b, "", width); ++row; ++printed; @@ -861,14 +862,16 @@ static int advance_hpp_check(struct perf_hpp *hpp, int inc) return hpp->size <= 0; } -static int hists__scnprintf_headers(char *buf, size_t size, struct hists *hists) +static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf, size_t size) { + struct hists *hists = browser->hists; struct perf_hpp dummy_hpp = { .buf = buf, .size = size, }; struct perf_hpp_fmt *fmt; size_t ret = 0; + int column = 0; if (symbol_conf.use_callchain) { ret = scnprintf(buf, size, " "); @@ -877,7 +880,7 @@ static int hists__scnprintf_headers(char *buf, size_t size, struct hists *hists) } perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt)) + if (perf_hpp__should_skip(fmt) || column++ < browser->b.horiz_scroll) continue; ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); @@ -896,10 +899,10 @@ static void hist_browser__show_headers(struct hist_browser *browser) { char headers[1024]; - hists__scnprintf_headers(headers, sizeof(headers), browser->hists); + hists_browser__scnprintf_headers(browser, headers, sizeof(headers)); ui_browser__gotorc(&browser->b, 0, 0); ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); - slsmg_write_nstring(headers, browser->b.width + 1); + ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); } static void ui_browser__hists_init_top(struct ui_browser *browser) @@ -1214,7 +1217,7 @@ static int hist_browser__dump(struct hist_browser *browser) static struct hist_browser *hist_browser__new(struct hists *hists, struct hist_browser_timer *hbt, - struct perf_session_env *env) + struct perf_env *env) { struct hist_browser *browser = zalloc(sizeof(*browser)); @@ -1261,12 +1264,15 @@ static int hists__browser_title(struct hists *hists, int printed; const struct dso *dso = hists->dso_filter; const struct thread *thread = hists->thread_filter; + 
int socket_id = hists->socket_filter; unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; u64 nr_events = hists->stats.total_period; struct perf_evsel *evsel = hists_to_evsel(hists); const char *ev_name = perf_evsel__name(evsel); char buf[512]; size_t buflen = sizeof(buf); + char ref[30] = " show reference callgraph, "; + bool enable_ref = false; if (symbol_conf.filter_relative) { nr_samples = hists->stats.nr_non_filtered_samples; @@ -1292,10 +1298,13 @@ static int hists__browser_title(struct hists *hists, } } + if (symbol_conf.show_ref_callgraph && + strstr(ev_name, "call-graph=no")) + enable_ref = true; nr_samples = convert_unit(nr_samples, &unit); printed = scnprintf(bf, size, - "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64, - nr_samples, unit, ev_name, nr_events); + "Samples: %lu%c of event '%s',%sEvent count (approx.): %" PRIu64, + nr_samples, unit, ev_name, enable_ref ? ref : " ", nr_events); if (hists->uid_filter_str) @@ -1309,6 +1318,9 @@ static int hists__browser_title(struct hists *hists, if (dso) printed += scnprintf(bf + printed, size - printed, ", DSO: %s", dso->short_name); + if (socket_id > -1) + printed += scnprintf(bf + printed, size - printed, + ", Processor Socket: %d", socket_id); if (!is_report_browser(hbt)) { struct perf_top *top = hbt->arg; @@ -1420,6 +1432,7 @@ struct popup_action { struct thread *thread; struct dso *dso; struct map_symbol ms; + int socket; int (*fn)(struct hist_browser *browser, struct popup_action *act); }; @@ -1432,7 +1445,7 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) struct hist_entry *he; int err; - if (!objdump_path && perf_session_env__lookup_objdump(browser->env)) + if (!objdump_path && perf_env__lookup_objdump(browser->env)) return 0; notes = symbol__annotation(act->ms.sym); @@ -1483,7 +1496,7 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) thread__zput(browser->hists->thread_filter); ui_helpline__pop(); } else { - 
ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", + ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"", thread->comm_set ? thread__comm_str(thread) : "", thread->tid); browser->hists->thread_filter = thread__get(thread); @@ -1517,7 +1530,7 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act, static int do_zoom_dso(struct hist_browser *browser, struct popup_action *act) { - struct dso *dso = act->dso; + struct map *map = act->ms.map; if (browser->hists->dso_filter) { pstack__remove(browser->pstack, &browser->hists->dso_filter); @@ -1525,11 +1538,11 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) browser->hists->dso_filter = NULL; ui_helpline__pop(); } else { - if (dso == NULL) + if (map == NULL) return 0; - ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", - dso->kernel ? "the Kernel" : dso->short_name); - browser->hists->dso_filter = dso; + ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s DSO\"", + __map__is_kernel(map) ? "the Kernel" : map->dso->short_name); + browser->hists->dso_filter = map->dso; perf_hpp__set_elide(HISTC_DSO, true); pstack__push(browser->pstack, &browser->hists->dso_filter); } @@ -1541,17 +1554,18 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) static int add_dso_opt(struct hist_browser *browser, struct popup_action *act, - char **optstr, struct dso *dso) + char **optstr, struct map *map) { - if (dso == NULL) + if (map == NULL) return 0; if (asprintf(optstr, "Zoom %s %s DSO", browser->hists->dso_filter ? "out of" : "into", - dso->kernel ? "the Kernel" : dso->short_name) < 0) + __map__is_kernel(map) ? 
"the Kernel" : map->dso->short_name) < 0) return 0; - act->dso = dso; + act->ms.map = map; + act->dso = map->dso; act->fn = do_zoom_dso; return 1; } @@ -1667,6 +1681,41 @@ add_exit_opt(struct hist_browser *browser __maybe_unused, return 1; } +static int +do_zoom_socket(struct hist_browser *browser, struct popup_action *act) +{ + if (browser->hists->socket_filter > -1) { + pstack__remove(browser->pstack, &browser->hists->socket_filter); + browser->hists->socket_filter = -1; + perf_hpp__set_elide(HISTC_SOCKET, false); + } else { + browser->hists->socket_filter = act->socket; + perf_hpp__set_elide(HISTC_SOCKET, true); + pstack__push(browser->pstack, &browser->hists->socket_filter); + } + + hists__filter_by_socket(browser->hists); + hist_browser__reset(browser); + return 0; +} + +static int +add_socket_opt(struct hist_browser *browser, struct popup_action *act, + char **optstr, int socket_id) +{ + if (socket_id < 0) + return 0; + + if (asprintf(optstr, "Zoom %s Processor Socket %d", + (browser->hists->socket_filter > -1) ? 
"out of" : "into", + socket_id) < 0) + return 0; + + act->socket = socket_id; + act->fn = do_zoom_socket; + return 1; +} + static void hist_browser__update_nr_entries(struct hist_browser *hb) { u64 nr_entries = 0; @@ -1690,7 +1739,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, bool left_exits, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_session_env *env) + struct perf_env *env) { struct hists *hists = evsel__hists(evsel); struct hist_browser *browser = hist_browser__new(hists, hbt, env); @@ -1712,14 +1761,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "For multiple event sessions:\n\n" \ "TAB/UNTAB Switch events\n\n" \ "For symbolic views (--sort has sym):\n\n" \ - "-> Zoom into DSO/Threads & Annotate current symbol\n" \ - "<- Zoom out\n" \ + "ENTER Zoom into DSO/Threads & Annotate current symbol\n" \ + "ESC Zoom out\n" \ "a Annotate current symbol\n" \ "C Collapse all callchains\n" \ "d Zoom into current DSO\n" \ "E Expand all callchains\n" \ "F Toggle percentage of filtered entries\n" \ "H Display column headers\n" \ + "m Display context menu\n" \ + "S Zoom into current Processor Socket\n" \ /* help messages are sorted by lexical order of the hotkey */ const char report_help[] = HIST_BROWSER_HELP_COMMON @@ -1750,7 +1801,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, hist_browser__update_nr_entries(browser); } - browser->pstack = pstack__new(2); + browser->pstack = pstack__new(3); if (browser->pstack == NULL) goto out; @@ -1759,8 +1810,17 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, memset(options, 0, sizeof(options)); memset(actions, 0, sizeof(actions)); - perf_hpp__for_each_format(fmt) + perf_hpp__for_each_format(fmt) { perf_hpp__reset_width(fmt, hists); + /* + * This is done just once, and activates the horizontal scrolling + * code in the ui_browser code, it would be better to have a the + * counter in 
the perf_hpp code, but I couldn't find doing it here + * works, FIXME by setting this in hist_browser__new, for now, be + * clever 8-) + */ + ++browser->b.columns; + } if (symbol_conf.col_width_list_str) perf_hpp__set_user_width(symbol_conf.col_width_list_str); @@ -1768,7 +1828,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, while (1) { struct thread *thread = NULL; struct dso *dso = NULL; + struct map *map = NULL; int choice = 0; + int socked_id = -1; nr_options = 0; @@ -1776,7 +1838,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (browser->he_selection != NULL) { thread = hist_browser__selected_thread(browser); - dso = browser->selection->map ? browser->selection->map->dso : NULL; + map = browser->selection->map; + if (map) + dso = map->dso; + socked_id = browser->he_selection->socket; } switch (key) { case K_TAB: @@ -1819,9 +1884,14 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, actions->thread = thread; do_zoom_thread(browser, actions); continue; + case 'S': + actions->socket = socked_id; + do_zoom_socket(browser, actions); + continue; case '/': if (ui_browser__input_window("Symbol to show", - "Please enter the name of symbol you want to see", + "Please enter the name of symbol you want to see.\n" + "To remove the filter later, press / + ENTER.", buf, "ENTER: OK, ESC: Cancel", delay_secs * 2) == K_ENTER) { hists->symbol_filter_str = *buf ? 
buf : NULL; @@ -1866,8 +1936,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; case K_ENTER: case K_RIGHT: + case 'm': /* menu */ break; + case K_ESC: case K_LEFT: { const void *top; @@ -1877,6 +1949,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, */ if (left_exits) goto out_free_stack; + + if (key == K_ESC && + ui_browser__dialog_yesno(&browser->b, + "Do you really want to exit?")) + goto out_free_stack; + continue; } top = pstack__peek(browser->pstack); @@ -1887,17 +1965,13 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, * Ditto for thread below. */ do_zoom_dso(browser, actions); - } - if (top == &browser->hists->thread_filter) + } else if (top == &browser->hists->thread_filter) { do_zoom_thread(browser, actions); + } else if (top == &browser->hists->socket_filter) { + do_zoom_socket(browser, actions); + } continue; } - case K_ESC: - if (!left_exits && - !ui_browser__dialog_yesno(&browser->b, - "Do you really want to exit?")) - continue; - /* Fall thru */ case 'q': case CTRL('c'): goto out_free_stack; @@ -1959,17 +2033,29 @@ skip_annotation: nr_options += add_thread_opt(browser, &actions[nr_options], &options[nr_options], thread); nr_options += add_dso_opt(browser, &actions[nr_options], - &options[nr_options], dso); + &options[nr_options], map); nr_options += add_map_opt(browser, &actions[nr_options], &options[nr_options], - browser->selection->map); - + browser->selection ? 
+ browser->selection->map : NULL); + nr_options += add_socket_opt(browser, &actions[nr_options], + &options[nr_options], + socked_id); /* perf script support */ if (browser->he_selection) { nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], thread, NULL); + /* + * Note that browser->selection != NULL + * when browser->he_selection is not NULL, + * so we don't need to check browser->selection + * before fetching browser->selection->sym like what + * we do before fetching browser->selection->map. + * + * See hist_browser__show_entry. + */ nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], @@ -2010,7 +2096,7 @@ struct perf_evsel_menu { struct perf_evsel *selection; bool lost_events, lost_events_warned; float min_pcnt; - struct perf_session_env *env; + struct perf_env *env; }; static void perf_evsel_menu__write(struct ui_browser *browser, @@ -2044,7 +2130,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser, nr_events = convert_unit(nr_events, &unit); printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events, unit, unit == ' ' ? 
"" : " ", ev_name); - slsmg_printf("%s", bf); + ui_browser__printf(browser, "%s", bf); nr_events = hists->stats.nr_events[PERF_RECORD_LOST]; if (nr_events != 0) { @@ -2057,7 +2143,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser, warn = bf; } - slsmg_write_nstring(warn, browser->width - printed); + ui_browser__write_nstring(browser, warn, browser->width - printed); if (current_entry) menu->selection = evsel; @@ -2120,15 +2206,11 @@ browse_hists: else pos = perf_evsel__prev(pos); goto browse_hists; - case K_ESC: - if (!ui_browser__dialog_yesno(&menu->b, - "Do you really want to exit?")) - continue; - /* Fall thru */ case K_SWITCH_INPUT_DATA: case 'q': case CTRL('c'): goto out; + case K_ESC: default: continue; } @@ -2167,7 +2249,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, int nr_entries, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_session_env *env) + struct perf_env *env) { struct perf_evsel *pos; struct perf_evsel_menu menu = { @@ -2200,7 +2282,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_session_env *env) + struct perf_env *env) { int nr_entries = evlist->nr_entries; diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index b11639f33682..80912778bb6d 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -1,4 +1,3 @@ -#include "../libslang.h" #include <elf.h> #include <inttypes.h> #include <sys/ttydefaults.h> @@ -26,13 +25,13 @@ static void map_browser__write(struct ui_browser *browser, void *nd, int row) int width; ui_browser__set_percent_color(browser, 0, current_entry); - slsmg_printf("%*" PRIx64 " %*" PRIx64 " %c ", - mb->addrlen, sym->start, mb->addrlen, sym->end, - sym->binding == STB_GLOBAL ? 'g' : - sym->binding == STB_LOCAL ? 
'l' : 'w'); + ui_browser__printf(browser, "%*" PRIx64 " %*" PRIx64 " %c ", + mb->addrlen, sym->start, mb->addrlen, sym->end, + sym->binding == STB_GLOBAL ? 'g' : + sym->binding == STB_LOCAL ? 'l' : 'w'); width = browser->width - ((mb->addrlen * 2) + 4); if (width > 0) - slsmg_write_nstring(sym->name, width); + ui_browser__write_nstring(browser, sym->name, width); } /* FIXME uber-kludgy, see comment on cmd_report... */ @@ -73,7 +72,7 @@ static int map_browser__run(struct map_browser *browser) int key; if (ui_browser__show(&browser->b, browser->map->dso->long_name, - "Press <- or ESC to exit, %s / to search", + "Press ESC to exit, %s / to search", verbose ? "" : "restart with -v to use") < 0) return -1; diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 402d2bd30b09..ad6b6ee3770e 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -81,7 +81,7 @@ static void script_browser__write(struct ui_browser *browser, ui_browser__set_color(browser, current_entry ? 
HE_COLORSET_SELECTED : HE_COLORSET_NORMAL); - slsmg_write_nstring(sline->line, browser->width); + ui_browser__write_nstring(browser, sline->line, browser->width); } static int script_browser__run(struct perf_script_browser *browser) @@ -89,7 +89,7 @@ static int script_browser__run(struct perf_script_browser *browser) int key; if (ui_browser__show(&browser->b, browser->script_name, - "Press <- or ESC to exit") < 0) + "Press ESC to exit") < 0) return -1; while (1) { diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 25d608394d74..5029ba2b55af 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -463,27 +463,27 @@ void perf_hpp__init(void) return; if (symbol_conf.cumulate_callchain) { - perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC); + hpp_dimension__add_output(PERF_HPP__OVERHEAD_ACC); perf_hpp__format[PERF_HPP__OVERHEAD].name = "Self"; } - perf_hpp__column_enable(PERF_HPP__OVERHEAD); + hpp_dimension__add_output(PERF_HPP__OVERHEAD); if (symbol_conf.show_cpu_utilization) { - perf_hpp__column_enable(PERF_HPP__OVERHEAD_SYS); - perf_hpp__column_enable(PERF_HPP__OVERHEAD_US); + hpp_dimension__add_output(PERF_HPP__OVERHEAD_SYS); + hpp_dimension__add_output(PERF_HPP__OVERHEAD_US); if (perf_guest) { - perf_hpp__column_enable(PERF_HPP__OVERHEAD_GUEST_SYS); - perf_hpp__column_enable(PERF_HPP__OVERHEAD_GUEST_US); + hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_SYS); + hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_US); } } if (symbol_conf.show_nr_samples) - perf_hpp__column_enable(PERF_HPP__SAMPLES); + hpp_dimension__add_output(PERF_HPP__SAMPLES); if (symbol_conf.show_total_period) - perf_hpp__column_enable(PERF_HPP__PERIOD); + hpp_dimension__add_output(PERF_HPP__PERIOD); /* prepend overhead field for backward compatiblity. 
*/ list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list; diff --git a/tools/perf/ui/libslang.h b/tools/perf/ui/libslang.h index 4d54b6450f5b..db816695ad97 100644 --- a/tools/perf/ui/libslang.h +++ b/tools/perf/ui/libslang.h @@ -14,12 +14,15 @@ #if SLANG_VERSION < 20104 #define slsmg_printf(msg, args...) \ SLsmg_printf((char *)(msg), ##args) +#define slsmg_vprintf(msg, vargs) \ + SLsmg_vprintf((char *)(msg), vargs) #define slsmg_write_nstring(msg, len) \ SLsmg_write_nstring((char *)(msg), len) #define sltt_set_color(obj, name, fg, bg) \ SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg)) #else #define slsmg_printf SLsmg_printf +#define slsmg_vprintf SLsmg_vprintf #define slsmg_write_nstring SLsmg_write_nstring #define sltt_set_color SLtt_set_color #endif diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c index c61d14b101e0..c4b99008e2c9 100644 --- a/tools/perf/ui/tui/progress.c +++ b/tools/perf/ui/tui/progress.c @@ -33,9 +33,26 @@ static void tui_progress__update(struct ui_progress *p) pthread_mutex_unlock(&ui__lock); } +static void tui_progress__finish(void) +{ + int y; + + if (use_browser <= 0) + return; + + ui__refresh_dimensions(false); + pthread_mutex_lock(&ui__lock); + y = SLtt_Screen_Rows / 2 - 2; + SLsmg_set_color(0); + SLsmg_fill_region(y, 0, 3, SLtt_Screen_Cols, ' '); + SLsmg_refresh(); + pthread_mutex_unlock(&ui__lock); +} + static struct ui_progress_ops tui_progress__ops = { - .update = tui_progress__update, + .update = tui_progress__update, + .finish = tui_progress__finish, }; void tui_progress__init(void) diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 60d1f29b4b50..7dfeba0a91f3 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -141,10 +141,6 @@ int ui__init(void) SLkp_define_keysym((char *)"^(kB)", SL_KEY_UNTAB); - ui_helpline__init(); - ui_browser__init(); - tui_progress__init(); - signal(SIGSEGV, ui__signal_backtrace); signal(SIGFPE, ui__signal_backtrace); 
signal(SIGINT, ui__signal); @@ -153,6 +149,10 @@ int ui__init(void) perf_error__register(&perf_tui_eops); + ui_helpline__init(); + ui_browser__init(); + tui_progress__init(); + hist_browser__init_hpp(); out: return err; diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c index bf890f72fe80..d96ad7c8325d 100644 --- a/tools/perf/ui/tui/util.c +++ b/tools/perf/ui/tui/util.c @@ -21,7 +21,7 @@ static void ui_browser__argv_write(struct ui_browser *browser, ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : HE_COLORSET_NORMAL); - slsmg_write_nstring(*arg, browser->width); + ui_browser__write_nstring(browser, *arg, browser->width); } static int popup_menu__run(struct ui_browser *menu) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index d2d318c59b37..591b3fe3ed49 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -5,6 +5,7 @@ libperf-y += build-id.o libperf-y += config.o libperf-y += ctype.o libperf-y += db-export.o +libperf-y += env.o libperf-y += environment.o libperf-y += event.o libperf-y += evlist.o @@ -14,8 +15,10 @@ libperf-y += find_next_bit.o libperf-y += help.o libperf-y += kallsyms.o libperf-y += levenshtein.o +libperf-y += llvm-utils.o libperf-y += parse-options.o libperf-y += parse-events.o +libperf-y += perf_regs.o libperf-y += path.o libperf-y += rbtree.o libperf-y += bitmap.o @@ -67,18 +70,26 @@ libperf-y += target.o libperf-y += rblist.o libperf-y += intlist.o libperf-y += vdso.o +libperf-y += counts.o libperf-y += stat.o libperf-y += stat-shadow.o libperf-y += record.o libperf-y += srcline.o libperf-y += data.o libperf-$(CONFIG_X86) += tsc.o +libperf-$(CONFIG_AUXTRACE) += tsc.o libperf-y += cloexec.o libperf-y += thread-stack.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o +libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ +libperf-$(CONFIG_AUXTRACE) += intel-pt.o +libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-y += parse-branch-options.o +libperf-y += parse-regs-options.o +libperf-$(CONFIG_LIBBPF) += 
bpf-loader.o libperf-$(CONFIG_LIBELF) += symbol-elf.o +libperf-$(CONFIG_LIBELF) += probe-file.o libperf-$(CONFIG_LIBELF) += probe-event.o ifndef CONFIG_LIBELF @@ -95,7 +106,6 @@ libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o libperf-y += scripting-engines/ -libperf-$(CONFIG_PERF_REGS) += perf_regs.o libperf-$(CONFIG_ZLIB) += zlib.o libperf-$(CONFIG_LZMA) += lzma.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 03b7bc70eb66..0fc8d7a2fea5 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym) return 0; } +/* The cycles histogram is lazily allocated. */ +static int symbol__alloc_hist_cycles(struct symbol *sym) +{ + struct annotation *notes = symbol__annotation(sym); + const size_t size = symbol__size(sym); + + notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); + if (notes->src->cycles_hist == NULL) + return -1; + return 0; +} + void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); pthread_mutex_lock(¬es->lock); - if (notes->src != NULL) + if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); + if (notes->src->cycles_hist) + memset(notes->src->cycles_hist, 0, + symbol__size(sym) * sizeof(struct cyc_hist)); + } pthread_mutex_unlock(¬es->lock); } +static int __symbol__account_cycles(struct annotation *notes, + u64 start, + unsigned offset, unsigned cycles, + unsigned have_start) +{ + struct cyc_hist *ch; + + ch = notes->src->cycles_hist; + /* + * For now we can only account one basic block per + * final jump. But multiple could be overlapping. + * Always account the longest one. So when + * a shorter one has been already seen throw it away. + * + * We separately always account the full cycles. 
+ */ + ch[offset].num_aggr++; + ch[offset].cycles_aggr += cycles; + + if (!have_start && ch[offset].have_start) + return 0; + if (ch[offset].num) { + if (have_start && (!ch[offset].have_start || + ch[offset].start > start)) { + ch[offset].have_start = 0; + ch[offset].cycles = 0; + ch[offset].num = 0; + if (ch[offset].reset < 0xffff) + ch[offset].reset++; + } else if (have_start && + ch[offset].start < start) + return 0; + } + ch[offset].have_start = have_start; + ch[offset].start = start; + ch[offset].cycles += cycles; + ch[offset].num++; + return 0; +} + static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, struct annotation *notes, int evidx, u64 addr) { @@ -492,8 +548,11 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); - if (addr < sym->start || addr >= sym->end) + if (addr < sym->start || addr >= sym->end) { + pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n", + __func__, __LINE__, sym->name, sym->start, addr, sym->end); return -ERANGE; + } offset = addr - sym->start; h = annotation__histogram(notes, evidx); @@ -506,7 +565,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, return 0; } -static struct annotation *symbol__get_annotation(struct symbol *sym) +static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles) { struct annotation *notes = symbol__annotation(sym); @@ -514,6 +573,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym) if (symbol__alloc_hist(sym) < 0) return NULL; } + if (!notes->src->cycles_hist && cycles) { + if (symbol__alloc_hist_cycles(sym) < 0) + return NULL; + } return notes; } @@ -524,12 +587,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, if (sym == NULL) return 0; - notes = symbol__get_annotation(sym); + notes = symbol__get_annotation(sym, false); if (notes == NULL) 
return -ENOMEM; return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); } +static int symbol__account_cycles(u64 addr, u64 start, + struct symbol *sym, unsigned cycles) +{ + struct annotation *notes; + unsigned offset; + + if (sym == NULL) + return 0; + notes = symbol__get_annotation(sym, true); + if (notes == NULL) + return -ENOMEM; + if (addr < sym->start || addr >= sym->end) + return -ERANGE; + + if (start) { + if (start < sym->start || start >= sym->end) + return -ERANGE; + if (start >= addr) + start = 0; + } + offset = addr - sym->start; + return __symbol__account_cycles(notes, + start ? start - sym->start : 0, + offset, cycles, + !!start); +} + +int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, + struct addr_map_symbol *start, + unsigned cycles) +{ + u64 saddr = 0; + int err; + + if (!cycles) + return 0; + + /* + * Only set start when IPC can be computed. We can only + * compute it when the basic block is completely in a single + * function. + * Special case the case when the jump is elsewhere, but + * it starts on the function start. + */ + if (start && + (start->sym == ams->sym || + (ams->sym && + start->addr == ams->sym->start + ams->map->start))) + saddr = start->al_addr; + if (saddr == 0) + pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n", + ams->addr, + start ? start->addr : 0, + ams->sym ? 
ams->sym->start + ams->map->start : 0, + saddr); + err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles); + if (err) + pr_debug2("account_cycles failed %d\n", err); + return err; +} + int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx) { return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr); @@ -1005,6 +1129,7 @@ fallback: dso->annotate_warned = 1; pr_err("Can't annotate %s:\n\n" "No vmlinux file%s\nwas found in the path.\n\n" + "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" "Please use:\n\n" " perf buildid-cache -vu vmlinux\n\n" "or:\n\n" diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 7e78e6c27078..cea323d9ee7e 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -59,6 +59,8 @@ struct disasm_line { char *name; struct ins *ins; int line_nr; + float ipc; + u64 cycles; struct ins_operands ops; }; @@ -79,6 +81,17 @@ struct sym_hist { u64 addr[0]; }; +struct cyc_hist { + u64 start; + u64 cycles; + u64 cycles_aggr; + u32 num; + u32 num_aggr; + u8 have_start; + /* 1 byte padding */ + u16 reset; +}; + struct source_line_samples { double percent; double percent_sum; @@ -97,6 +110,7 @@ struct source_line { * @histogram: Array of addr hit histograms per event being monitored * @lines: If 'print_lines' is specified, per source code line percentages * @source: source parsed from a disassembler like objdump -dS + * @cycles_hist: Average cycles per basic block * * lines is allocated, percentages calculated and all sorted by percentage * when the annotation is about to be presented, so the percentages are for @@ -108,7 +122,8 @@ struct annotated_source { struct list_head source; struct source_line *lines; int nr_histograms; - int sizeof_sym_hist; + size_t sizeof_sym_hist; + struct cyc_hist *cycles_hist; struct sym_hist histograms[0]; }; @@ -130,6 +145,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) int 
addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); +int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, + struct addr_map_symbol *start, + unsigned cycles); + int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); int symbol__alloc_hist(struct symbol *sym); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 83d9dd96fe08..7f10430af39c 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -47,6 +47,9 @@ #include "debug.h" #include "parse-options.h" +#include "intel-pt.h" +#include "intel-bts.h" + int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, struct auxtrace_mmap_params *mp, void *userpg, int fd) @@ -876,7 +879,7 @@ static bool auxtrace__dont_decode(struct perf_session *session) int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_session *session __maybe_unused) + struct perf_session *session) { enum auxtrace_type type = event->auxtrace_info.type; @@ -884,6 +887,10 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, fprintf(stdout, " type: %u\n", type); switch (type) { + case PERF_AUXTRACE_INTEL_PT: + return intel_pt_process_auxtrace_info(event, session); + case PERF_AUXTRACE_INTEL_BTS: + return intel_bts_process_auxtrace_info(event, session); case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; @@ -919,6 +926,8 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool, #define PERF_ITRACE_DEFAULT_PERIOD 100000 #define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ 16 #define PERF_ITRACE_MAX_CALLCHAIN_SZ 1024 +#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64 +#define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024 void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) { @@ -929,6 +938,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; 
synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; + synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; } /* @@ -942,6 +952,8 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, struct itrace_synth_opts *synth_opts = opt->value; const char *p; char *endptr; + bool period_type_set = false; + bool period_set = false; synth_opts->set = true; @@ -963,6 +975,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, p += 1; if (isdigit(*p)) { synth_opts->period = strtoull(p, &endptr, 10); + period_set = true; p = endptr; while (*p == ' ' || *p == ',') p += 1; @@ -970,10 +983,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'i': synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; + period_type_set = true; break; case 't': synth_opts->period_type = PERF_ITRACE_PERIOD_TICKS; + period_type_set = true; break; case 'm': synth_opts->period *= 1000; @@ -986,6 +1001,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, goto out_err; synth_opts->period_type = PERF_ITRACE_PERIOD_NANOSECS; + period_type_set = true; break; case '\0': goto out; @@ -1030,6 +1046,23 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, synth_opts->callchain_sz = val; } break; + case 'l': + synth_opts->last_branch = true; + synth_opts->last_branch_sz = + PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; + while (*p == ' ' || *p == ',') + p += 1; + if (isdigit(*p)) { + unsigned int val; + + val = strtoul(p, &endptr, 10); + p = endptr; + if (!val || + val > PERF_ITRACE_MAX_LAST_BRANCH_SZ) + goto out_err; + synth_opts->last_branch_sz = val; + } + break; case ' ': case ',': break; @@ -1039,10 +1072,10 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, } out: if (synth_opts->instructions) { - if (!synth_opts->period_type) + if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; - if (!synth_opts->period) + if (!period_set) 
synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; } diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 471aecbc4d68..b86f90db1352 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -39,6 +39,8 @@ struct events_stats; enum auxtrace_type { PERF_AUXTRACE_UNKNOWN, + PERF_AUXTRACE_INTEL_PT, + PERF_AUXTRACE_INTEL_BTS, }; enum itrace_period_type { @@ -61,7 +63,9 @@ enum itrace_period_type { * @calls: limit branch samples to calls (can be combined with @returns) * @returns: limit branch samples to returns (can be combined with @calls) * @callchain: add callchain to 'instructions' events + * @last_branch: add branch context to 'instruction' events * @callchain_sz: maximum callchain size + * @last_branch_sz: branch context size * @period: 'instructions' events period * @period_type: 'instructions' events period type */ @@ -77,7 +81,9 @@ struct itrace_synth_opts { bool calls; bool returns; bool callchain; + bool last_branch; unsigned int callchain_sz; + unsigned int last_branch_sz; unsigned long long period; enum itrace_period_type period_type; }; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c new file mode 100644 index 000000000000..ba6f7526b282 --- /dev/null +++ b/tools/perf/util/bpf-loader.c @@ -0,0 +1,352 @@ +/* + * bpf-loader.c + * + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + */ + +#include <bpf/libbpf.h> +#include <linux/err.h> +#include "perf.h" +#include "debug.h" +#include "bpf-loader.h" +#include "probe-event.h" +#include "probe-finder.h" // for MAX_PROBES +#include "llvm-utils.h" + +#define DEFINE_PRINT_FN(name, level) \ +static int libbpf_##name(const char *fmt, ...) 
\ +{ \ + va_list args; \ + int ret; \ + \ + va_start(args, fmt); \ + ret = veprintf(level, verbose, pr_fmt(fmt), args);\ + va_end(args); \ + return ret; \ +} + +DEFINE_PRINT_FN(warning, 0) +DEFINE_PRINT_FN(info, 0) +DEFINE_PRINT_FN(debug, 1) + +struct bpf_prog_priv { + struct perf_probe_event pev; +}; + +struct bpf_object *bpf__prepare_load(const char *filename, bool source) +{ + struct bpf_object *obj; + static bool libbpf_initialized; + + if (!libbpf_initialized) { + libbpf_set_print(libbpf_warning, + libbpf_info, + libbpf_debug); + libbpf_initialized = true; + } + + if (source) { + int err; + void *obj_buf; + size_t obj_buf_sz; + + err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz); + if (err) + return ERR_PTR(err); + obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename); + free(obj_buf); + } else + obj = bpf_object__open(filename); + + if (!obj) { + pr_debug("bpf: failed to load %s\n", filename); + return ERR_PTR(-EINVAL); + } + + return obj; +} + +void bpf__clear(void) +{ + struct bpf_object *obj, *tmp; + + bpf_object__for_each_safe(obj, tmp) { + bpf__unprobe(obj); + bpf_object__close(obj); + } +} + +static void +bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused, + void *_priv) +{ + struct bpf_prog_priv *priv = _priv; + + cleanup_perf_probe_events(&priv->pev, 1); + free(priv); +} + +static int +config_bpf_program(struct bpf_program *prog) +{ + struct perf_probe_event *pev = NULL; + struct bpf_prog_priv *priv = NULL; + const char *config_str; + int err; + + config_str = bpf_program__title(prog, false); + if (!config_str) { + pr_debug("bpf: unable to get title for program\n"); + return -EINVAL; + } + + priv = calloc(sizeof(*priv), 1); + if (!priv) { + pr_debug("bpf: failed to alloc priv\n"); + return -ENOMEM; + } + pev = &priv->pev; + + pr_debug("bpf: config program '%s'\n", config_str); + err = parse_perf_probe_command(config_str, pev); + if (err < 0) { + pr_debug("bpf: '%s' is not a valid config string\n", + config_str); + err = 
-EINVAL; + goto errout; + } + + if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) { + pr_debug("bpf: '%s': group for event is set and not '%s'.\n", + config_str, PERF_BPF_PROBE_GROUP); + err = -EINVAL; + goto errout; + } else if (!pev->group) + pev->group = strdup(PERF_BPF_PROBE_GROUP); + + if (!pev->group) { + pr_debug("bpf: strdup failed\n"); + err = -ENOMEM; + goto errout; + } + + if (!pev->event) { + pr_debug("bpf: '%s': event name is missing\n", + config_str); + err = -EINVAL; + goto errout; + } + pr_debug("bpf: config '%s' is ok\n", config_str); + + err = bpf_program__set_private(prog, priv, bpf_prog_priv__clear); + if (err) { + pr_debug("Failed to set priv for program '%s'\n", config_str); + goto errout; + } + + return 0; + +errout: + if (pev) + clear_perf_probe_event(pev); + free(priv); + return err; +} + +static int bpf__prepare_probe(void) +{ + static int err = 0; + static bool initialized = false; + + /* + * Make err static, so if init failed the first, bpf__prepare_probe() + * fails each time without calling init_probe_symbol_maps multiple + * times. 
+ */ + if (initialized) + return err; + + initialized = true; + err = init_probe_symbol_maps(false); + if (err < 0) + pr_debug("Failed to init_probe_symbol_maps\n"); + probe_conf.max_probes = MAX_PROBES; + return err; +} + +int bpf__probe(struct bpf_object *obj) +{ + int err = 0; + struct bpf_program *prog; + struct bpf_prog_priv *priv; + struct perf_probe_event *pev; + + err = bpf__prepare_probe(); + if (err) { + pr_debug("bpf__prepare_probe failed\n"); + return err; + } + + bpf_object__for_each_program(prog, obj) { + err = config_bpf_program(prog); + if (err) + goto out; + + err = bpf_program__get_private(prog, (void **)&priv); + if (err || !priv) + goto out; + pev = &priv->pev; + + err = convert_perf_probe_events(pev, 1); + if (err < 0) { + pr_debug("bpf_probe: failed to convert perf probe events"); + goto out; + } + + err = apply_perf_probe_events(pev, 1); + if (err < 0) { + pr_debug("bpf_probe: failed to apply perf probe events"); + goto out; + } + } +out: + return err < 0 ? err : 0; +} + +#define EVENTS_WRITE_BUFSIZE 4096 +int bpf__unprobe(struct bpf_object *obj) +{ + int err, ret = 0; + struct bpf_program *prog; + struct bpf_prog_priv *priv; + + bpf_object__for_each_program(prog, obj) { + int i; + + err = bpf_program__get_private(prog, (void **)&priv); + if (err || !priv) + continue; + + for (i = 0; i < priv->pev.ntevs; i++) { + struct probe_trace_event *tev = &priv->pev.tevs[i]; + char name_buf[EVENTS_WRITE_BUFSIZE]; + struct strfilter *delfilter; + + snprintf(name_buf, EVENTS_WRITE_BUFSIZE, + "%s:%s", tev->group, tev->event); + name_buf[EVENTS_WRITE_BUFSIZE - 1] = '\0'; + + delfilter = strfilter__new(name_buf, NULL); + if (!delfilter) { + pr_debug("Failed to create filter for unprobing\n"); + ret = -ENOMEM; + continue; + } + + err = del_perf_probe_events(delfilter); + strfilter__delete(delfilter); + if (err) { + pr_debug("Failed to delete %s\n", name_buf); + ret = err; + continue; + } + } + } + return ret; +} + +int bpf__load(struct bpf_object *obj) +{ + 
int err; + + err = bpf_object__load(obj); + if (err) { + pr_debug("bpf: load objects failed\n"); + return err; + } + return 0; +} + +int bpf__foreach_tev(struct bpf_object *obj, + bpf_prog_iter_callback_t func, + void *arg) +{ + struct bpf_program *prog; + int err; + + bpf_object__for_each_program(prog, obj) { + struct probe_trace_event *tev; + struct perf_probe_event *pev; + struct bpf_prog_priv *priv; + int i, fd; + + err = bpf_program__get_private(prog, + (void **)&priv); + if (err || !priv) { + pr_debug("bpf: failed to get private field\n"); + return -EINVAL; + } + + pev = &priv->pev; + for (i = 0; i < pev->ntevs; i++) { + tev = &pev->tevs[i]; + + fd = bpf_program__fd(prog); + if (fd < 0) { + pr_debug("bpf: failed to get file descriptor\n"); + return fd; + } + + err = (*func)(tev, fd, arg); + if (err) { + pr_debug("bpf: call back failed, stop iterate\n"); + return err; + } + } + } + return 0; +} + +#define bpf__strerror_head(err, buf, size) \ + char sbuf[STRERR_BUFSIZE], *emsg;\ + if (!size)\ + return 0;\ + if (err < 0)\ + err = -err;\ + emsg = strerror_r(err, sbuf, sizeof(sbuf));\ + switch (err) {\ + default:\ + scnprintf(buf, size, "%s", emsg);\ + break; + +#define bpf__strerror_entry(val, fmt...)\ + case val: {\ + scnprintf(buf, size, fmt);\ + break;\ + } + +#define bpf__strerror_end(buf, size)\ + }\ + buf[size - 1] = '\0'; + +int bpf__strerror_probe(struct bpf_object *obj __maybe_unused, + int err, char *buf, size_t size) +{ + bpf__strerror_head(err, buf, size); + bpf__strerror_entry(EEXIST, "Probe point exist. 
Try use 'perf probe -d \"*\"'"); + bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0\n"); + bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file\n"); + bpf__strerror_end(buf, size); + return 0; +} + +int bpf__strerror_load(struct bpf_object *obj __maybe_unused, + int err, char *buf, size_t size) +{ + bpf__strerror_head(err, buf, size); + bpf__strerror_entry(EINVAL, "%s: Are you root and runing a CONFIG_BPF_SYSCALL kernel?", + emsg) + bpf__strerror_end(buf, size); + return 0; +} diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h new file mode 100644 index 000000000000..ccd8d7fd79d3 --- /dev/null +++ b/tools/perf/util/bpf-loader.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015, Huawei Inc. + */ +#ifndef __BPF_LOADER_H +#define __BPF_LOADER_H + +#include <linux/compiler.h> +#include <linux/err.h> +#include <string.h> +#include "probe-event.h" +#include "debug.h" + +struct bpf_object; +#define PERF_BPF_PROBE_GROUP "perf_bpf_probe" + +typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev, + int fd, void *arg); + +#ifdef HAVE_LIBBPF_SUPPORT +struct bpf_object *bpf__prepare_load(const char *filename, bool source); + +void bpf__clear(void); + +int bpf__probe(struct bpf_object *obj); +int bpf__unprobe(struct bpf_object *obj); +int bpf__strerror_probe(struct bpf_object *obj, int err, + char *buf, size_t size); + +int bpf__load(struct bpf_object *obj); +int bpf__strerror_load(struct bpf_object *obj, int err, + char *buf, size_t size); +int bpf__foreach_tev(struct bpf_object *obj, + bpf_prog_iter_callback_t func, void *arg); +#else +static inline struct bpf_object * +bpf__prepare_load(const char *filename __maybe_unused, + bool source __maybe_unused) +{ + pr_debug("ERROR: eBPF object loading is disabled during compiling.\n"); + return ERR_PTR(-ENOTSUP); +} + +static inline void bpf__clear(void) { } + +static inline int 
bpf__probe(struct bpf_object *obj __maybe_unused) { return 0;} +static inline int bpf__unprobe(struct bpf_object *obj __maybe_unused) { return 0;} +static inline int bpf__load(struct bpf_object *obj __maybe_unused) { return 0; } + +static inline int +bpf__foreach_tev(struct bpf_object *obj __maybe_unused, + bpf_prog_iter_callback_t func __maybe_unused, + void *arg __maybe_unused) +{ + return 0; +} + +static inline int +__bpf_strerror(char *buf, size_t size) +{ + if (!size) + return 0; + strncpy(buf, + "ERROR: eBPF object loading is disabled during compiling.\n", + size); + buf[size - 1] = '\0'; + return 0; +} + +static inline int +bpf__strerror_probe(struct bpf_object *obj __maybe_unused, + int err __maybe_unused, + char *buf, size_t size) +{ + return __bpf_strerror(buf, size); +} + +static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused, + int err __maybe_unused, + char *buf, size_t size) +{ + return __bpf_strerror(buf, size); +} +#endif +#endif diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 1f6fc2323ef9..d909459fb54c 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -93,6 +93,38 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) return raw - build_id; } +int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) +{ + char notes[PATH_MAX]; + u8 build_id[BUILD_ID_SIZE]; + int ret; + + if (!root_dir) + root_dir = ""; + + scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir); + + ret = sysfs__read_build_id(notes, build_id, sizeof(build_id)); + if (ret < 0) + return ret; + + return build_id__sprintf(build_id, sizeof(build_id), sbuild_id); +} + +int filename__sprintf_build_id(const char *pathname, char *sbuild_id) +{ + u8 build_id[BUILD_ID_SIZE]; + int ret; + + ret = filename__read_build_id(pathname, build_id, sizeof(build_id)); + if (ret < 0) + return ret; + else if (ret != sizeof(build_id)) + return -EINVAL; + + return build_id__sprintf(build_id, 
sizeof(build_id), sbuild_id); +} + /* asnprintf consolidates asprintf and snprintf */ static int asnprintf(char **strp, size_t size, const char *fmt, ...) { @@ -124,7 +156,7 @@ static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) { - char build_id_hex[BUILD_ID_SIZE * 2 + 1]; + char build_id_hex[SBUILD_ID_SIZE]; if (!dso->has_build_id) return NULL; @@ -291,7 +323,7 @@ int build_id_cache__list_build_ids(const char *pathname, struct dirent *d; int ret = 0; - list = strlist__new(true, NULL); + list = strlist__new(NULL, NULL); dir_name = build_id_cache__dirname_from_path(pathname, false, false); if (!list || !dir_name) { ret = -ENOMEM; @@ -384,7 +416,7 @@ static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, const char *name, bool is_kallsyms, bool is_vdso) { - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; build_id__sprintf(build_id, build_id_size, sbuild_id); diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 85011222cc14..27a14a8a945b 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -1,7 +1,8 @@ #ifndef PERF_BUILD_ID_H_ #define PERF_BUILD_ID_H_ 1 -#define BUILD_ID_SIZE 20 +#define BUILD_ID_SIZE 20 +#define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) #include "tool.h" #include "strlist.h" @@ -11,6 +12,9 @@ extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; int build_id__sprintf(const u8 *build_id, int len, char *bf); +int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); +int filename__sprintf_build_id(const char *pathname, char *sbuild_id); + char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9f643ee77001..735ad48e1858 100644 --- a/tools/perf/util/callchain.c +++ 
b/tools/perf/util/callchain.c @@ -25,96 +25,9 @@ __thread struct callchain_cursor callchain_cursor; -#ifdef HAVE_DWARF_UNWIND_SUPPORT -static int get_stack_size(const char *str, unsigned long *_size) -{ - char *endptr; - unsigned long size; - unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); - - size = strtoul(str, &endptr, 0); - - do { - if (*endptr) - break; - - size = round_up(size, sizeof(u64)); - if (!size || size > max_size) - break; - - *_size = size; - return 0; - - } while (0); - - pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", - max_size, str); - return -1; -} -#endif /* HAVE_DWARF_UNWIND_SUPPORT */ - -int parse_callchain_record_opt(const char *arg) +int parse_callchain_record_opt(const char *arg, struct callchain_param *param) { - char *tok, *name, *saveptr = NULL; - char *buf; - int ret = -1; - - /* We need buffer that we know we can write to. */ - buf = malloc(strlen(arg) + 1); - if (!buf) - return -ENOMEM; - - strcpy(buf, arg); - - tok = strtok_r((char *)buf, ",", &saveptr); - name = tok ? 
: (char *)buf; - - do { - /* Framepointer style */ - if (!strncmp(name, "fp", sizeof("fp"))) { - if (!strtok_r(NULL, ",", &saveptr)) { - callchain_param.record_mode = CALLCHAIN_FP; - ret = 0; - } else - pr_err("callchain: No more arguments " - "needed for --call-graph fp\n"); - break; - -#ifdef HAVE_DWARF_UNWIND_SUPPORT - /* Dwarf style */ - } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { - const unsigned long default_stack_dump_size = 8192; - - ret = 0; - callchain_param.record_mode = CALLCHAIN_DWARF; - callchain_param.dump_size = default_stack_dump_size; - - tok = strtok_r(NULL, ",", &saveptr); - if (tok) { - unsigned long size = 0; - - ret = get_stack_size(tok, &size); - callchain_param.dump_size = size; - } -#endif /* HAVE_DWARF_UNWIND_SUPPORT */ - } else if (!strncmp(name, "lbr", sizeof("lbr"))) { - if (!strtok_r(NULL, ",", &saveptr)) { - callchain_param.record_mode = CALLCHAIN_LBR; - ret = 0; - } else - pr_err("callchain: No more arguments " - "needed for --call-graph lbr\n"); - break; - } else { - pr_err("callchain: Unknown --call-graph option " - "value: %s\n", arg); - break; - } - - } while (0); - - free(buf); - return ret; + return parse_callchain_record(arg, param); } static int parse_callchain_mode(const char *value) @@ -138,10 +51,12 @@ static int parse_callchain_order(const char *value) { if (!strncmp(value, "caller", strlen(value))) { callchain_param.order = ORDER_CALLER; + callchain_param.order_set = true; return 0; } if (!strncmp(value, "callee", strlen(value))) { callchain_param.order = ORDER_CALLEE; + callchain_param.order_set = true; return 0; } return -1; @@ -164,12 +79,14 @@ static int parse_callchain_sort_key(const char *value) return -1; } -int -parse_callchain_report_opt(const char *arg) +static int +__parse_callchain_report_opt(const char *arg, bool allow_record_opt) { char *tok; char *endptr; bool minpcnt_set = false; + bool record_opt_set = false; + bool try_stack_size = false; symbol_conf.use_callchain = true; @@ -187,6 +104,28 
@@ parse_callchain_report_opt(const char *arg) !parse_callchain_order(tok) || !parse_callchain_sort_key(tok)) { /* parsing ok - move on to the next */ + try_stack_size = false; + goto next; + } else if (allow_record_opt && !record_opt_set) { + if (parse_callchain_record(tok, &callchain_param)) + goto try_numbers; + + /* assume that number followed by 'dwarf' is stack size */ + if (callchain_param.record_mode == CALLCHAIN_DWARF) + try_stack_size = true; + + record_opt_set = true; + goto next; + } + +try_numbers: + if (try_stack_size) { + unsigned long size = 0; + + if (get_stack_size(tok, &size) < 0) + return -1; + callchain_param.dump_size = size; + try_stack_size = false; } else if (!minpcnt_set) { /* try to get the min percent */ callchain_param.min_percent = strtod(tok, &endptr); @@ -199,7 +138,7 @@ parse_callchain_report_opt(const char *arg) if (tok == endptr) return -1; } - +next: arg = NULL; } @@ -210,6 +149,16 @@ parse_callchain_report_opt(const char *arg) return 0; } +int parse_callchain_report_opt(const char *arg) +{ + return __parse_callchain_report_opt(arg, false); +} + +int parse_callchain_top_opt(const char *arg) +{ + return __parse_callchain_report_opt(arg, true); +} + int perf_callchain_config(const char *var, const char *value) { char *endptr; @@ -219,7 +168,7 @@ int perf_callchain_config(const char *var, const char *value) var += sizeof("call-graph.") - 1; if (!strcmp(var, "record-mode")) - return parse_callchain_record_opt(value); + return parse_callchain_record_opt(value, &callchain_param); #ifdef HAVE_DWARF_UNWIND_SUPPORT if (!strcmp(var, "dump-size")) { unsigned long size = 0; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 679c2c6d8ade..fce8161e54db 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -7,6 +7,30 @@ #include "event.h" #include "symbol.h" +#define HELP_PAD "\t\t\t\t" + +#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n" + +#ifdef 
HAVE_DWARF_UNWIND_SUPPORT +# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n" +#else +# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|lbr)\n" +#endif + +#define RECORD_SIZE_HELP \ + HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording (<bytes>)\n" \ + HELP_PAD "\t\tdefault: 8192 (bytes)\n" + +#define CALLCHAIN_RECORD_HELP CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP + +#define CALLCHAIN_REPORT_HELP \ + HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|none)\n" \ + HELP_PAD "threshold:\tminimum call graph inclusion threshold (<percent>)\n" \ + HELP_PAD "print_limit:\tmaximum number of call graph entry (<number>)\n" \ + HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \ + HELP_PAD "sort_key:\tcall graph sort key (function|address)\n" \ + HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" + enum perf_call_graph_mode { CALLCHAIN_NONE, CALLCHAIN_FP, @@ -63,6 +87,7 @@ struct callchain_param { double min_percent; sort_chain_func_t sort; enum chain_order order; + bool order_set; enum chain_key key; bool branch_callstack; }; @@ -177,8 +202,10 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * bool hide_unresolved); extern const char record_callchain_help[]; -int parse_callchain_record_opt(const char *arg); +extern int parse_callchain_record(const char *arg, struct callchain_param *param); +int parse_callchain_record_opt(const char *arg, struct callchain_param *param); int parse_callchain_report_opt(const char *arg); +int parse_callchain_top_opt(const char *arg); int perf_callchain_config(const char *var, const char *value); static inline void callchain_cursor_snapshot(struct callchain_cursor *dest, diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index 68888c29b04a..3bee6773ddb0 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -4,7 +4,7 @@ unsigned 
long perf_event_open_cloexec_flag(void); #ifdef __GLIBC_PREREQ -#if !__GLIBC_PREREQ(2, 6) +#if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__) extern int sched_getcpu(void) __THROW; #endif #endif diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 55355b3d4f85..9b9565416f90 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -67,8 +67,9 @@ static int __color_vsnprintf(char *bf, size_t size, const char *color, return r; } +/* Colors are not included in return value */ static int __color_vfprintf(FILE *fp, const char *color, const char *fmt, - va_list args, const char *trail) + va_list args) { int r = 0; @@ -83,12 +84,10 @@ static int __color_vfprintf(FILE *fp, const char *color, const char *fmt, } if (perf_use_color_default && *color) - r += fprintf(fp, "%s", color); + fprintf(fp, "%s", color); r += vfprintf(fp, fmt, args); if (perf_use_color_default && *color) - r += fprintf(fp, "%s", PERF_COLOR_RESET); - if (trail) - r += fprintf(fp, "%s", trail); + fprintf(fp, "%s", PERF_COLOR_RESET); return r; } @@ -100,7 +99,7 @@ int color_vsnprintf(char *bf, size_t size, const char *color, int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args) { - return __color_vfprintf(fp, color, fmt, args, NULL); + return __color_vfprintf(fp, color, fmt, args); } int color_snprintf(char *bf, size_t size, const char *color, @@ -126,16 +125,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) return r; } -int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...) -{ - va_list args; - int r; - va_start(args, fmt); - r = __color_vfprintf(fp, color, fmt, args, "\n"); - va_end(args); - return r; -} - /* * This function splits the buffer by newlines and colors the lines individually. 
* diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 38146f922c54..a93997f16dec 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -35,7 +35,6 @@ int color_vsnprintf(char *bf, size_t size, const char *color, int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...); -int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); int value_color_snprintf(char *bf, size_t size, const char *fmt, double value); int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index e18f653cd7db..2e452ac1353d 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -12,6 +12,7 @@ #include "cache.h" #include "exec_cmd.h" #include "util/hist.h" /* perf_hist_config */ +#include "util/llvm-utils.h" /* perf_llvm_config */ #define MAXNAME (256) @@ -408,6 +409,9 @@ int perf_default_config(const char *var, const char *value, if (!prefixcmp(var, "call-graph.")) return perf_callchain_config(var, value); + if (!prefixcmp(var, "llvm.")) + return perf_llvm_config(var, value); + /* Add other config variables here. 
*/ return 0; } diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c new file mode 100644 index 000000000000..e3fde313deb2 --- /dev/null +++ b/tools/perf/util/counts.c @@ -0,0 +1,52 @@ +#include <stdlib.h> +#include "evsel.h" +#include "counts.h" + +struct perf_counts *perf_counts__new(int ncpus, int nthreads) +{ + struct perf_counts *counts = zalloc(sizeof(*counts)); + + if (counts) { + struct xyarray *values; + + values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); + if (!values) { + free(counts); + return NULL; + } + + counts->values = values; + } + + return counts; +} + +void perf_counts__delete(struct perf_counts *counts) +{ + if (counts) { + xyarray__delete(counts->values); + free(counts); + } +} + +static void perf_counts__reset(struct perf_counts *counts) +{ + xyarray__reset(counts->values); +} + +void perf_evsel__reset_counts(struct perf_evsel *evsel) +{ + perf_counts__reset(evsel->counts); +} + +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads) +{ + evsel->counts = perf_counts__new(ncpus, nthreads); + return evsel->counts != NULL ? 
0 : -ENOMEM; +} + +void perf_evsel__free_counts(struct perf_evsel *evsel) +{ + perf_counts__delete(evsel->counts); + evsel->counts = NULL; +} diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h new file mode 100644 index 000000000000..34d8baaf558a --- /dev/null +++ b/tools/perf/util/counts.h @@ -0,0 +1,37 @@ +#ifndef __PERF_COUNTS_H +#define __PERF_COUNTS_H + +#include "xyarray.h" + +struct perf_counts_values { + union { + struct { + u64 val; + u64 ena; + u64 run; + }; + u64 values[3]; + }; +}; + +struct perf_counts { + s8 scaled; + struct perf_counts_values aggr; + struct xyarray *values; +}; + + +static inline struct perf_counts_values* +perf_counts(struct perf_counts *counts, int cpu, int thread) +{ + return xyarray__entry(counts->values, cpu, thread); +} + +struct perf_counts *perf_counts__new(int ncpus, int nthreads); +void perf_counts__delete(struct perf_counts *counts); + +void perf_evsel__reset_counts(struct perf_evsel *evsel); +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); +void perf_evsel__free_counts(struct perf_evsel *evsel); + +#endif /* __PERF_COUNTS_H */ diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 3667e2123e5b..10af1e7524fb 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -203,6 +203,23 @@ struct cpu_map *cpu_map__dummy_new(void) return cpus; } +struct cpu_map *cpu_map__empty_new(int nr) +{ + struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr); + + if (cpus != NULL) { + int i; + + cpus->nr = nr; + for (i = 0; i < nr; i++) + cpus->map[i] = -1; + + atomic_set(&cpus->refcnt, 1); + } + + return cpus; +} + static void cpu_map__delete(struct cpu_map *map) { if (map) { @@ -225,32 +242,32 @@ void cpu_map__put(struct cpu_map *map) cpu_map__delete(map); } -int cpu_map__get_socket(struct cpu_map *map, int idx) +static int cpu__get_topology_int(int cpu, const char *name, int *value) { - FILE *fp; - const char *mnt; char path[PATH_MAX]; - int cpu, ret; - 
if (idx > map->nr) - return -1; + snprintf(path, PATH_MAX, + "devices/system/cpu/cpu%d/topology/%s", cpu, name); - cpu = map->map[idx]; + return sysfs__read_int(path, value); +} - mnt = sysfs__mountpoint(); - if (!mnt) - return -1; +int cpu_map__get_socket_id(int cpu) +{ + int value, ret = cpu__get_topology_int(cpu, "physical_package_id", &value); + return ret ?: value; +} - snprintf(path, PATH_MAX, - "%s/devices/system/cpu/cpu%d/topology/physical_package_id", - mnt, cpu); +int cpu_map__get_socket(struct cpu_map *map, int idx, void *data __maybe_unused) +{ + int cpu; - fp = fopen(path, "r"); - if (!fp) + if (idx > map->nr) return -1; - ret = fscanf(fp, "%d", &cpu); - fclose(fp); - return ret == 1 ? cpu : -1; + + cpu = map->map[idx]; + + return cpu_map__get_socket_id(cpu); } static int cmp_ids(const void *a, const void *b) @@ -258,8 +275,9 @@ static int cmp_ids(const void *a, const void *b) return *(int *)a - *(int *)b; } -static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, - int (*f)(struct cpu_map *map, int cpu)) +int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, + int (*f)(struct cpu_map *map, int cpu, void *data), + void *data) { struct cpu_map *c; int nr = cpus->nr; @@ -271,7 +289,7 @@ static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, return -1; for (cpu = 0; cpu < nr; cpu++) { - s1 = f(cpus, cpu); + s1 = f(cpus, cpu, data); for (s2 = 0; s2 < c->nr; s2++) { if (s1 == c->map[s2]) break; @@ -284,40 +302,29 @@ static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, /* ensure we process id in increasing order */ qsort(c->map, c->nr, sizeof(int), cmp_ids); - atomic_set(&cpus->refcnt, 1); + atomic_set(&c->refcnt, 1); *res = c; return 0; } -int cpu_map__get_core(struct cpu_map *map, int idx) +int cpu_map__get_core_id(int cpu) { - FILE *fp; - const char *mnt; - char path[PATH_MAX]; - int cpu, ret, s; + int value, ret = cpu__get_topology_int(cpu, "core_id", &value); + return ret ?: value; 
+} + +int cpu_map__get_core(struct cpu_map *map, int idx, void *data) +{ + int cpu, s; if (idx > map->nr) return -1; cpu = map->map[idx]; - mnt = sysfs__mountpoint(); - if (!mnt) - return -1; - - snprintf(path, PATH_MAX, - "%s/devices/system/cpu/cpu%d/topology/core_id", - mnt, cpu); - - fp = fopen(path, "r"); - if (!fp) - return -1; - ret = fscanf(fp, "%d", &cpu); - fclose(fp); - if (ret != 1) - return -1; + cpu = cpu_map__get_core_id(cpu); - s = cpu_map__get_socket(map, idx); + s = cpu_map__get_socket(map, idx, data); if (s == -1) return -1; @@ -332,12 +339,12 @@ int cpu_map__get_core(struct cpu_map *map, int idx) int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) { - return cpu_map__build_map(cpus, sockp, cpu_map__get_socket); + return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); } int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep) { - return cpu_map__build_map(cpus, corep, cpu_map__get_core); + return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); } /* setup simple routines to easily access node numbers given a cpu number */ diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 0af9cecb4c51..85f7772457fa 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -15,11 +15,14 @@ struct cpu_map { }; struct cpu_map *cpu_map__new(const char *cpu_list); +struct cpu_map *cpu_map__empty_new(int nr); struct cpu_map *cpu_map__dummy_new(void); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); -int cpu_map__get_socket(struct cpu_map *map, int idx); -int cpu_map__get_core(struct cpu_map *map, int idx); +int cpu_map__get_socket_id(int cpu); +int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); +int cpu_map__get_core_id(int cpu); +int cpu_map__get_core(struct cpu_map *map, int idx, void *data); int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); int cpu_map__build_core_map(struct 
cpu_map *cpus, struct cpu_map **corep); @@ -85,4 +88,7 @@ static inline int cpu__get_node(int cpu) return cpunode_map[cpu]; } +int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, + int (*f)(struct cpu_map *map, int cpu, void *data), + void *data); #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 2da5581ec74d..86d9c7302598 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args) return ret; } +int veprintf(int level, int var, const char *fmt, va_list args) +{ + return _eprintf(level, var, fmt, args); +} + int eprintf(int level, int var, const char *fmt, ...) { va_list args; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index caac2fdc6105..8b9a088c32ab 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...); int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4))); int eprintf_time(int level, int var, u64 t, const char *fmt, ...) 
__attribute__((format(printf, 4, 5))); +int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 2fe98bb0e95b..fc8db9c764ac 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -137,6 +137,10 @@ struct dso { struct rb_node rb_node; /* rbtree node sorted by long name */ struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; + struct { + u64 addr; + struct symbol *symbol; + } last_find_result[MAP__NR_TYPES]; void *a2l; char *symsrc_filename; unsigned int a2l_fails; @@ -320,6 +324,8 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name); struct dso *dsos__findnew(struct dsos *dsos, const char *name); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); +void dso__reset_find_symbol_cache(struct dso *dso); + size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); size_t __dsos__fprintf(struct list_head *head, FILE *fp); diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 57f3ef41c2bc..a509aa8433a1 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -734,15 +734,18 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) Dwarf_Lines *lines; Dwarf_Line *line; Dwarf_Addr addr; - const char *fname; + const char *fname, *decf = NULL; int lineno, ret = 0; + int decl = 0, inl; Dwarf_Die die_mem, *cu_die; size_t nlines, i; /* Get the CU die */ - if (dwarf_tag(rt_die) != DW_TAG_compile_unit) + if (dwarf_tag(rt_die) != DW_TAG_compile_unit) { cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); - else + dwarf_decl_line(rt_die, &decl); + decf = dwarf_decl_file(rt_die); + } else cu_die = rt_die; if (!cu_die) { pr_debug2("Failed to get CU from given DIE.\n"); @@ -767,15 +770,21 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) continue; 
} /* Filter lines based on address */ - if (rt_die != cu_die) + if (rt_die != cu_die) { /* * Address filtering * The line is included in given function, and * no inline block includes it. */ - if (!dwarf_haspc(rt_die, addr) || - die_find_inlinefunc(rt_die, addr, &die_mem)) + if (!dwarf_haspc(rt_die, addr)) continue; + if (die_find_inlinefunc(rt_die, addr, &die_mem)) { + dwarf_decl_line(&die_mem, &inl); + if (inl != decl || + decf != dwarf_decl_file(&die_mem)) + continue; + } + } /* Get source line */ fname = dwarf_linesrc(line, NULL, NULL); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c new file mode 100644 index 000000000000..6af4f7c36820 --- /dev/null +++ b/tools/perf/util/env.c @@ -0,0 +1,86 @@ +#include "cpumap.h" +#include "env.h" +#include "util.h" + +struct perf_env perf_env; + +void perf_env__exit(struct perf_env *env) +{ + zfree(&env->hostname); + zfree(&env->os_release); + zfree(&env->version); + zfree(&env->arch); + zfree(&env->cpu_desc); + zfree(&env->cpuid); + zfree(&env->cmdline); + zfree(&env->cmdline_argv); + zfree(&env->sibling_cores); + zfree(&env->sibling_threads); + zfree(&env->numa_nodes); + zfree(&env->pmu_mappings); + zfree(&env->cpu); +} + +int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) +{ + int i; + + /* + * If env->cmdline_argv has already been set, do not override it. This allows + * a command to set the cmdline, parse args and then call another + * builtin function that implements a command -- e.g, cmd_kvm calling + * cmd_record. 
+ */ + if (env->cmdline_argv != NULL) + return 0; + + /* do not include NULL termination */ + env->cmdline_argv = calloc(argc, sizeof(char *)); + if (env->cmdline_argv == NULL) + goto out_enomem; + + /* + * Must copy argv contents because it gets moved around during option + * parsing: + */ + for (i = 0; i < argc ; i++) { + env->cmdline_argv[i] = argv[i]; + if (env->cmdline_argv[i] == NULL) + goto out_free; + } + + env->nr_cmdline = argc; + + return 0; +out_free: + zfree(&env->cmdline_argv); +out_enomem: + return -ENOMEM; +} + +int perf_env__read_cpu_topology_map(struct perf_env *env) +{ + int cpu, nr_cpus; + + if (env->cpu != NULL) + return 0; + + if (env->nr_cpus_avail == 0) + env->nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF); + + nr_cpus = env->nr_cpus_avail; + if (nr_cpus == -1) + return -EINVAL; + + env->cpu = calloc(nr_cpus, sizeof(env->cpu[0])); + if (env->cpu == NULL) + return -ENOMEM; + + for (cpu = 0; cpu < nr_cpus; ++cpu) { + env->cpu[cpu].core_id = cpu_map__get_core_id(cpu); + env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu); + } + + env->nr_cpus_avail = nr_cpus; + return 0; +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h new file mode 100644 index 000000000000..0132b9557c02 --- /dev/null +++ b/tools/perf/util/env.h @@ -0,0 +1,44 @@ +#ifndef __PERF_ENV_H +#define __PERF_ENV_H + +struct cpu_topology_map { + int socket_id; + int core_id; +}; + +struct perf_env { + char *hostname; + char *os_release; + char *version; + char *arch; + int nr_cpus_online; + int nr_cpus_avail; + char *cpu_desc; + char *cpuid; + unsigned long long total_mem; + unsigned int msr_pmu_type; + + int nr_cmdline; + int nr_sibling_cores; + int nr_sibling_threads; + int nr_numa_nodes; + int nr_pmu_mappings; + int nr_groups; + char *cmdline; + const char **cmdline_argv; + char *sibling_cores; + char *sibling_threads; + char *numa_nodes; + char *pmu_mappings; + struct cpu_topology_map *cpu; +}; + +extern struct perf_env perf_env; + +void perf_env__exit(struct perf_env 
*env); + +int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); + +int perf_env__read_cpu_topology_map(struct perf_env *env); + +#endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 67a977e5d0ab..8b10621b415c 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -26,6 +26,8 @@ static const char *perf_event__names[] = { [PERF_RECORD_AUX] = "AUX", [PERF_RECORD_ITRACE_START] = "ITRACE_START", [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES", + [PERF_RECORD_SWITCH] = "SWITCH", + [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -65,7 +67,8 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, char filename[PATH_MAX]; char bf[4096]; int fd; - size_t size = 0, n; + size_t size = 0; + ssize_t n; char *nl, *name, *tgids, *ppids; *tgid = -1; @@ -165,7 +168,7 @@ static int perf_event__prepare_comm(union perf_event *event, pid_t pid, return 0; } -static pid_t perf_event__synthesize_comm(struct perf_tool *tool, +pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine) @@ -376,7 +379,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, for (pos = maps__first(maps); pos; pos = map__next(pos)) { size_t size; - if (pos->dso->kernel) + if (__map__is_kernel(pos)) continue; size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); @@ -647,12 +650,12 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, size_t size; const char *mmap_name; char name_buff[PATH_MAX]; - struct map *map; + struct map *map = machine__kernel_map(machine); struct kmap *kmap; int err; union perf_event *event; - if (machine->vmlinux_maps[0] == NULL) + if (map == NULL) return -1; /* @@ -678,7 +681,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool 
*tool, event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; } - map = machine->vmlinux_maps[MAP__FUNCTION]; kmap = map__kmap(map); size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), "%s%s", mmap_name, kmap->ref_reloc_sym->name) + 1; @@ -749,6 +751,14 @@ int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, return machine__process_lost_samples_event(machine, event, sample); } +int perf_event__process_switch(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_switch_event(machine, event); +} + size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) { return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", @@ -827,6 +837,20 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) event->itrace_start.pid, event->itrace_start.tid); } +size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + const char *in_out = out ? "OUT" : "IN "; + + if (event->header.type == PERF_RECORD_SWITCH) + return fprintf(fp, " %s\n", in_out); + + return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n", + in_out, out ? "next" : "prev", + event->context_switch.next_prev_pid, + event->context_switch.next_prev_tid); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -852,6 +876,10 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_ITRACE_START: ret += perf_event__fprintf_itrace_start(event, fp); break; + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + ret += perf_event__fprintf_switch(event, fp); + break; default: ret += fprintf(fp, "\n"); } @@ -980,7 +1008,7 @@ int perf_event__preprocess_sample(const union perf_event *event, * it now. 
*/ if (cpumode == PERF_RECORD_MISC_KERNEL && - machine->vmlinux_maps[MAP__FUNCTION] == NULL) + machine__kernel_map(machine) == NULL) machine__create_kernel_maps(machine); thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, al); @@ -993,6 +1021,14 @@ int perf_event__preprocess_sample(const union perf_event *event, al->sym = NULL; al->cpu = sample->cpu; + al->socket = -1; + + if (al->cpu >= 0) { + struct perf_env *env = machine->env; + + if (env && env->cpu) + al->socket = env->cpu[al->cpu].socket_id; + } if (al->map) { struct dso *dso = al->map->dso; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c53f36384b64..a0dbcbd4f6d8 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -134,7 +134,8 @@ struct branch_flags { u64 predicted:1; u64 in_tx:1; u64 abort:1; - u64 reserved:60; + u64 cycles:16; + u64 reserved:44; }; struct branch_entry { @@ -256,6 +257,7 @@ struct events_stats { u64 total_non_filtered_period; u64 total_lost; u64 total_lost_samples; + u64 total_aux_lost; u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; u32 nr_non_filtered_samples; @@ -348,6 +350,12 @@ struct itrace_start_event { u32 pid, tid; }; +struct context_switch_event { + struct perf_event_header header; + u32 next_prev_pid; + u32 next_prev_tid; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -369,6 +377,7 @@ union perf_event { struct auxtrace_error_event auxtrace_error; struct aux_event aux; struct itrace_start_event itrace_start; + struct context_switch_event context_switch; }; void perf_event__print_totals(void); @@ -418,6 +427,10 @@ int perf_event__process_itrace_start(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_switch(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, 
struct perf_sample *sample, @@ -466,6 +479,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, const struct perf_sample *sample, bool swapped); +pid_t perf_event__synthesize_comm(struct perf_tool *tool, + union perf_event *event, pid_t pid, + perf_event__handler_t process, + struct machine *machine); + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, @@ -480,6 +498,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp); size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); u64 kallsyms__get_function_start(const char *kallsyms_filename, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6cfdee68e763..d1392194a9a9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -25,6 +25,7 @@ #include <linux/bitops.h> #include <linux/hash.h> #include <linux/log2.h> +#include <linux/err.h> static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx); static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx); @@ -98,6 +99,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) evlist__for_each_safe(evlist, n, pos) { list_del_init(&pos->node); + pos->evlist = NULL; perf_evsel__delete(pos); } @@ -123,26 +125,62 @@ void perf_evlist__delete(struct perf_evlist *evlist) free(evlist); } +static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, + struct perf_evsel *evsel) +{ + /* + * We already have cpus for evsel (via PMU sysfs) so + * keep it, if there's no target cpu list defined. 
+ */ + if (!evsel->own_cpus || evlist->has_user_cpus) { + cpu_map__put(evsel->cpus); + evsel->cpus = cpu_map__get(evlist->cpus); + } else if (evsel->cpus != evsel->own_cpus) { + cpu_map__put(evsel->cpus); + evsel->cpus = cpu_map__get(evsel->own_cpus); + } + + thread_map__put(evsel->threads); + evsel->threads = thread_map__get(evlist->threads); +} + +static void perf_evlist__propagate_maps(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) + __perf_evlist__propagate_maps(evlist, evsel); +} + void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) { + entry->evlist = evlist; list_add_tail(&entry->node, &evlist->entries); entry->idx = evlist->nr_entries; entry->tracking = !entry->idx; if (!evlist->nr_entries++) perf_evlist__set_id_pos(evlist); + + __perf_evlist__propagate_maps(evlist, entry); +} + +void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel) +{ + evsel->evlist = NULL; + list_del_init(&evsel->node); + evlist->nr_entries -= 1; } void perf_evlist__splice_list_tail(struct perf_evlist *evlist, - struct list_head *list, - int nr_entries) + struct list_head *list) { - bool set_id_pos = !evlist->nr_entries; + struct perf_evsel *evsel, *temp; - list_splice_tail(list, &evlist->entries); - evlist->nr_entries += nr_entries; - if (set_id_pos) - perf_evlist__set_id_pos(evlist); + __evlist__for_each_safe(list, temp, evsel) { + list_del_init(&evsel->node); + perf_evlist__add(evlist, evsel); + } } void __perf_evlist__set_leader(struct list_head *list) @@ -167,6 +205,20 @@ void perf_evlist__set_leader(struct perf_evlist *evlist) } } +void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) +{ + attr->precise_ip = 3; + + while (attr->precise_ip != 0) { + int fd = sys_perf_event_open(attr, 0, -1, -1, 0); + if (fd != -1) { + close(fd); + break; + } + --attr->precise_ip; + } +} + int perf_evlist__add_default(struct perf_evlist *evlist) { struct perf_event_attr attr = { @@ 
-177,13 +229,15 @@ int perf_evlist__add_default(struct perf_evlist *evlist) event_attr_init(&attr); + perf_event_attr__set_max_precise_ip(&attr); + evsel = perf_evsel__new(&attr); if (evsel == NULL) goto error; - /* use strdup() because free(evsel) assumes name is allocated */ - evsel->name = strdup("cycles"); - if (!evsel->name) + /* use asprintf() because free(evsel) assumes name is allocated */ + if (asprintf(&evsel->name, "cycles%.*s", + attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0) goto error_free; perf_evlist__add(evlist, evsel); @@ -208,7 +262,7 @@ static int perf_evlist__add_attrs(struct perf_evlist *evlist, list_add_tail(&evsel->node, &head); } - perf_evlist__splice_list_tail(evlist, &head, nr_attrs); + perf_evlist__splice_list_tail(evlist, &head); return 0; @@ -263,7 +317,7 @@ int perf_evlist__add_newtp(struct perf_evlist *evlist, { struct perf_evsel *evsel = perf_evsel__newtp(sys, name); - if (evsel == NULL) + if (IS_ERR(evsel)) return -1; evsel->handler = handler; @@ -573,7 +627,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) { struct perf_sample_id *sid; - if (evlist->nr_entries == 1) + if (evlist->nr_entries == 1 || !id) return perf_evlist__first(evlist); sid = perf_evlist__id2sid(evlist, id); @@ -586,6 +640,21 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) return NULL; } +struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, + u64 id) +{ + struct perf_sample_id *sid; + + if (!id) + return NULL; + + sid = perf_evlist__id2sid(evlist, id); + if (sid) + return sid->evsel; + + return NULL; +} + static int perf_evlist__event2id(struct perf_evlist *evlist, union perf_event *event, u64 *id) { @@ -1101,55 +1170,58 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); } -static int perf_evlist__propagate_maps(struct perf_evlist *evlist, - struct target *target) -{ - struct 
perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - /* - * We already have cpus for evsel (via PMU sysfs) so - * keep it, if there's no target cpu list defined. - */ - if (evsel->cpus && target->cpu_list) - cpu_map__put(evsel->cpus); - - if (!evsel->cpus || target->cpu_list) - evsel->cpus = cpu_map__get(evlist->cpus); - - evsel->threads = thread_map__get(evlist->threads); - - if (!evsel->cpus || !evsel->threads) - return -ENOMEM; - } - - return 0; -} - int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { - evlist->threads = thread_map__new_str(target->pid, target->tid, - target->uid); + struct cpu_map *cpus; + struct thread_map *threads; - if (evlist->threads == NULL) + threads = thread_map__new_str(target->pid, target->tid, target->uid); + + if (!threads) return -1; if (target__uses_dummy_map(target)) - evlist->cpus = cpu_map__dummy_new(); + cpus = cpu_map__dummy_new(); else - evlist->cpus = cpu_map__new(target->cpu_list); + cpus = cpu_map__new(target->cpu_list); - if (evlist->cpus == NULL) + if (!cpus) goto out_delete_threads; - return perf_evlist__propagate_maps(evlist, target); + evlist->has_user_cpus = !!target->cpu_list; + + perf_evlist__set_maps(evlist, cpus, threads); + + return 0; out_delete_threads: - thread_map__put(evlist->threads); - evlist->threads = NULL; + thread_map__put(threads); return -1; } +void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads) +{ + /* + * Allow for the possibility that one or another of the maps isn't being + * changed i.e. don't put it. Note we are assuming the maps that are + * being applied are brand new and evlist is taking ownership of the + * original reference count of 1. If that is not the case it is up to + * the caller to increase the reference count. 
+ */ + if (cpus != evlist->cpus) { + cpu_map__put(evlist->cpus); + evlist->cpus = cpus; + } + + if (threads != evlist->threads) { + thread_map__put(evlist->threads); + evlist->threads = threads; + } + + perf_evlist__propagate_maps(evlist); +} + int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) { struct perf_evsel *evsel; @@ -1161,7 +1233,11 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e if (evsel->filter == NULL) continue; - err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter); + /* + * filters only work for tracepoint event, which doesn't have cpu limit. + * So evlist and evsel should always be same. + */ + err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); if (err) { *err_evsel = evsel; break; @@ -1175,11 +1251,9 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) { struct perf_evsel *evsel; int err = 0; - const int ncpus = cpu_map__nr(evlist->cpus), - nthreads = thread_map__nr(evlist->threads); evlist__for_each(evlist, evsel) { - err = perf_evsel__set_filter(evsel, ncpus, nthreads, filter); + err = perf_evsel__set_filter(evsel, filter); if (err) break; } @@ -1257,6 +1331,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) return __perf_evlist__combined_sample_type(evlist); } +u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + u64 branch_type = 0; + + evlist__for_each(evlist, evsel) + branch_type |= evsel->attr.branch_sample_type; + return branch_type; +} + bool perf_evlist__valid_read_format(struct perf_evlist *evlist) { struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; @@ -1355,6 +1439,8 @@ void perf_evlist__close(struct perf_evlist *evlist) static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) { + struct cpu_map *cpus; + struct thread_map *threads; int err = -ENOMEM; /* @@ -1366,20 +1452,19 @@ static int 
perf_evlist__create_syswide_maps(struct perf_evlist *evlist) * error, and we may not want to do that fallback to a * default cpu identity map :-\ */ - evlist->cpus = cpu_map__new(NULL); - if (evlist->cpus == NULL) + cpus = cpu_map__new(NULL); + if (!cpus) goto out; - evlist->threads = thread_map__new_dummy(); - if (evlist->threads == NULL) - goto out_free_cpus; + threads = thread_map__new_dummy(); + if (!threads) + goto out_put; - err = 0; + perf_evlist__set_maps(evlist, cpus, threads); out: return err; -out_free_cpus: - cpu_map__put(evlist->cpus); - evlist->cpus = NULL; +out_put: + cpu_map__put(cpus); goto out; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 037633c1da9d..a459fe71b452 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -42,6 +42,7 @@ struct perf_evlist { int nr_mmaps; bool overwrite; bool enabled; + bool has_user_cpus; size_t mmap_len; int id_pos; int is_pos; @@ -56,6 +57,7 @@ struct perf_evlist { struct cpu_map *cpus; struct perf_evsel *selected; struct events_stats stats; + struct perf_env *env; }; struct perf_evsel_str_handler { @@ -71,6 +73,7 @@ void perf_evlist__exit(struct perf_evlist *evlist); void perf_evlist__delete(struct perf_evlist *evlist); void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); +void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel); int perf_evlist__add_default(struct perf_evlist *evlist); int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); @@ -102,6 +105,8 @@ int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mas int perf_evlist__poll(struct perf_evlist *evlist, int timeout); struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); +struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, + u64 id); struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); @@ -114,6 +119,8 @@ 
void perf_evlist__close(struct perf_evlist *evlist); void perf_evlist__set_id_pos(struct perf_evlist *evlist); bool perf_can_sample_identifier(void); +bool perf_can_record_switch_events(void); +bool perf_can_record_cpu_wide(void); void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts); int record_opts__config(struct record_opts *opts); @@ -152,14 +159,8 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist, void perf_evlist__set_selected(struct perf_evlist *evlist, struct perf_evsel *evsel); -static inline void perf_evlist__set_maps(struct perf_evlist *evlist, - struct cpu_map *cpus, - struct thread_map *threads) -{ - evlist->cpus = cpus; - evlist->threads = threads; -} - +void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads); int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); @@ -169,6 +170,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist); u64 perf_evlist__read_format(struct perf_evlist *evlist); u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist); u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist); +u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist); bool perf_evlist__sample_id_all(struct perf_evlist *evlist); u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); @@ -180,8 +182,7 @@ bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); bool perf_evlist__valid_read_format(struct perf_evlist *evlist); void perf_evlist__splice_list_tail(struct perf_evlist *evlist, - struct list_head *list, - int nr_entries); + struct list_head *list); static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) { @@ -289,4 +290,6 @@ void perf_evlist__to_front(struct perf_evlist *evlist, void perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); 
+ +void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2936b3080722..397fb4ed3c97 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -9,10 +9,11 @@ #include <byteswap.h> #include <linux/bitops.h> -#include <api/fs/debugfs.h> +#include <api/fs/tracing_path.h> #include <traceevent/event-parse.h> #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> +#include <linux/err.h> #include <sys/resource.h> #include "asm/bug.h" #include "callchain.h" @@ -206,10 +207,14 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->leader = evsel; evsel->unit = ""; evsel->scale = 1.0; + evsel->evlist = NULL; + evsel->bpf_fd = -1; INIT_LIST_HEAD(&evsel->node); + INIT_LIST_HEAD(&evsel->config_terms); perf_evsel__object.init(evsel); evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); perf_evsel__calc_id_pos(evsel); + evsel->cmdline_group_boundary = false; } struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -222,11 +227,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) return evsel; } +/* + * Returns pointer with encoded error via <linux/err.h> interface. 
+ */ struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx) { struct perf_evsel *evsel = zalloc(perf_evsel__object.size); + int err = -ENOMEM; - if (evsel != NULL) { + if (evsel == NULL) { + goto out_err; + } else { struct perf_event_attr attr = { .type = PERF_TYPE_TRACEPOINT, .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | @@ -237,8 +248,10 @@ struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int goto out_free; evsel->tp_format = trace_event__tp_format(sys, name); - if (evsel->tp_format == NULL) + if (IS_ERR(evsel->tp_format)) { + err = PTR_ERR(evsel->tp_format); goto out_free; + } event_attr_init(&attr); attr.config = evsel->tp_format->id; @@ -251,7 +264,8 @@ struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int out_free: zfree(&evsel->name); free(evsel); - return NULL; +out_err: + return ERR_PTR(err); } const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { @@ -543,14 +557,15 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) static void perf_evsel__config_callgraph(struct perf_evsel *evsel, - struct record_opts *opts) + struct record_opts *opts, + struct callchain_param *param) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; perf_evsel__set_sample_bit(evsel, CALLCHAIN); - if (callchain_param.record_mode == CALLCHAIN_LBR) { + if (param->record_mode == CALLCHAIN_LBR) { if (!opts->branch_stack) { if (attr->exclude_user) { pr_warning("LBR callstack option is only available " @@ -566,12 +581,12 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel, "Falling back to framepointers.\n"); } - if (callchain_param.record_mode == CALLCHAIN_DWARF) { + if (param->record_mode == CALLCHAIN_DWARF) { if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); perf_evsel__set_sample_bit(evsel, STACK_USER); attr->sample_regs_user = PERF_REGS_MASK; - attr->sample_stack_user = callchain_param.dump_size; + 
attr->sample_stack_user = param->dump_size; attr->exclude_callchain_user = 1; } else { pr_info("Cannot use DWARF unwind for function trace event," @@ -585,6 +600,106 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel, } } +static void +perf_evsel__reset_callgraph(struct perf_evsel *evsel, + struct callchain_param *param) +{ + struct perf_event_attr *attr = &evsel->attr; + + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + if (param->record_mode == CALLCHAIN_LBR) { + perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_CALL_STACK); + } + if (param->record_mode == CALLCHAIN_DWARF) { + perf_evsel__reset_sample_bit(evsel, REGS_USER); + perf_evsel__reset_sample_bit(evsel, STACK_USER); + } +} + +static void apply_config_terms(struct perf_evsel *evsel, + struct record_opts *opts) +{ + struct perf_evsel_config_term *term; + struct list_head *config_terms = &evsel->config_terms; + struct perf_event_attr *attr = &evsel->attr; + struct callchain_param param; + u32 dump_size = 0; + char *callgraph_buf = NULL; + + /* callgraph default */ + param.record_mode = callchain_param.record_mode; + + list_for_each_entry(term, config_terms, list) { + switch (term->type) { + case PERF_EVSEL__CONFIG_TERM_PERIOD: + attr->sample_period = term->val.period; + attr->freq = 0; + break; + case PERF_EVSEL__CONFIG_TERM_FREQ: + attr->sample_freq = term->val.freq; + attr->freq = 1; + break; + case PERF_EVSEL__CONFIG_TERM_TIME: + if (term->val.time) + perf_evsel__set_sample_bit(evsel, TIME); + else + perf_evsel__reset_sample_bit(evsel, TIME); + break; + case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: + callgraph_buf = term->val.callgraph; + break; + case PERF_EVSEL__CONFIG_TERM_STACK_USER: + dump_size = term->val.stack_user; + break; + case PERF_EVSEL__CONFIG_TERM_INHERIT: + /* + * attr->inherit should has already been set by + * perf_evsel__config. 
If user explicitly set + * inherit using config terms, override global + * opt->no_inherit setting. + */ + attr->inherit = term->val.inherit ? 1 : 0; + break; + default: + break; + } + } + + /* User explicitly set per-event callgraph, clear the old setting and reset. */ + if ((callgraph_buf != NULL) || (dump_size > 0)) { + + /* parse callgraph parameters */ + if (callgraph_buf != NULL) { + if (!strcmp(callgraph_buf, "no")) { + param.enabled = false; + param.record_mode = CALLCHAIN_NONE; + } else { + param.enabled = true; + if (parse_callchain_record(callgraph_buf, &param)) { + pr_err("per-event callgraph setting for %s failed. " + "Apply callgraph global setting for it\n", + evsel->name); + return; + } + } + } + if (dump_size > 0) { + dump_size = round_up(dump_size, sizeof(u64)); + param.dump_size = dump_size; + } + + /* If global callgraph set, clear it */ + if (callchain_param.enabled) + perf_evsel__reset_callgraph(evsel, &callchain_param); + + /* set perf-event callgraph */ + if (param.enabled) + perf_evsel__config_callgraph(evsel, opts, &param); + } +} + /* * The enable_on_exec/disabled value strategy: * @@ -689,10 +804,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) evsel->attr.exclude_callchain_user = 1; if (callchain_param.enabled && !evsel->no_aux_samples) - perf_evsel__config_callgraph(evsel, opts); + perf_evsel__config_callgraph(evsel, opts, &callchain_param); if (opts->sample_intr_regs) { - attr->sample_regs_intr = PERF_REGS_MASK; + attr->sample_regs_intr = opts->sample_intr_regs; perf_evsel__set_sample_bit(evsel, REGS_INTR); } @@ -707,7 +822,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) */ if (opts->sample_time && (!perf_missing_features.sample_id_all && - (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu))) + (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu || + opts->sample_time_set))) perf_evsel__set_sample_bit(evsel, TIME); if (opts->raw_samples && 
!evsel->no_aux_samples) { @@ -736,6 +852,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; + if (opts->record_switch_events) + attr->context_switch = track; + if (opts->sample_transaction) perf_evsel__set_sample_bit(evsel, TRANSACTION); @@ -772,6 +891,15 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->use_clockid = 1; attr->clockid = opts->clockid; } + + if (evsel->precise_max) + perf_event_attr__set_max_precise_ip(attr); + + /* + * Apply event specific term settings, + * it overloads any global configuration. + */ + apply_config_terms(evsel, opts); } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -815,14 +943,44 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthrea return 0; } -int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, - const char *filter) +int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter) { return perf_evsel__run_ioctl(evsel, ncpus, nthreads, PERF_EVENT_IOC_SET_FILTER, (void *)filter); } +int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter) +{ + char *new_filter = strdup(filter); + + if (new_filter != NULL) { + free(evsel->filter); + evsel->filter = new_filter; + return 0; + } + + return -1; +} + +int perf_evsel__append_filter(struct perf_evsel *evsel, + const char *op, const char *filter) +{ + char *new_filter; + + if (evsel->filter == NULL) + return perf_evsel__set_filter(evsel, filter); + + if (asprintf(&new_filter,"(%s) %s (%s)", evsel->filter, op, filter) > 0) { + free(evsel->filter); + evsel->filter = new_filter; + return 0; + } + + return -1; +} + int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads) { return perf_evsel__run_ioctl(evsel, ncpus, nthreads, @@ -865,6 +1023,16 @@ static void perf_evsel__free_id(struct 
perf_evsel *evsel) zfree(&evsel->id); } +static void perf_evsel__free_config_terms(struct perf_evsel *evsel) +{ + struct perf_evsel_config_term *term, *h; + + list_for_each_entry_safe(term, h, &evsel->config_terms, list) { + list_del(&term->list); + free(term); + } +} + void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { int cpu, thread; @@ -882,10 +1050,13 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); + assert(evsel->evlist == NULL); perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); + perf_evsel__free_config_terms(evsel); close_cgroup(evsel->cgrp); cpu_map__put(evsel->cpus); + cpu_map__put(evsel->own_cpus); thread_map__put(evsel->threads); zfree(&evsel->group_name); zfree(&evsel->name); @@ -1020,7 +1191,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), - bit_name(IDENTIFIER), bit_name(REGS_INTR), + bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), { .name = NULL, } }; #undef bit_name @@ -1095,11 +1266,13 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(mmap2, p_unsigned); PRINT_ATTRf(comm_exec, p_unsigned); PRINT_ATTRf(use_clockid, p_unsigned); + PRINT_ATTRf(context_switch, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex); PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex); + PRINT_ATTRf(branch_sample_type, p_unsigned); PRINT_ATTRf(sample_regs_user, p_hex); PRINT_ATTRf(sample_stack_user, p_unsigned); PRINT_ATTRf(clockid, p_signed); @@ -1184,6 +1357,22 @@ retry_open: err); goto try_fallback; } + + if (evsel->bpf_fd >= 0) { + int evt_fd = 
FD(evsel, cpu, thread); + int bpf_fd = evsel->bpf_fd; + + err = ioctl(evt_fd, + PERF_EVENT_IOC_SET_BPF, + bpf_fd); + if (err && errno != EEXIST) { + pr_err("failed to attach bpf fd %d: %s\n", + bpf_fd, strerror(errno)); + err = -EINVAL; + goto out_close; + } + } + set_rlimit = NO_CHANGE; /* @@ -2075,8 +2264,13 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, printed += perf_event_attr__fprintf(fp, &evsel->attr, __print_attr__fprintf, &first); } else if (details->freq) { - printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64, - (u64)evsel->attr.sample_freq); + const char *term = "sample_freq"; + + if (!evsel->attr.freq) + term = "sample_period"; + + printed += comma_fprintf(fp, &first, " %s=%" PRIu64, + term, (u64)evsel->attr.sample_freq); } out: fputc('\n', fp); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4a7ed5656cf0..0e49bd742c63 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -9,7 +9,7 @@ #include "xyarray.h" #include "symbol.h" #include "cpumap.h" -#include "stat.h" +#include "counts.h" struct perf_evsel; @@ -31,8 +31,40 @@ struct perf_sample_id { struct cgroup_sel; +/* + * The 'struct perf_evsel_config_term' is used to pass event + * specific configuration data to perf_evsel__config routine. + * It is allocated within event parsing and attached to + * perf_evsel::config_terms list head. +*/ +enum { + PERF_EVSEL__CONFIG_TERM_PERIOD, + PERF_EVSEL__CONFIG_TERM_FREQ, + PERF_EVSEL__CONFIG_TERM_TIME, + PERF_EVSEL__CONFIG_TERM_CALLGRAPH, + PERF_EVSEL__CONFIG_TERM_STACK_USER, + PERF_EVSEL__CONFIG_TERM_INHERIT, + PERF_EVSEL__CONFIG_TERM_MAX, +}; + +struct perf_evsel_config_term { + struct list_head list; + int type; + union { + u64 period; + u64 freq; + bool time; + char *callgraph; + u64 stack_user; + bool inherit; + } val; +}; + /** struct perf_evsel - event selector * + * @evlist - evlist this evsel is in, if it is in one. 
+ * @node - To insert it into evlist->entries or in other list_heads, say in + * the event parsing routines. * @name - Can be set to retain the original event name passed by the user, * so that when showing results in tools such as 'perf stat', we * show the name used, not some alias. @@ -46,6 +78,7 @@ struct cgroup_sel; */ struct perf_evsel { struct list_head node; + struct perf_evlist *evlist; struct perf_event_attr attr; char *filter; struct xyarray *fd; @@ -59,14 +92,15 @@ struct perf_evsel { double scale; const char *unit; struct event_format *tp_format; + off_t id_offset; union { void *priv; - off_t id_offset; u64 db_id; }; struct cgroup_sel *cgrp; void *handler; struct cpu_map *cpus; + struct cpu_map *own_cpus; struct thread_map *threads; unsigned int sample_size; int id_pos; @@ -79,6 +113,7 @@ struct perf_evsel { bool system_wide; bool tracking; bool per_pkg; + bool precise_max; /* parse modifier helper */ int exclude_GH; int nr_members; @@ -86,6 +121,9 @@ struct perf_evsel { unsigned long *per_pkg_mask; struct perf_evsel *leader; char *group_name; + bool cmdline_group_boundary; + struct list_head config_terms; + int bpf_fd; }; union u64_swap { @@ -96,7 +134,6 @@ union u64_swap { struct cpu_map; struct target; struct thread_map; -struct perf_evlist; struct record_opts; static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) @@ -128,6 +165,9 @@ static inline struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr) struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx); +/* + * Returns pointer with encoded error via <linux/err.h> interface. 
+ */ static inline struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name) { return perf_evsel__newtp_idx(sys, name, 0); @@ -182,8 +222,11 @@ void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel, void perf_evsel__set_sample_id(struct perf_evsel *evsel, bool use_sample_identifier); -int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, - const char *filter); +int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter); +int perf_evsel__append_filter(struct perf_evsel *evsel, + const char *op, const char *filter); +int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter); int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__open_per_cpu(struct perf_evsel *evsel, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 03ace57a800c..43838003c1a1 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -24,9 +24,6 @@ #include "build-id.h" #include "data.h" -static u32 header_argc; -static const char **header_argv; - /* * magic2 = "PERFILE2" * must be a numerical value to let the endianness @@ -88,6 +85,9 @@ int write_padded(int fd, const void *bf, size_t count, size_t count_aligned) return err; } +#define string_size(str) \ + (PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32)) + static int do_write_string(int fd, const char *str) { u32 len, olen; @@ -135,37 +135,6 @@ static char *do_read_string(int fd, struct perf_header *ph) return NULL; } -int -perf_header__set_cmdline(int argc, const char **argv) -{ - int i; - - /* - * If header_argv has already been set, do not override it. - * This allows a command to set the cmdline, parse args and - * then call another builtin function that implements a - * command -- e.g, cmd_kvm calling cmd_record. 
- */ - if (header_argv) - return 0; - - header_argc = (u32)argc; - - /* do not include NULL termination */ - header_argv = calloc(argc, sizeof(char *)); - if (!header_argv) - return -ENOMEM; - - /* - * must copy argv contents because it gets moved - * around during option parsing - */ - for (i = 0; i < argc ; i++) - header_argv[i] = argv[i]; - - return 0; -} - static int write_tracing_data(int fd, struct perf_header *h __maybe_unused, struct perf_evlist *evlist) { @@ -402,8 +371,8 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, { char buf[MAXPATHLEN]; char proc[32]; - u32 i, n; - int ret; + u32 n; + int i, ret; /* * actual atual path to perf binary @@ -417,7 +386,7 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, buf[ret] = '\0'; /* account for binary path */ - n = header_argc + 1; + n = perf_env.nr_cmdline + 1; ret = do_write(fd, &n, sizeof(n)); if (ret < 0) @@ -427,8 +396,8 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, if (ret < 0) return ret; - for (i = 0 ; i < header_argc; i++) { - ret = do_write_string(fd, header_argv[i]); + for (i = 0 ; i < perf_env.nr_cmdline; i++) { + ret = do_write_string(fd, perf_env.cmdline_argv[i]); if (ret < 0) return ret; } @@ -441,6 +410,7 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list" struct cpu_topo { + u32 cpu_nr; u32 core_sib; u32 thread_sib; char **core_siblings; @@ -551,7 +521,7 @@ static struct cpu_topo *build_cpu_topology(void) return NULL; tp = addr; - + tp->cpu_nr = nr; addr += sizeof(*tp); tp->core_siblings = addr; addr += sz; @@ -574,7 +544,7 @@ static int write_cpu_topology(int fd, struct perf_header *h __maybe_unused, { struct cpu_topo *tp; u32 i; - int ret; + int ret, j; tp = build_cpu_topology(); if (!tp) @@ -598,6 +568,21 @@ static int write_cpu_topology(int fd, struct perf_header *h __maybe_unused, if (ret < 0) break; } + + ret = 
perf_env__read_cpu_topology_map(&perf_env); + if (ret < 0) + goto done; + + for (j = 0; j < perf_env.nr_cpus_avail; j++) { + ret = do_write(fd, &perf_env.cpu[j].core_id, + sizeof(perf_env.cpu[j].core_id)); + if (ret < 0) + return ret; + ret = do_write(fd, &perf_env.cpu[j].socket_id, + sizeof(perf_env.cpu[j].socket_id)); + if (ret < 0) + return ret; + } done: free_cpu_topo(tp); return ret; @@ -923,17 +908,13 @@ static void print_cmdline(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { int nr, i; - char *str; nr = ph->env.nr_cmdline; - str = ph->env.cmdline; fprintf(fp, "# cmdline : "); - for (i = 0; i < nr; i++) { - fprintf(fp, "%s ", str); - str += strlen(str) + 1; - } + for (i = 0; i < nr; i++) + fprintf(fp, "%s ", ph->env.cmdline_argv[i]); fputc('\n', fp); } @@ -942,6 +923,7 @@ static void print_cpu_topology(struct perf_header *ph, int fd __maybe_unused, { int nr, i; char *str; + int cpu_nr = ph->env.nr_cpus_online; nr = ph->env.nr_sibling_cores; str = ph->env.sibling_cores; @@ -958,6 +940,13 @@ static void print_cpu_topology(struct perf_header *ph, int fd __maybe_unused, fprintf(fp, "# sibling threads : %s\n", str); str += strlen(str) + 1; } + + if (ph->env.cpu != NULL) { + for (i = 0; i < cpu_nr; i++) + fprintf(fp, "# CPU %d: Core ID %d, Socket ID %d\n", i, + ph->env.cpu[i].core_id, ph->env.cpu[i].socket_id); + } else + fprintf(fp, "# Core ID and Socket ID information is not available\n"); } static void free_event_desc(struct perf_evsel *events) @@ -1442,7 +1431,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_online = nr; + ph->env.nr_cpus_avail = nr; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) @@ -1451,7 +1440,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_avail = nr; + ph->env.nr_cpus_online = nr; return 0; } @@ -1541,14 +1530,13 @@ process_event_desc(struct 
perf_file_section *section __maybe_unused, return 0; } -static int process_cmdline(struct perf_file_section *section __maybe_unused, +static int process_cmdline(struct perf_file_section *section, struct perf_header *ph, int fd, void *data __maybe_unused) { ssize_t ret; - char *str; - u32 nr, i; - struct strbuf sb; + char *str, *cmdline = NULL, **argv = NULL; + u32 nr, i, len = 0; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) @@ -1558,26 +1546,36 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused, nr = bswap_32(nr); ph->env.nr_cmdline = nr; - strbuf_init(&sb, 128); + + cmdline = zalloc(section->size + nr + 1); + if (!cmdline) + return -1; + + argv = zalloc(sizeof(char *) * (nr + 1)); + if (!argv) + goto error; for (i = 0; i < nr; i++) { str = do_read_string(fd, ph); if (!str) goto error; - /* include a NULL character at the end */ - strbuf_add(&sb, str, strlen(str) + 1); + argv[i] = cmdline + len; + memcpy(argv[i], str, strlen(str) + 1); + len += strlen(str) + 1; free(str); } - ph->env.cmdline = strbuf_detach(&sb, NULL); + ph->env.cmdline = cmdline; + ph->env.cmdline_argv = (const char **) argv; return 0; error: - strbuf_release(&sb); + free(argv); + free(cmdline); return -1; } -static int process_cpu_topology(struct perf_file_section *section __maybe_unused, +static int process_cpu_topology(struct perf_file_section *section, struct perf_header *ph, int fd, void *data __maybe_unused) { @@ -1585,15 +1583,22 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused u32 nr, i; char *str; struct strbuf sb; + int cpu_nr = ph->env.nr_cpus_online; + u64 size = 0; + + ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu)); + if (!ph->env.cpu) + return -1; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) - return -1; + goto free_cpu; if (ph->needs_swap) nr = bswap_32(nr); ph->env.nr_sibling_cores = nr; + size += sizeof(u32); strbuf_init(&sb, 128); for (i = 0; i < nr; i++) { @@ -1603,6 +1608,7 @@ static int 
process_cpu_topology(struct perf_file_section *section __maybe_unused /* include a NULL character at the end */ strbuf_add(&sb, str, strlen(str) + 1); + size += string_size(str); free(str); } ph->env.sibling_cores = strbuf_detach(&sb, NULL); @@ -1615,6 +1621,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused nr = bswap_32(nr); ph->env.nr_sibling_threads = nr; + size += sizeof(u32); for (i = 0; i < nr; i++) { str = do_read_string(fd, ph); @@ -1623,13 +1630,57 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused /* include a NULL character at the end */ strbuf_add(&sb, str, strlen(str) + 1); + size += string_size(str); free(str); } ph->env.sibling_threads = strbuf_detach(&sb, NULL); + + /* + * The header may be from old perf, + * which doesn't include core id and socket id information. + */ + if (section->size <= size) { + zfree(&ph->env.cpu); + return 0; + } + + for (i = 0; i < (u32)cpu_nr; i++) { + ret = readn(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + goto free_cpu; + + if (ph->needs_swap) + nr = bswap_32(nr); + + if (nr > (u32)cpu_nr) { + pr_debug("core_id number is too big." + "You may need to upgrade the perf tool.\n"); + goto free_cpu; + } + ph->env.cpu[i].core_id = nr; + + ret = readn(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + goto free_cpu; + + if (ph->needs_swap) + nr = bswap_32(nr); + + if (nr > (u32)cpu_nr) { + pr_debug("socket_id number is too big." 
+ "You may need to upgrade the perf tool.\n"); + goto free_cpu; + } + + ph->env.cpu[i].socket_id = nr; + } + return 0; error: strbuf_release(&sb); +free_cpu: + zfree(&ph->env.cpu); return -1; } @@ -1732,6 +1783,9 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused /* include a NULL character at the end */ strbuf_add(&sb, "", 1); + if (!strcmp(name, "msr")) + ph->env.msr_pmu_type = type; + free(name); pmu_num--; } @@ -2509,6 +2563,8 @@ int perf_session__read_header(struct perf_session *session) if (session->evlist == NULL) return -ENOMEM; + session->evlist->env = &header->env; + session->machines.host.env = &header->env; if (perf_data_file__is_pipe(file)) return perf_header__read_pipe(session); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index d4d57962c591..05f27cb6b7e3 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -7,7 +7,7 @@ #include <linux/bitmap.h> #include <linux/types.h> #include "event.h" - +#include "env.h" enum { HEADER_RESERVED = 0, /* always cleared */ @@ -66,30 +66,6 @@ struct perf_header; int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd); -struct perf_session_env { - char *hostname; - char *os_release; - char *version; - char *arch; - int nr_cpus_online; - int nr_cpus_avail; - char *cpu_desc; - char *cpuid; - unsigned long long total_mem; - - int nr_cmdline; - int nr_sibling_cores; - int nr_sibling_threads; - int nr_numa_nodes; - int nr_pmu_mappings; - int nr_groups; - char *cmdline; - char *sibling_cores; - char *sibling_threads; - char *numa_nodes; - char *pmu_mappings; -}; - struct perf_header { enum perf_header_version version; bool needs_swap; @@ -97,7 +73,7 @@ struct perf_header { u64 data_size; u64 feat_offset; DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); - struct perf_session_env env; + struct perf_env env; }; struct perf_evlist; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6f28d53d4e46..4fd37d6708cb 
100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -15,6 +15,8 @@ static bool hists__filter_entry_by_thread(struct hists *hists, struct hist_entry *he); static bool hists__filter_entry_by_symbol(struct hists *hists, struct hist_entry *he); +static bool hists__filter_entry_by_socket(struct hists *hists, + struct hist_entry *he); u16 hists__col_len(struct hists *hists, enum hist_column col) { @@ -130,6 +132,18 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); } + + if (h->mem_info->iaddr.sym) { + symlen = (int)h->mem_info->iaddr.sym->namelen + 4 + + unresolved_col_width + 2; + hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, + symlen); + } else { + symlen = unresolved_col_width + 4 + 2; + hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, + symlen); + } + if (h->mem_info->daddr.map) { symlen = dso__name_len(h->mem_info->daddr.map->dso); hists__new_col_len(hists, HISTC_MEM_DADDR_DSO, @@ -141,9 +155,12 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) } else { symlen = unresolved_col_width + 4 + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); + hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, symlen); hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); } + hists__new_col_len(hists, HISTC_CPU, 3); + hists__new_col_len(hists, HISTC_SOCKET, 6); hists__new_col_len(hists, HISTC_MEM_LOCKED, 6); hists__new_col_len(hists, HISTC_MEM_TLB, 22); hists__new_col_len(hists, HISTC_MEM_SNOOP, 12); @@ -151,6 +168,12 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); + if (h->srcline) + hists__new_col_len(hists, HISTC_SRCLINE, strlen(h->srcline)); + + if (h->srcfile) + hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile)); + if (h->transaction) hists__new_col_len(hists, HISTC_TRANSACTION, hist_entry__transaction_len()); @@ 
-446,6 +469,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .map = al->map, .sym = al->sym, }, + .socket = al->socket, .cpu = al->cpu, .cpumode = al->cpumode, .ip = al->addr, @@ -618,7 +642,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a * and not events sampled. Thus we use a pseudo period of 1. */ he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, - 1, 1, 0, true); + 1, bi->flags.cycles ? bi->flags.cycles : 1, + 0, true); if (he == NULL) return -ENOMEM; @@ -683,7 +708,7 @@ iter_finish_normal_entry(struct hist_entry_iter *iter, } static int -iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, +iter_prepare_cumulative_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { struct hist_entry **he_cache; @@ -695,7 +720,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, * cumulated only one time to prevent entries more than 100% * overhead. */ - he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1)); + he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1)); if (he_cache == NULL) return -ENOMEM; @@ -760,6 +785,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, struct hist_entry **he_cache = iter->priv; struct hist_entry *he; struct hist_entry he_tmp = { + .hists = evsel__hists(evsel), .cpu = al->cpu, .thread = al->thread, .comm = thread__comm(al->thread), @@ -855,6 +881,8 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, if (err) return err; + iter->max_stack = max_stack_depth; + err = iter->ops->prepare_entry(iter, al); if (err) goto out; @@ -944,6 +972,8 @@ void hist_entry__delete(struct hist_entry *he) zfree(&he->stat_acc); free_srcline(he->srcline); + if (he->srcfile && he->srcfile[0]) + free(he->srcfile); free_callchain(he->callchain); free(he); } @@ -1014,6 +1044,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) 
hists__filter_entry_by_dso(hists, he); hists__filter_entry_by_thread(hists, he); hists__filter_entry_by_symbol(hists, he); + hists__filter_entry_by_socket(hists, he); } void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) @@ -1099,13 +1130,14 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) static void __hists__insert_output_entry(struct rb_root *entries, struct hist_entry *he, - u64 min_callchain_hits) + u64 min_callchain_hits, + bool use_callchain) { struct rb_node **p = &entries->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; - if (symbol_conf.use_callchain) + if (use_callchain) callchain_param.sort(&he->sorted_chain, he->callchain, min_callchain_hits, &callchain_param); @@ -1129,6 +1161,13 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) struct rb_node *next; struct hist_entry *n; u64 min_callchain_hits; + struct perf_evsel *evsel = hists_to_evsel(hists); + bool use_callchain; + + if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) + use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; + else + use_callchain = symbol_conf.use_callchain; min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); @@ -1147,7 +1186,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); - __hists__insert_output_entry(&hists->entries, n, min_callchain_hits); + __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); hists__inc_stats(hists, n); if (!n->filtered) @@ -1274,6 +1313,37 @@ void hists__filter_by_symbol(struct hists *hists) } } +static bool hists__filter_entry_by_socket(struct hists *hists, + struct hist_entry *he) +{ + if ((hists->socket_filter > -1) && + (he->socket != hists->socket_filter)) { + he->filtered |= (1 << HIST_FILTER__SOCKET); + return true; + } + + return false; +} + +void 
hists__filter_by_socket(struct hists *hists) +{ + struct rb_node *nd; + + hists->stats.nr_non_filtered_samples = 0; + + hists__reset_filter_stats(hists); + hists__reset_col_len(hists); + + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + if (hists__filter_entry_by_socket(hists, h)) + continue; + + hists__remove_entry_filter(hists, h, HIST_FILTER__SOCKET); + } +} + void events_stats__inc(struct events_stats *stats, u32 type) { ++stats->nr_events[0]; @@ -1414,6 +1484,39 @@ int hists__link(struct hists *leader, struct hists *other) return 0; } +void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample, bool nonany_branch_mode) +{ + struct branch_info *bi; + + /* If we have branch cycles always annotate them. */ + if (bs && bs->nr && bs->entries[0].flags.cycles) { + int i; + + bi = sample__resolve_bstack(sample, al); + if (bi) { + struct addr_map_symbol *prev = NULL; + + /* + * Ignore errors, still want to process the + * other entries. + * + * For non standard branch modes always + * force no IPC (prev == NULL) + * + * Note that perf stores branches reversed from + * program order! + */ + for (i = bs->nr - 1; i >= 0; i--) { + addr_map_symbol__account_cycles(&bi[i].from, + nonany_branch_mode ? 
NULL : prev, + bi[i].flags.cycles); + prev = &bi[i].to; + } + free(bi); + } + } +} size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) { @@ -1466,6 +1569,7 @@ static int hists_evsel__init(struct perf_evsel *evsel) hists->entries_collapsed = RB_ROOT; hists->entries = RB_ROOT; pthread_mutex_init(&hists->lock, NULL); + hists->socket_filter = -1; return 0; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5ed8d9c22981..a48a2078d288 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -20,6 +20,7 @@ enum hist_filter { HIST_FILTER__SYMBOL, HIST_FILTER__GUEST, HIST_FILTER__HOST, + HIST_FILTER__SOCKET, }; enum hist_column { @@ -29,7 +30,9 @@ enum hist_column { HISTC_COMM, HISTC_PARENT, HISTC_CPU, + HISTC_SOCKET, HISTC_SRCLINE, + HISTC_SRCFILE, HISTC_MISPREDICT, HISTC_IN_TX, HISTC_ABORT, @@ -46,7 +49,9 @@ enum hist_column { HISTC_MEM_LVL, HISTC_MEM_SNOOP, HISTC_MEM_DCACHELINE, + HISTC_MEM_IADDR_SYMBOL, HISTC_TRANSACTION, + HISTC_CYCLES, HISTC_NR_COLS, /* Last entry */ }; @@ -68,6 +73,7 @@ struct hists { struct events_stats stats; u64 event_stream; u16 col_len[HISTC_NR_COLS]; + int socket_filter; }; struct hist_entry_iter; @@ -85,6 +91,7 @@ struct hist_entry_iter { int curr; bool hide_unresolved; + int max_stack; struct perf_evsel *evsel; struct perf_sample *sample; @@ -142,11 +149,12 @@ size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp); void hists__filter_by_dso(struct hists *hists); void hists__filter_by_thread(struct hists *hists); void hists__filter_by_symbol(struct hists *hists); +void hists__filter_by_socket(struct hists *hists); static inline bool hists__has_filter(struct hists *hists) { return hists->thread_filter || hists->dso_filter || - hists->symbol_filter_str; + hists->symbol_filter_str || (hists->socket_filter > -1); } u16 hists__col_len(struct hists *hists, enum hist_column col); @@ -311,7 +319,7 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, 
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_session_env *env); + struct perf_env *env); int script_browse(const char *script_opt); #else static inline @@ -319,7 +327,7 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, const char *help __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, - struct perf_session_env *env __maybe_unused) + struct perf_env *env __maybe_unused) { return 0; } @@ -349,6 +357,9 @@ static inline int script_browse(const char *script_opt __maybe_unused) unsigned int hists__sort_list_width(struct hists *hists); +void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample, bool nonany_branch_mode); + struct option; int parse_filter_percentage(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused); diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index 8f149655f497..07c644ed64c4 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -5,4 +5,12 @@ const char *get_arch_regstr(unsigned int n); #endif +#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET +/* + * Arch should support fetching the offset of a register in pt_regs + * by its name. See kernel's regs_query_register_offset in + * arch/xxx/kernel/ptrace.c. + */ +int regs_query_register_offset(const char *name); +#endif #endif diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c new file mode 100644 index 000000000000..eb0e7f8bf515 --- /dev/null +++ b/tools/perf/util/intel-bts.c @@ -0,0 +1,933 @@ +/* + * intel-bts.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <endian.h> +#include <byteswap.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> + +#include "cpumap.h" +#include "color.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "session.h" +#include "util.h" +#include "thread.h" +#include "thread-stack.h" +#include "debug.h" +#include "tsc.h" +#include "auxtrace.h" +#include "intel-pt-decoder/intel-pt-insn-decoder.h" +#include "intel-bts.h" + +#define MAX_TIMESTAMP (~0ULL) + +#define INTEL_BTS_ERR_NOINSN 5 +#define INTEL_BTS_ERR_LOST 9 + +#if __BYTE_ORDER == __BIG_ENDIAN +#define le64_to_cpu bswap_64 +#else +#define le64_to_cpu +#endif + +struct intel_bts { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + bool sampling_mode; + bool snapshot_mode; + bool data_queued; + u32 pmu_type; + struct perf_tsc_conversion tc; + bool cap_user_time_zero; + struct itrace_synth_opts synth_opts; + bool sample_branches; + u32 branches_filter; + u64 branches_sample_type; + u64 branches_id; + size_t branches_event_size; + bool synth_needs_swap; +}; + +struct intel_bts_queue { + struct intel_bts *bts; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + bool on_heap; + bool done; + pid_t pid; + pid_t tid; + int cpu; + u64 time; + struct intel_pt_insn intel_pt_insn; + u32 sample_flags; +}; + +struct branch { + u64 from; + u64 to; + u64 misc; +}; + +static void 
intel_bts_dump(struct intel_bts *bts __maybe_unused, + unsigned char *buf, size_t len) +{ + struct branch *branch; + size_t i, pos = 0, br_sz = sizeof(struct branch), sz; + const char *color = PERF_COLOR_BLUE; + + color_fprintf(stdout, color, + ". ... Intel BTS data: size %zu bytes\n", + len); + + while (len) { + if (len >= br_sz) + sz = br_sz; + else + sz = len; + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + for (i = 0; i < sz; i++) + color_fprintf(stdout, color, " %02x", buf[i]); + for (; i < br_sz; i++) + color_fprintf(stdout, color, " "); + if (len >= br_sz) { + branch = (struct branch *)buf; + color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n", + le64_to_cpu(branch->from), + le64_to_cpu(branch->to), + le64_to_cpu(branch->misc) & 0x10 ? + "pred" : "miss"); + } else { + color_fprintf(stdout, color, " Bad record!\n"); + } + pos += sz; + buf += sz; + len -= sz; + } +} + +static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf, + size_t len) +{ + printf(".\n"); + intel_bts_dump(bts, buf, len); +} + +static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample) +{ + union perf_event event; + int err; + + auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, + INTEL_BTS_ERR_LOST, sample->cpu, sample->pid, + sample->tid, 0, "Lost trace data"); + + err = perf_session__deliver_synth_event(bts->session, &event, NULL); + if (err) + pr_err("Intel BTS: failed to deliver error event, error %d\n", + err); + + return err; +} + +static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts, + unsigned int queue_nr) +{ + struct intel_bts_queue *btsq; + + btsq = zalloc(sizeof(struct intel_bts_queue)); + if (!btsq) + return NULL; + + btsq->bts = bts; + btsq->queue_nr = queue_nr; + btsq->pid = -1; + btsq->tid = -1; + btsq->cpu = -1; + + return btsq; +} + +static int intel_bts_setup_queue(struct intel_bts *bts, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct 
intel_bts_queue *btsq = queue->priv; + + if (list_empty(&queue->head)) + return 0; + + if (!btsq) { + btsq = intel_bts_alloc_queue(bts, queue_nr); + if (!btsq) + return -ENOMEM; + queue->priv = btsq; + + if (queue->cpu != -1) + btsq->cpu = queue->cpu; + btsq->tid = queue->tid; + } + + if (bts->sampling_mode) + return 0; + + if (!btsq->on_heap && !btsq->buffer) { + int ret; + + btsq->buffer = auxtrace_buffer__next(queue, NULL); + if (!btsq->buffer) + return 0; + + ret = auxtrace_heap__add(&bts->heap, queue_nr, + btsq->buffer->reference); + if (ret) + return ret; + btsq->on_heap = true; + } + + return 0; +} + +static int intel_bts_setup_queues(struct intel_bts *bts) +{ + unsigned int i; + int ret; + + for (i = 0; i < bts->queues.nr_queues; i++) { + ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i], + i); + if (ret) + return ret; + } + return 0; +} + +static inline int intel_bts_update_queues(struct intel_bts *bts) +{ + if (bts->queues.new_data) { + bts->queues.new_data = false; + return intel_bts_setup_queues(bts); + } + return 0; +} + +static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a, + unsigned char *buf_b, size_t len_b) +{ + size_t offs, len; + + if (len_a > len_b) + offs = len_a - len_b; + else + offs = 0; + + for (; offs < len_a; offs += sizeof(struct branch)) { + len = len_a - offs; + if (!memcmp(buf_a + offs, buf_b, len)) + return buf_b + len; + } + + return buf_b; +} + +static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue, + struct auxtrace_buffer *b) +{ + struct auxtrace_buffer *a; + void *start; + + if (b->list.prev == &queue->head) + return 0; + a = list_entry(b->list.prev, struct auxtrace_buffer, list); + start = intel_bts_find_overlap(a->data, a->size, b->data, b->size); + if (!start) + return -EINVAL; + b->use_size = b->data + b->size - start; + b->use_data = start; + return 0; +} + +static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, + struct branch *branch) +{ + int ret; + struct 
intel_bts *bts = btsq->bts; + union perf_event event; + struct perf_sample sample = { .ip = 0, }; + + event.sample.header.type = PERF_RECORD_SAMPLE; + event.sample.header.misc = PERF_RECORD_MISC_USER; + event.sample.header.size = sizeof(struct perf_event_header); + + sample.ip = le64_to_cpu(branch->from); + sample.pid = btsq->pid; + sample.tid = btsq->tid; + sample.addr = le64_to_cpu(branch->to); + sample.id = btsq->bts->branches_id; + sample.stream_id = btsq->bts->branches_id; + sample.period = 1; + sample.cpu = btsq->cpu; + sample.flags = btsq->sample_flags; + sample.insn_len = btsq->intel_pt_insn.length; + + if (bts->synth_opts.inject) { + event.sample.header.size = bts->branches_event_size; + ret = perf_event__synthesize_sample(&event, + bts->branches_sample_type, + 0, &sample, + bts->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(bts->session, &event, &sample); + if (ret) + pr_err("Intel BTS: failed to deliver branch event, error %d\n", + ret); + + return ret; +} + +static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip) +{ + struct machine *machine = btsq->bts->machine; + struct thread *thread; + struct addr_location al; + unsigned char buf[1024]; + size_t bufsz; + ssize_t len; + int x86_64; + uint8_t cpumode; + int err = -1; + + bufsz = intel_pt_insn_max_size(); + + if (machine__kernel_ip(machine, ip)) + cpumode = PERF_RECORD_MISC_KERNEL; + else + cpumode = PERF_RECORD_MISC_USER; + + thread = machine__find_thread(machine, -1, btsq->tid); + if (!thread) + return -1; + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al); + if (!al.map || !al.map->dso) + goto out_put; + + len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz); + if (len <= 0) + goto out_put; + + /* Load maps to ensure dso->is_64_bit has been updated */ + map__load(al.map, machine->symbol_filter); + + x86_64 = al.map->dso->is_64_bit; + + if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn)) + 
goto out_put; + + err = 0; +out_put: + thread__put(thread); + return err; +} + +static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid, + pid_t tid, u64 ip) +{ + union perf_event event; + int err; + + auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, + INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip, + "Failed to get instruction"); + + err = perf_session__deliver_synth_event(bts->session, &event, NULL); + if (err) + pr_err("Intel BTS: failed to deliver error event, error %d\n", + err); + + return err; +} + +static int intel_bts_get_branch_type(struct intel_bts_queue *btsq, + struct branch *branch) +{ + int err; + + if (!branch->from) { + if (branch->to) + btsq->sample_flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_BEGIN; + else + btsq->sample_flags = 0; + btsq->intel_pt_insn.length = 0; + } else if (!branch->to) { + btsq->sample_flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_END; + btsq->intel_pt_insn.length = 0; + } else { + err = intel_bts_get_next_insn(btsq, branch->from); + if (err) { + btsq->sample_flags = 0; + btsq->intel_pt_insn.length = 0; + if (!btsq->bts->synth_opts.errors) + return 0; + err = intel_bts_synth_error(btsq->bts, btsq->cpu, + btsq->pid, btsq->tid, + branch->from); + return err; + } + btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op); + /* Check for an async branch into the kernel */ + if (!machine__kernel_ip(btsq->bts->machine, branch->from) && + machine__kernel_ip(btsq->bts->machine, branch->to) && + btsq->sample_flags != (PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL | + PERF_IP_FLAG_SYSCALLRET)) + btsq->sample_flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL | + PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT; + } + + return 0; +} + +static int intel_bts_process_buffer(struct intel_bts_queue *btsq, + struct auxtrace_buffer *buffer) +{ + struct branch *branch; + size_t sz, bsz = sizeof(struct branch); + u32 filter = btsq->bts->branches_filter; + int err = 0; + + if (buffer->use_data) { + sz 
= buffer->use_size; + branch = buffer->use_data; + } else { + sz = buffer->size; + branch = buffer->data; + } + + if (!btsq->bts->sample_branches) + return 0; + + for (; sz > bsz; branch += 1, sz -= bsz) { + if (!branch->from && !branch->to) + continue; + intel_bts_get_branch_type(btsq, branch); + if (filter && !(filter & btsq->sample_flags)) + continue; + err = intel_bts_synth_branch_sample(btsq, branch); + if (err) + break; + } + return err; +} + +static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp) +{ + struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer; + struct auxtrace_queue *queue; + struct thread *thread; + int err; + + if (btsq->done) + return 1; + + if (btsq->pid == -1) { + thread = machine__find_thread(btsq->bts->machine, -1, + btsq->tid); + if (thread) + btsq->pid = thread->pid_; + } else { + thread = machine__findnew_thread(btsq->bts->machine, btsq->pid, + btsq->tid); + } + + queue = &btsq->bts->queues.queue_array[btsq->queue_nr]; + + if (!buffer) + buffer = auxtrace_buffer__next(queue, NULL); + + if (!buffer) { + if (!btsq->bts->sampling_mode) + btsq->done = 1; + err = 1; + goto out_put; + } + + /* Currently there is no support for split buffers */ + if (buffer->consecutive) { + err = -EINVAL; + goto out_put; + } + + if (!buffer->data) { + int fd = perf_data_file__fd(btsq->bts->session->file); + + buffer->data = auxtrace_buffer__get_data(buffer, fd); + if (!buffer->data) { + err = -ENOMEM; + goto out_put; + } + } + + if (btsq->bts->snapshot_mode && !buffer->consecutive && + intel_bts_do_fix_overlap(queue, buffer)) { + err = -ENOMEM; + goto out_put; + } + + if (!btsq->bts->synth_opts.callchain && thread && + (!old_buffer || btsq->bts->sampling_mode || + (btsq->bts->snapshot_mode && !buffer->consecutive))) + thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1); + + err = intel_bts_process_buffer(btsq, buffer); + + auxtrace_buffer__drop_data(buffer); + + btsq->buffer = auxtrace_buffer__next(queue, 
buffer); + if (btsq->buffer) { + if (timestamp) + *timestamp = btsq->buffer->reference; + } else { + if (!btsq->bts->sampling_mode) + btsq->done = 1; + } +out_put: + thread__put(thread); + return err; +} + +static int intel_bts_flush_queue(struct intel_bts_queue *btsq) +{ + u64 ts = 0; + int ret; + + while (1) { + ret = intel_bts_process_queue(btsq, &ts); + if (ret < 0) + return ret; + if (ret) + break; + } + return 0; +} + +static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid) +{ + struct auxtrace_queues *queues = &bts->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + struct auxtrace_queue *queue = &bts->queues.queue_array[i]; + struct intel_bts_queue *btsq = queue->priv; + + if (btsq && btsq->tid == tid) + return intel_bts_flush_queue(btsq); + } + return 0; +} + +static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp) +{ + while (1) { + unsigned int queue_nr; + struct auxtrace_queue *queue; + struct intel_bts_queue *btsq; + u64 ts = 0; + int ret; + + if (!bts->heap.heap_cnt) + return 0; + + if (bts->heap.heap_array[0].ordinal > timestamp) + return 0; + + queue_nr = bts->heap.heap_array[0].queue_nr; + queue = &bts->queues.queue_array[queue_nr]; + btsq = queue->priv; + + auxtrace_heap__pop(&bts->heap); + + ret = intel_bts_process_queue(btsq, &ts); + if (ret < 0) { + auxtrace_heap__add(&bts->heap, queue_nr, ts); + return ret; + } + + if (!ret) { + ret = auxtrace_heap__add(&bts->heap, queue_nr, ts); + if (ret < 0) + return ret; + } else { + btsq->on_heap = false; + } + } + + return 0; +} + +static int intel_bts_process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + u64 timestamp; + int err; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("Intel BTS requires ordered events\n"); + return -EINVAL; + } + + if (sample->time 
&& sample->time != (u64)-1) + timestamp = perf_time_to_tsc(sample->time, &bts->tc); + else + timestamp = 0; + + err = intel_bts_update_queues(bts); + if (err) + return err; + + err = intel_bts_process_queues(bts, timestamp); + if (err) + return err; + if (event->header.type == PERF_RECORD_EXIT) { + err = intel_bts_process_tid_exit(bts, event->fork.tid); + if (err) + return err; + } + + if (event->header.type == PERF_RECORD_AUX && + (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && + bts->synth_opts.errors) + err = intel_bts_lost(bts, sample); + + return err; +} + +static int intel_bts_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + + if (bts->sampling_mode) + return 0; + + if (!bts->data_queued) { + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data_file__fd(session->file); + int err; + + if (perf_data_file__is_pipe(session->file)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&bts->queues, session, event, + data_offset, &buffer); + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + intel_bts_dump_event(bts, buffer->data, + buffer->size); + auxtrace_buffer__put_data(buffer); + } + } + } + + return 0; +} + +static int intel_bts_flush(struct perf_session *session __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + int ret; + + if (dump_trace || bts->sampling_mode) + return 0; + + if (!tool->ordered_events) + return -EINVAL; + + ret = intel_bts_update_queues(bts); + if (ret < 0) + return ret; + + return intel_bts_process_queues(bts, MAX_TIMESTAMP); +} + +static void 
intel_bts_free_queue(void *priv) +{ + struct intel_bts_queue *btsq = priv; + + if (!btsq) + return; + free(btsq); +} + +static void intel_bts_free_events(struct perf_session *session) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + struct auxtrace_queues *queues = &bts->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + intel_bts_free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + auxtrace_queues__free(queues); +} + +static void intel_bts_free(struct perf_session *session) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + + auxtrace_heap__free(&bts->heap); + intel_bts_free_events(session); + session->auxtrace = NULL; + free(bts); +} + +struct intel_bts_synth { + struct perf_tool dummy_tool; + struct perf_session *session; +}; + +static int intel_bts_event_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct intel_bts_synth *intel_bts_synth = + container_of(tool, struct intel_bts_synth, dummy_tool); + + return perf_session__deliver_synth_event(intel_bts_synth->session, + event, NULL); +} + +static int intel_bts_synth_event(struct perf_session *session, + struct perf_event_attr *attr, u64 id) +{ + struct intel_bts_synth intel_bts_synth; + + memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth)); + intel_bts_synth.session = session; + + return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1, + &id, intel_bts_event_synth); +} + +static int intel_bts_synth_events(struct intel_bts *bts, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == bts->pmu_type && evsel->ids) { + found = true; + break; + } + } + + if 
(!found) { + pr_debug("There are no selected events with Intel BTS data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = PERF_TYPE_HARDWARE; + attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_PERIOD; + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; + attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; + attr.exclude_user = evsel->attr.exclude_user; + attr.exclude_kernel = evsel->attr.exclude_kernel; + attr.exclude_hv = evsel->attr.exclude_hv; + attr.exclude_host = evsel->attr.exclude_host; + attr.exclude_guest = evsel->attr.exclude_guest; + attr.sample_id_all = evsel->attr.sample_id_all; + attr.read_format = evsel->attr.read_format; + + id = evsel->id[0] + 1000000000; + if (!id) + id = 1; + + if (bts->synth_opts.branches) { + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + attr.sample_period = 1; + attr.sample_type |= PERF_SAMPLE_ADDR; + pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_bts_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'branches' event type\n", + __func__); + return err; + } + bts->sample_branches = true; + bts->branches_sample_type = attr.sample_type; + bts->branches_id = id; + /* + * We only use sample types from PERF_SAMPLE_MASK so we can use + * __perf_evsel__sample_size() here. 
+ */ + bts->branches_event_size = sizeof(struct sample_event) + + __perf_evsel__sample_size(attr.sample_type); + } + + bts->synth_needs_swap = evsel->needs_swap; + + return 0; +} + +static const char * const intel_bts_info_fmts[] = { + [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n", + [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n", + [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n", + [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", + [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", +}; + +static void intel_bts_print_info(u64 *arr, int start, int finish) +{ + int i; + + if (!dump_trace) + return; + + for (i = start; i <= finish; i++) + fprintf(stdout, intel_bts_info_fmts[i], arr[i]); +} + +u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE]; + +int intel_bts_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE; + struct intel_bts *bts; + int err; + + if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + + min_sz) + return -EINVAL; + + bts = zalloc(sizeof(struct intel_bts)); + if (!bts) + return -ENOMEM; + + err = auxtrace_queues__init(&bts->queues); + if (err) + goto err_free; + + bts->session = session; + bts->machine = &session->machines.host; /* No kvm support */ + bts->auxtrace_type = auxtrace_info->type; + bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE]; + bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT]; + bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT]; + bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO]; + bts->cap_user_time_zero = + auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO]; + bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE]; + + bts->sampling_mode = false; + + bts->auxtrace.process_event = intel_bts_process_event; + 
bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event; + bts->auxtrace.flush_events = intel_bts_flush; + bts->auxtrace.free_events = intel_bts_free_events; + bts->auxtrace.free = intel_bts_free; + session->auxtrace = &bts->auxtrace; + + intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE, + INTEL_BTS_SNAPSHOT_MODE); + + if (dump_trace) + return 0; + + if (session->itrace_synth_opts && session->itrace_synth_opts->set) + bts->synth_opts = *session->itrace_synth_opts; + else + itrace_synth_opts__set_default(&bts->synth_opts); + + if (bts->synth_opts.calls) + bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_TRACE_END; + if (bts->synth_opts.returns) + bts->branches_filter |= PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_TRACE_BEGIN; + + err = intel_bts_synth_events(bts, session); + if (err) + goto err_free_queues; + + err = auxtrace_queues__process_index(&bts->queues, session); + if (err) + goto err_free_queues; + + if (bts->queues.populated) + bts->data_queued = true; + + return 0; + +err_free_queues: + auxtrace_queues__free(&bts->queues); + session->auxtrace = NULL; +err_free: + free(bts); + return err; +} diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h new file mode 100644 index 000000000000..ca65e21b3e83 --- /dev/null +++ b/tools/perf/util/intel-bts.h @@ -0,0 +1,43 @@ +/* + * intel-bts.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#ifndef INCLUDE__PERF_INTEL_BTS_H__ +#define INCLUDE__PERF_INTEL_BTS_H__ + +#define INTEL_BTS_PMU_NAME "intel_bts" + +enum { + INTEL_BTS_PMU_TYPE, + INTEL_BTS_TIME_SHIFT, + INTEL_BTS_TIME_MULT, + INTEL_BTS_TIME_ZERO, + INTEL_BTS_CAP_USER_TIME_ZERO, + INTEL_BTS_SNAPSHOT_MODE, + INTEL_BTS_AUXTRACE_PRIV_MAX, +}; + +#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64)) + +struct auxtrace_record; +struct perf_tool; +union perf_event; +struct perf_session; + +struct auxtrace_record *intel_bts_recording_init(int *err); + +int intel_bts_process_auxtrace_info(union perf_event *event, + struct perf_session *session); + +#endif diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build new file mode 100644 index 000000000000..0611d619a42e --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/Build @@ -0,0 +1,23 @@ +libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o + +inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk +inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt + +$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + $(call rule_mkdir) + @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ + +$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c + @(test -d ../../kernel -a -d ../../tools -a -d ../perf && (( \ + diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ + diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ + diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ + diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \ + diff -B 
-I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ + diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ + diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ + || echo "Warning: Intel PT: x86 instruction decoder differs from kernel" >&2 )) || true + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -Wno-override-init diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk new file mode 100644 index 000000000000..517567347aac --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk @@ -0,0 +1,386 @@ +#!/bin/awk -f +# gen-insn-attr-x86.awk: Instruction attribute table generator +# Written by Masami Hiramatsu <mhiramat@redhat.com> +# +# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c + +# Awk implementation sanity check +function check_awk_implement() { + if (sprintf("%x", 0) != "0") + return "Your awk has a printf-format problem." + return "" +} + +# Clear working vars +function clear_vars() { + delete table + delete lptable2 + delete lptable1 + delete lptable3 + eid = -1 # escape id + gid = -1 # group id + aid = -1 # AVX id + tname = "" +} + +BEGIN { + # Implementation error checking + awkchecked = check_awk_implement() + if (awkchecked != "") { + print "Error: " awkchecked > "/dev/stderr" + print "Please try to use gawk." > "/dev/stderr" + exit 1 + } + + # Setup generating tables + print "/* x86 opcode map generated from x86-opcode-map.txt */" + print "/* Do not change this code. 
*/\n" + ggid = 1 + geid = 1 + gaid = 0 + delete etable + delete gtable + delete atable + + opnd_expr = "^[A-Za-z/]" + ext_expr = "^\\(" + sep_expr = "^\\|$" + group_expr = "^Grp[0-9A-Za-z]+" + + imm_expr = "^[IJAOL][a-z]" + imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" + imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" + imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" + imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" + imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" + imm_flag["Ob"] = "INAT_MOFFSET" + imm_flag["Ov"] = "INAT_MOFFSET" + imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + + modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" + force64_expr = "\\([df]64\\)" + rex_expr = "^REX(\\.[XRWB]+)*" + fpu_expr = "^ESC" # TODO + + lprefix1_expr = "\\((66|!F3)\\)" + lprefix2_expr = "\\(F3\\)" + lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix_expr = "\\((66|F2|F3)\\)" + max_lprefix = 4 + + # All opcodes starting with lower-case 'v' or with (v1) superscript + # accepts VEX prefix + vexok_opcode_expr = "^v.*" + vexok_expr = "\\(v1\\)" + # All opcodes with (v) superscript supports *only* VEX prefix + vexonly_expr = "\\(v\\)" + + prefix_expr = "\\(Prefix\\)" + prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" + prefix_num["REPNE"] = "INAT_PFX_REPNE" + prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["XACQUIRE"] = "INAT_PFX_REPNE" + prefix_num["XRELEASE"] = "INAT_PFX_REPE" + prefix_num["LOCK"] = "INAT_PFX_LOCK" + prefix_num["SEG=CS"] = "INAT_PFX_CS" + prefix_num["SEG=DS"] = "INAT_PFX_DS" + prefix_num["SEG=ES"] = "INAT_PFX_ES" + prefix_num["SEG=FS"] = "INAT_PFX_FS" + prefix_num["SEG=GS"] = "INAT_PFX_GS" + prefix_num["SEG=SS"] = "INAT_PFX_SS" + prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" + prefix_num["VEX+2byte"] = 
"INAT_PFX_VEX3" + + clear_vars() +} + +function semantic_error(msg) { + print "Semantic error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +function debug(msg) { + print "DEBUG: " msg +} + +function array_size(arr, i,c) { + c = 0 + for (i in arr) + c++ + return c +} + +/^Table:/ { + print "/* " $0 " */" + if (tname != "") + semantic_error("Hit Table: before EndTable:."); +} + +/^Referrer:/ { + if (NF != 1) { + # escape opcode table + ref = "" + for (i = 2; i <= NF; i++) + ref = ref $i + eid = escape[ref] + tname = sprintf("inat_escape_table_%d", eid) + } +} + +/^AVXcode:/ { + if (NF != 1) { + # AVX/escape opcode table + aid = $2 + if (gaid <= aid) + gaid = aid + 1 + if (tname == "") # AVX only opcode table + tname = sprintf("inat_avx_table_%d", $2) + } + if (aid == -1 && eid == -1) # primary opcode table + tname = "inat_primary_table" +} + +/^GrpTable:/ { + print "/* " $0 " */" + if (!($2 in group)) + semantic_error("No group: " $2 ) + gid = group[$2] + tname = "inat_group_table_" gid +} + +function print_table(tbl,name,fmt,n) +{ + print "const insn_attr_t " name " = {" + for (i = 0; i < n; i++) { + id = sprintf(fmt, i) + if (tbl[id]) + print " [" id "] = " tbl[id] "," + } + print "};" +} + +/^EndTable/ { + if (gid != -1) { + # print group tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,3] = tname "_3" + } + } else { + # print primary/escaped tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,0] = tname + if (aid 
>= 0) + atable[aid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,1] = tname "_1" + if (aid >= 0) + atable[aid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,2] = tname "_2" + if (aid >= 0) + atable[aid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,3] = tname "_3" + if (aid >= 0) + atable[aid,3] = tname "_3" + } + } + print "" + clear_vars() +} + +function add_flags(old,new) { + if (old && new) + return old " | " new + else if (old) + return old + else + return new +} + +# convert operands to flags. +function convert_operands(count,opnd, i,j,imm,mod) +{ + imm = null + mod = null + for (j = 1; j <= count; j++) { + i = opnd[j] + if (match(i, imm_expr) == 1) { + if (!imm_flag[i]) + semantic_error("Unknown imm opnd: " i) + if (imm) { + if (i != "Ib") + semantic_error("Second IMM error") + imm = add_flags(imm, "INAT_SCNDIMM") + } else + imm = imm_flag[i] + } else if (match(i, modrm_expr)) + mod = "INAT_MODRM" + } + return add_flags(imm, mod) +} + +/^[0-9a-f]+\:/ { + if (NR == 1) + next + # get index + idx = "0x" substr($1, 1, index($1,":") - 1) + if (idx in table) + semantic_error("Redefine " idx " in " tname) + + # check if escaped opcode + if ("escape" == $2) { + if ($3 != "#") + semantic_error("No escaped name") + ref = "" + for (i = 4; i <= NF; i++) + ref = ref $i + if (ref in escape) + semantic_error("Redefine escape (" ref ")") + escape[ref] = geid + geid++ + table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" + next + } + + variant = null + # converts + i = 2 + while (i <= NF) { + opcode = $(i++) + delete opnds + ext = null + flags = null + opnd = null + # parse one opcode + if (match($i, opnd_expr)) { + opnd = $i + count = split($(i++), opnds, ",") + flags = convert_operands(count, 
opnds) + } + if (match($i, ext_expr)) + ext = $(i++) + if (match($i, sep_expr)) + i++ + else if (i < NF) + semantic_error($i " is not a separator") + + # check if group opcode + if (match(opcode, group_expr)) { + if (!(opcode in group)) { + group[opcode] = ggid + ggid++ + } + flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") + } + # check force(or default) 64bit + if (match(ext, force64_expr)) + flags = add_flags(flags, "INAT_FORCE64") + + # check REX prefix + if (match(opcode, rex_expr)) + flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") + + # check coprocessor escape : TODO + if (match(opcode, fpu_expr)) + flags = add_flags(flags, "INAT_MODRM") + + # check VEX codes + if (match(ext, vexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") + else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) + flags = add_flags(flags, "INAT_VEXOK") + + # check prefixes + if (match(ext, prefix_expr)) { + if (!prefix_num[opcode]) + semantic_error("Unknown prefix: " opcode) + flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") + } + if (length(flags) == 0) + continue + # check if last prefix + if (match(ext, lprefix1_expr)) { + lptable1[idx] = add_flags(lptable1[idx],flags) + variant = "INAT_VARIANT" + } + if (match(ext, lprefix2_expr)) { + lptable2[idx] = add_flags(lptable2[idx],flags) + variant = "INAT_VARIANT" + } + if (match(ext, lprefix3_expr)) { + lptable3[idx] = add_flags(lptable3[idx],flags) + variant = "INAT_VARIANT" + } + if (!match(ext, lprefix_expr)){ + table[idx] = add_flags(table[idx],flags) + } + } + if (variant) + table[idx] = add_flags(table[idx],variant) +} + +END { + if (awkchecked != "") + exit 1 + # print escape opcode map's array + print "/* Escape opcode map array */" + print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print " ["i"]["j"] = 
"etable[i,j]"," + print "};\n" + # print group opcode map's array + print "/* Group opcode map array */" + print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print " ["i"]["j"] = "gtable[i,j]"," + print "};\n" + # print AVX opcode map's array + print "/* AVX opcode map array */" + print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print " ["i"]["j"] = "atable[i,j]"," + print "};" +} diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c new file mode 100644 index 000000000000..906d94aa0a24 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat.c @@ -0,0 +1,96 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + */ +#include "insn.h" + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +int inat_get_last_prefix_id(insn_byte_t last_pfx) +{ + insn_attr_t lpfx_attr; + + lpfx_attr = inat_get_opcode_attribute(last_pfx); + return inat_last_prefix_id(lpfx_attr); +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + int n; + + n = inat_escape_id(esc_attr); + + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && lpfx_id) { + table = inat_escape_tables[n][lpfx_id]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + int n; + + n = inat_group_id(grp_attr); + + table = inat_group_tables[n][0]; + if (!table) + return inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { + table = inat_group_tables[n][lpfx_id]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + +insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, + insn_byte_t vex_p) +{ + const insn_attr_t *table; + if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) + return 0; + /* At first, this checks the master table */ + table = inat_avx_tables[vex_m][0]; + if (!table) + return 0; + if (!inat_is_group(table[opcode]) && vex_p) { + /* If this is not a group, get attribute directly */ + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + } + return table[opcode]; +} diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h new file mode 100644 index 000000000000..611645e903a8 --- 
/dev/null +++ b/tools/perf/util/intel-pt-decoder/inat.h @@ -0,0 +1,221 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include "inat_types.h" + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. 
+ */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy last prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ +/* Other Legacy prefixes */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ + +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define 
INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern int inat_get_last_prefix_id(insn_byte_t last_pfx); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + int lpfx_id, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); + +/* Attribute checking functions */ +static inline int inat_is_legacy_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static 
inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} +#endif diff --git a/tools/perf/util/intel-pt-decoder/inat_types.h b/tools/perf/util/intel-pt-decoder/inat_types.h new file mode 100644 index 000000000000..cb3c20ce39cf --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c new file mode 100644 index 000000000000..47314a64399c --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/insn.c @@ -0,0 +1,594 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#ifdef __KERNEL__ +#include <linux/string.h> +#else +#include <string.h> +#endif +#include "inat.h" +#include "insn.h" + +/* Verify next sizeof(t) bytes can be on the same instruction */ +#define validate_next(t, insn, n) \ + ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) + +#define __get_next(t, insn) \ + ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define __peek_nbyte_next(t, insn, n) \ + ({ t r = *(t*)((insn)->next_byte + n); r; }) + +#define get_next(t, insn) \ + ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) + +#define peek_nbyte_next(t, insn, n) \ + ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) + +#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @x86_64: !0 for 64-bit kernel or 64-bit app + */ +void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) +{ + /* + * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid + * even if the input buffer is long enough to hold them. + */ + if (buf_len > MAX_INSN_SIZE) + buf_len = MAX_INSN_SIZE; + + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->end_kaddr = kaddr + buf_len; + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. 
+ */ +void insn_get_prefixes(struct insn *insn) +{ + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; + insn_byte_t b, lb; + int i, nb; + + if (prefixes->got) + return; + + nb = 0; + lb = 0; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + while (inat_is_legacy_prefix(attr)) { + /* Skip if same prefix */ + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == b) + goto found; + if (nb == 4) + /* Invalid instruction */ + break; + prefixes->bytes[nb++] = b; + if (inat_is_address_size_prefix(attr)) { + /* address size switches 2/4 or 4/8 */ + if (insn->x86_64) + insn->addr_bytes ^= 12; + else + insn->addr_bytes ^= 6; + } else if (inat_is_operand_size_prefix(attr)) { + /* operand size switches 2/4 */ + insn->opnd_bytes ^= 6; + } +found: + prefixes->nbytes++; + insn->next_byte++; + lb = b; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + } + /* Set the last prefix */ + if (lb && lb != insn->prefixes.bytes[3]) { + if (unlikely(insn->prefixes.bytes[3])) { + /* Swap the last prefix */ + b = insn->prefixes.bytes[3]; + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == lb) + prefixes->bytes[i] = b; + } + insn->prefixes.bytes[3] = lb; + } + + /* Decode REX prefix */ + if (insn->x86_64) { + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_rex_prefix(attr)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } + } + insn->rex_prefix.got = 1; + + /* Decode VEX prefix */ + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_vex_prefix(attr)) { + insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); + if (!insn->x86_64) { + /* + * In 32-bit mode, if the [7:6] bits (mod bits of + * ModRM) on the second byte are not 11b, it is + * LDS or LES. 
+ */ + if (X86_MODRM_MOD(b2) != 3) + goto vex_end; + } + insn->vex_prefix.bytes[0] = b; + insn->vex_prefix.bytes[1] = b2; + if (inat_is_vex3_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + insn->vex_prefix.nbytes = 3; + insn->next_byte += 3; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else { + /* + * For VEX2, fake VEX3-like byte#2. + * Makes it easier to decode vex.W, vex.vvvv, + * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. + */ + insn->vex_prefix.bytes[2] = b2 & 0x7f; + insn->vex_prefix.nbytes = 2; + insn->next_byte += 2; + } + } +vex_end: + insn->vex_prefix.got = 1; + + prefixes->got = 1; + +err_out: + return; +} + +/** + * insn_get_opcode - collect opcode(s) + * @insn: &struct insn containing instruction + * + * Populates @insn->opcode, updates @insn->next_byte to point past the + * opcode byte(s), and set @insn->attr (except for groups). + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. 
+ */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op; + int pfx_id; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + + /* Check if there is VEX prefix or not */ + if (insn_is_avx(insn)) { + insn_byte_t m, p; + m = insn_vex_m_bits(insn); + p = insn_vex_p_bits(insn); + insn->attr = inat_get_avx_attribute(op, m, p); + if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) + insn->attr = 0; /* This instruction is bad */ + goto end; /* VEX has only 1 byte for opcode */ + } + + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Get escaped opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); + } + if (inat_must_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ +end: + opcode->got = 1; + +err_out: + return; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. 
+ */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx_id, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_group_attribute(mod, pfx_id, + insn->attr); + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) + insn->attr = 0; /* This is bad */ + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; + +err_out: + return; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. No effect if @insn->x86_64 is 0. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. 
+ */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; + +err_out: + return; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... + * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2... 
+ * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; + +err_out: + return; +} + +/* Decode moffset16/32/64. Return 0 if failed */ +static int __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + default: /* addr_bytes must be modified manually */ + goto err_out; + } + insn->moffset1.got = insn->moffset2.got = 1; + + return 1; + +err_out: + return 0; +} + +/* Decode imm v32(Iz). 
Return 0 if failed */ +static int __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + + return 1; + +err_out: + return 0; +} + +/* Decode imm v64(Iv/Ov). Return 0 if failed */ +static int __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->immediate1.got = insn->immediate2.got = 1; + + return 1; +err_out: + return 0; +} + +/* Decode ptr16:16/32(Ap) */ +static int __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 does not exist (no segment) */ + return 0; + default: /* opnd_bytes must be modified manually */ + goto err_out; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; + + return 1; +err_out: + return 0; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. 
The unsigned value can be + * obtained by bit masking with ((1 << (nbytes * 8)) - 1) + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + if (!__get_moffset(insn)) + goto err_out; + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + if (!__get_immptr(insn)) + goto err_out; + break; + case INAT_IMM_VWORD32: + if (!__get_immv32(insn)) + goto err_out; + break; + case INAT_IMM_VWORD: + if (!__get_immv(insn)) + goto err_out; + break; + default: + /* Here, insn must have an immediate, but failed */ + goto err_out; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; + +err_out: + return; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediate bytes. 
+ */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); +} diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h new file mode 100644 index 000000000000..dd12da0f4593 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/insn.h @@ -0,0 +1,201 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include "inat.h" + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ + const insn_byte_t *next_byte; +}; + +#define MAX_INSN_SIZE 15 + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) 
& 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + +extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, + const void *kaddr, int buf_len) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, buf_len, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, buf_len, 0); +#endif +} + +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +/* Ensure this instruction is decoded completely */ +static inline int insn_complete(struct insn *insn) +{ + return insn->opcode.got && insn->modrm.got && insn->sib.got && + insn->displacement.got && insn->immediate.got; +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t 
insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + +/* Get the last prefix id from last prefix or VEX prefix */ +static inline int insn_last_prefix_id(struct insn *insn) +{ + if (insn_is_avx(insn)) + return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ + + if (insn->prefixes.bytes[3]) + return inat_get_last_prefix_id(insn->prefixes.bytes[3]); + + return 0; +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_vex_prefix(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c new file mode 100644 index 000000000000..9409d014b46c --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -0,0 +1,2345 @@ +/* + * intel_pt_decoder.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <stdint.h> +#include <inttypes.h> + +#include "../cache.h" +#include "../util.h" + +#include "intel-pt-insn-decoder.h" +#include "intel-pt-pkt-decoder.h" +#include "intel-pt-decoder.h" +#include "intel-pt-log.h" + +#define INTEL_PT_BLK_SIZE 1024 + +#define BIT63 (((uint64_t)1 << 63)) + +#define INTEL_PT_RETURN 1 + +/* Maximum number of loops with no packets consumed i.e. stuck in a loop */ +#define INTEL_PT_MAX_LOOPS 10000 + +struct intel_pt_blk { + struct intel_pt_blk *prev; + uint64_t ip[INTEL_PT_BLK_SIZE]; +}; + +struct intel_pt_stack { + struct intel_pt_blk *blk; + struct intel_pt_blk *spare; + int pos; +}; + +enum intel_pt_pkt_state { + INTEL_PT_STATE_NO_PSB, + INTEL_PT_STATE_NO_IP, + INTEL_PT_STATE_ERR_RESYNC, + INTEL_PT_STATE_IN_SYNC, + INTEL_PT_STATE_TNT, + INTEL_PT_STATE_TIP, + INTEL_PT_STATE_TIP_PGD, + INTEL_PT_STATE_FUP, + INTEL_PT_STATE_FUP_NO_TIP, +}; + +#ifdef INTEL_PT_STRICT +#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB +#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB +#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB +#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB +#else +#define INTEL_PT_STATE_ERR1 (decoder->pkt_state) +#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP +#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC +#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC +#endif + +struct intel_pt_decoder { + int (*get_trace)(struct 
intel_pt_buffer *buffer, void *data); + int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, + uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, + uint64_t max_insn_cnt, void *data); + void *data; + struct intel_pt_state state; + const unsigned char *buf; + size_t len; + bool return_compression; + bool mtc_insn; + bool pge; + bool have_tma; + bool have_cyc; + uint64_t pos; + uint64_t last_ip; + uint64_t ip; + uint64_t cr3; + uint64_t timestamp; + uint64_t tsc_timestamp; + uint64_t ref_timestamp; + uint64_t ret_addr; + uint64_t ctc_timestamp; + uint64_t ctc_delta; + uint64_t cycle_cnt; + uint64_t cyc_ref_timestamp; + uint32_t last_mtc; + uint32_t tsc_ctc_ratio_n; + uint32_t tsc_ctc_ratio_d; + uint32_t tsc_ctc_mult; + uint32_t tsc_slip; + uint32_t ctc_rem_mask; + int mtc_shift; + struct intel_pt_stack stack; + enum intel_pt_pkt_state pkt_state; + struct intel_pt_pkt packet; + struct intel_pt_pkt tnt; + int pkt_step; + int pkt_len; + int last_packet_type; + unsigned int cbr; + unsigned int max_non_turbo_ratio; + double max_non_turbo_ratio_fp; + double cbr_cyc_to_tsc; + double calc_cyc_to_tsc; + bool have_calc_cyc_to_tsc; + int exec_mode; + unsigned int insn_bytes; + uint64_t sign_bit; + uint64_t sign_bits; + uint64_t period; + enum intel_pt_period_type period_type; + uint64_t tot_insn_cnt; + uint64_t period_insn_cnt; + uint64_t period_mask; + uint64_t period_ticks; + uint64_t last_masked_timestamp; + bool continuous_period; + bool overflow; + bool set_fup_tx_flags; + unsigned int fup_tx_flags; + unsigned int tx_flags; + uint64_t timestamp_insn_cnt; + uint64_t stuck_ip; + int no_progress; + int stuck_ip_prd; + int stuck_ip_cnt; + const unsigned char *next_buf; + size_t next_len; + unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ]; +}; + +static uint64_t intel_pt_lower_power_of_2(uint64_t x) +{ + int i; + + for (i = 0; x != 1; i++) + x >>= 1; + + return x << i; +} + +static void intel_pt_setup_period(struct intel_pt_decoder *decoder) +{ + if (decoder->period_type == 
INTEL_PT_PERIOD_TICKS) { + uint64_t period; + + period = intel_pt_lower_power_of_2(decoder->period); + decoder->period_mask = ~(period - 1); + decoder->period_ticks = period; + } +} + +static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d) +{ + if (!d) + return 0; + return (t / d) * n + ((t % d) * n) / d; +} + +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) +{ + struct intel_pt_decoder *decoder; + + if (!params->get_trace || !params->walk_insn) + return NULL; + + decoder = zalloc(sizeof(struct intel_pt_decoder)); + if (!decoder) + return NULL; + + decoder->get_trace = params->get_trace; + decoder->walk_insn = params->walk_insn; + decoder->data = params->data; + decoder->return_compression = params->return_compression; + + decoder->sign_bit = (uint64_t)1 << 47; + decoder->sign_bits = ~(((uint64_t)1 << 48) - 1); + + decoder->period = params->period; + decoder->period_type = params->period_type; + + decoder->max_non_turbo_ratio = params->max_non_turbo_ratio; + decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio; + + intel_pt_setup_period(decoder); + + decoder->mtc_shift = params->mtc_period; + decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1; + + decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n; + decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d; + + if (!decoder->tsc_ctc_ratio_n) + decoder->tsc_ctc_ratio_d = 0; + + if (decoder->tsc_ctc_ratio_d) { + if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d)) + decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n / + decoder->tsc_ctc_ratio_d; + + /* + * Allow for timestamps appearing to go backwards because a TSC + * packet has slipped past a MTC packet, so allow 2 MTC ticks + * or ... 
or 0x100 paranoia */ + if (decoder->tsc_slip < 0x100) + decoder->tsc_slip = 0x100; + + intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift); + intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n); + intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d); + intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult); + intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip); + + return decoder; +} + +static void intel_pt_pop_blk(struct intel_pt_stack *stack) +{ + struct intel_pt_blk *blk = stack->blk; + + stack->blk = blk->prev; + if (!stack->spare) + stack->spare = blk; + else + free(blk); +} + +static uint64_t intel_pt_pop(struct intel_pt_stack *stack) +{ + if (!stack->pos) { + if (!stack->blk) + return 0; + intel_pt_pop_blk(stack); + if (!stack->blk) + return 0; + stack->pos = INTEL_PT_BLK_SIZE; + } + return stack->blk->ip[--stack->pos]; +} + +static int intel_pt_alloc_blk(struct intel_pt_stack *stack) +{ + struct intel_pt_blk *blk; + + if (stack->spare) { + blk = stack->spare; + stack->spare = NULL; + } else { + blk = malloc(sizeof(struct intel_pt_blk)); + if (!blk) + return -ENOMEM; + } + + blk->prev = stack->blk; + stack->blk = blk; + stack->pos = 0; + return 0; +} + +static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip) +{ + int err; + + if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) { + err = intel_pt_alloc_blk(stack); + if (err) + return err; + } + + stack->blk->ip[stack->pos++] = ip; + return 0; +} + +static void intel_pt_clear_stack(struct intel_pt_stack *stack) +{ + while (stack->blk) + intel_pt_pop_blk(stack); + stack->pos = 0; +} + +static void intel_pt_free_stack(struct intel_pt_stack *stack) +{ + intel_pt_clear_stack(stack); + zfree(&stack->blk); + zfree(&stack->spare); +} + +void intel_pt_decoder_free(struct intel_pt_decoder *decoder) +{ + intel_pt_free_stack(&decoder->stack); + free(decoder); +} + +static int intel_pt_ext_err(int code) +{ + switch (code) { + case 
-ENOMEM: + return INTEL_PT_ERR_NOMEM; + case -ENOSYS: + return INTEL_PT_ERR_INTERN; + case -EBADMSG: + return INTEL_PT_ERR_BADPKT; + case -ENODATA: + return INTEL_PT_ERR_NODATA; + case -EILSEQ: + return INTEL_PT_ERR_NOINSN; + case -ENOENT: + return INTEL_PT_ERR_MISMAT; + case -EOVERFLOW: + return INTEL_PT_ERR_OVR; + case -ENOSPC: + return INTEL_PT_ERR_LOST; + case -ELOOP: + return INTEL_PT_ERR_NELOOP; + default: + return INTEL_PT_ERR_UNK; + } +} + +static const char *intel_pt_err_msgs[] = { + [INTEL_PT_ERR_NOMEM] = "Memory allocation failed", + [INTEL_PT_ERR_INTERN] = "Internal error", + [INTEL_PT_ERR_BADPKT] = "Bad packet", + [INTEL_PT_ERR_NODATA] = "No more data", + [INTEL_PT_ERR_NOINSN] = "Failed to get instruction", + [INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction", + [INTEL_PT_ERR_OVR] = "Overflow packet", + [INTEL_PT_ERR_LOST] = "Lost trace data", + [INTEL_PT_ERR_UNK] = "Unknown error!", + [INTEL_PT_ERR_NELOOP] = "Never-ending loop", +}; + +int intel_pt__strerror(int code, char *buf, size_t buflen) +{ + if (code < 1 || code > INTEL_PT_ERR_MAX) + code = INTEL_PT_ERR_UNK; + strlcpy(buf, intel_pt_err_msgs[code], buflen); + return 0; +} + +static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, + const struct intel_pt_pkt *packet, + uint64_t last_ip) +{ + uint64_t ip; + + switch (packet->count) { + case 2: + ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) | + packet->payload; + break; + case 4: + ip = (last_ip & (uint64_t)0xffffffff00000000ULL) | + packet->payload; + break; + case 6: + ip = packet->payload; + break; + default: + return 0; + } + + if (ip & decoder->sign_bit) + return ip | decoder->sign_bits; + + return ip; +} + +static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) +{ + decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet, + decoder->last_ip); +} + +static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) +{ + intel_pt_set_last_ip(decoder); + decoder->ip = decoder->last_ip; +} + 
+static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder) +{ + intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos, + decoder->buf); +} + +static int intel_pt_bug(struct intel_pt_decoder *decoder) +{ + intel_pt_log("ERROR: Internal error\n"); + decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + return -ENOSYS; +} + +static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder) +{ + decoder->tx_flags = 0; +} + +static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder) +{ + decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX; +} + +static int intel_pt_bad_packet(struct intel_pt_decoder *decoder) +{ + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + decoder->pkt_len = 1; + decoder->pkt_step = 1; + intel_pt_decoder_log_packet(decoder); + if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) { + intel_pt_log("ERROR: Bad packet\n"); + decoder->pkt_state = INTEL_PT_STATE_ERR1; + } + return -EBADMSG; +} + +static int intel_pt_get_data(struct intel_pt_decoder *decoder) +{ + struct intel_pt_buffer buffer = { .buf = 0, }; + int ret; + + decoder->pkt_step = 0; + + intel_pt_log("Getting more data\n"); + ret = decoder->get_trace(&buffer, decoder->data); + if (ret) + return ret; + decoder->buf = buffer.buf; + decoder->len = buffer.len; + if (!decoder->len) { + intel_pt_log("No more data\n"); + return -ENODATA; + } + if (!buffer.consecutive) { + decoder->ip = 0; + decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + decoder->ref_timestamp = buffer.ref_timestamp; + decoder->timestamp = 0; + decoder->have_tma = false; + decoder->state.trace_nr = buffer.trace_nr; + intel_pt_log("Reference timestamp 0x%" PRIx64 "\n", + decoder->ref_timestamp); + return -ENOLINK; + } + + return 0; +} + +static int intel_pt_get_next_data(struct intel_pt_decoder *decoder) +{ + if (!decoder->next_buf) + return intel_pt_get_data(decoder); + + decoder->buf = decoder->next_buf; + decoder->len = decoder->next_len; + decoder->next_buf = 
0; + decoder->next_len = 0; + return 0; +} + +static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) +{ + unsigned char *buf = decoder->temp_buf; + size_t old_len, len, n; + int ret; + + old_len = decoder->len; + len = decoder->len; + memcpy(buf, decoder->buf, len); + + ret = intel_pt_get_data(decoder); + if (ret) { + decoder->pos += old_len; + return ret < 0 ? ret : -EINVAL; + } + + n = INTEL_PT_PKT_MAX_SZ - len; + if (n > decoder->len) + n = decoder->len; + memcpy(buf + len, decoder->buf, n); + len += n; + + ret = intel_pt_get_packet(buf, len, &decoder->packet); + if (ret < (int)old_len) { + decoder->next_buf = decoder->buf; + decoder->next_len = decoder->len; + decoder->buf = buf; + decoder->len = old_len; + return intel_pt_bad_packet(decoder); + } + + decoder->next_buf = decoder->buf + (ret - old_len); + decoder->next_len = decoder->len - (ret - old_len); + + decoder->buf = buf; + decoder->len = ret; + + return ret; +} + +struct intel_pt_pkt_info { + struct intel_pt_decoder *decoder; + struct intel_pt_pkt packet; + uint64_t pos; + int pkt_len; + int last_packet_type; + void *data; +}; + +typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info); + +/* Lookahead packets in current buffer */ +static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder, + intel_pt_pkt_cb_t cb, void *data) +{ + struct intel_pt_pkt_info pkt_info; + const unsigned char *buf = decoder->buf; + size_t len = decoder->len; + int ret; + + pkt_info.decoder = decoder; + pkt_info.pos = decoder->pos; + pkt_info.pkt_len = decoder->pkt_step; + pkt_info.last_packet_type = decoder->last_packet_type; + pkt_info.data = data; + + while (1) { + do { + pkt_info.pos += pkt_info.pkt_len; + buf += pkt_info.pkt_len; + len -= pkt_info.pkt_len; + + if (!len) + return INTEL_PT_NEED_MORE_BYTES; + + ret = intel_pt_get_packet(buf, len, &pkt_info.packet); + if (!ret) + return INTEL_PT_NEED_MORE_BYTES; + if (ret < 0) + return ret; + + pkt_info.pkt_len = ret; + } while 
(pkt_info.packet.type == INTEL_PT_PAD); + + ret = cb(&pkt_info); + if (ret) + return 0; + + pkt_info.last_packet_type = pkt_info.packet.type; + } +} + +struct intel_pt_calc_cyc_to_tsc_info { + uint64_t cycle_cnt; + unsigned int cbr; + uint32_t last_mtc; + uint64_t ctc_timestamp; + uint64_t ctc_delta; + uint64_t tsc_timestamp; + uint64_t timestamp; + bool have_tma; + bool from_mtc; + double cbr_cyc_to_tsc; +}; + +static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) +{ + struct intel_pt_decoder *decoder = pkt_info->decoder; + struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data; + uint64_t timestamp; + double cyc_to_tsc; + unsigned int cbr; + uint32_t mtc, mtc_delta, ctc, fc, ctc_rem; + + switch (pkt_info->packet.type) { + case INTEL_PT_TNT: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + case INTEL_PT_FUP: + case INTEL_PT_PSB: + case INTEL_PT_PIP: + case INTEL_PT_MODE_EXEC: + case INTEL_PT_MODE_TSX: + case INTEL_PT_PSBEND: + case INTEL_PT_PAD: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + return 0; + + case INTEL_PT_MTC: + if (!data->have_tma) + return 0; + + mtc = pkt_info->packet.payload; + if (mtc > data->last_mtc) + mtc_delta = mtc - data->last_mtc; + else + mtc_delta = mtc + 256 - data->last_mtc; + data->ctc_delta += mtc_delta << decoder->mtc_shift; + data->last_mtc = mtc; + + if (decoder->tsc_ctc_mult) { + timestamp = data->ctc_timestamp + + data->ctc_delta * decoder->tsc_ctc_mult; + } else { + timestamp = data->ctc_timestamp + + multdiv(data->ctc_delta, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + if (timestamp < data->timestamp) + return 1; + + if (pkt_info->last_packet_type != INTEL_PT_CYC) { + data->timestamp = timestamp; + return 0; + } + + break; + + case INTEL_PT_TSC: + timestamp = pkt_info->packet.payload | + (data->timestamp & (0xffULL << 56)); + if (data->from_mtc && timestamp < data->timestamp && + data->timestamp - timestamp < decoder->tsc_slip) + return 1; + if (timestamp < data->timestamp) + timestamp += 
(1ULL << 56); + if (pkt_info->last_packet_type != INTEL_PT_CYC) { + if (data->from_mtc) + return 1; + data->tsc_timestamp = timestamp; + data->timestamp = timestamp; + return 0; + } + break; + + case INTEL_PT_TMA: + if (data->from_mtc) + return 1; + + if (!decoder->tsc_ctc_ratio_d) + return 0; + + ctc = pkt_info->packet.payload; + fc = pkt_info->packet.count; + ctc_rem = ctc & decoder->ctc_rem_mask; + + data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; + + data->ctc_timestamp = data->tsc_timestamp - fc; + if (decoder->tsc_ctc_mult) { + data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; + } else { + data->ctc_timestamp -= + multdiv(ctc_rem, decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + data->ctc_delta = 0; + data->have_tma = true; + + return 0; + + case INTEL_PT_CYC: + data->cycle_cnt += pkt_info->packet.payload; + return 0; + + case INTEL_PT_CBR: + cbr = pkt_info->packet.payload; + if (data->cbr && data->cbr != cbr) + return 1; + data->cbr = cbr; + data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; + return 0; + + case INTEL_PT_TIP_PGD: + case INTEL_PT_TRACESTOP: + case INTEL_PT_OVF: + case INTEL_PT_BAD: /* Does not happen */ + default: + return 1; + } + + if (!data->cbr && decoder->cbr) { + data->cbr = decoder->cbr; + data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc; + } + + if (!data->cycle_cnt) + return 1; + + cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt; + + if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc && + cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n", + cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos); + return 1; + } + + decoder->calc_cyc_to_tsc = cyc_to_tsc; + decoder->have_calc_cyc_to_tsc = true; + + if (data->cbr) { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. 
CBR-based value %g, pos " x64_fmt "\n", + cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos); + } else { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n", + cyc_to_tsc, pkt_info->pos); + } + + return 1; +} + +static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder, + bool from_mtc) +{ + struct intel_pt_calc_cyc_to_tsc_info data = { + .cycle_cnt = 0, + .cbr = 0, + .last_mtc = decoder->last_mtc, + .ctc_timestamp = decoder->ctc_timestamp, + .ctc_delta = decoder->ctc_delta, + .tsc_timestamp = decoder->tsc_timestamp, + .timestamp = decoder->timestamp, + .have_tma = decoder->have_tma, + .from_mtc = from_mtc, + .cbr_cyc_to_tsc = 0, + }; + + intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data); +} + +static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) +{ + int ret; + + decoder->last_packet_type = decoder->packet.type; + + do { + decoder->pos += decoder->pkt_step; + decoder->buf += decoder->pkt_step; + decoder->len -= decoder->pkt_step; + + if (!decoder->len) { + ret = intel_pt_get_next_data(decoder); + if (ret) + return ret; + } + + ret = intel_pt_get_packet(decoder->buf, decoder->len, + &decoder->packet); + if (ret == INTEL_PT_NEED_MORE_BYTES && + decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { + ret = intel_pt_get_split_packet(decoder); + if (ret < 0) + return ret; + } + if (ret <= 0) + return intel_pt_bad_packet(decoder); + + decoder->pkt_len = ret; + decoder->pkt_step = ret; + intel_pt_decoder_log_packet(decoder); + } while (decoder->packet.type == INTEL_PT_PAD); + + return 0; +} + +static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp, masked_timestamp; + + timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; + masked_timestamp = timestamp & decoder->period_mask; + if (decoder->continuous_period) { + if (masked_timestamp != decoder->last_masked_timestamp) + return 1; + } else { + timestamp += 1; + 
masked_timestamp = timestamp & decoder->period_mask; + if (masked_timestamp != decoder->last_masked_timestamp) { + decoder->last_masked_timestamp = masked_timestamp; + decoder->continuous_period = true; + } + } + return decoder->period_ticks - (timestamp - masked_timestamp); +} + +static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder) +{ + switch (decoder->period_type) { + case INTEL_PT_PERIOD_INSTRUCTIONS: + return decoder->period - decoder->period_insn_cnt; + case INTEL_PT_PERIOD_TICKS: + return intel_pt_next_period(decoder); + case INTEL_PT_PERIOD_NONE: + case INTEL_PT_PERIOD_MTC: + default: + return 0; + } +} + +static void intel_pt_sample_insn(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp, masked_timestamp; + + switch (decoder->period_type) { + case INTEL_PT_PERIOD_INSTRUCTIONS: + decoder->period_insn_cnt = 0; + break; + case INTEL_PT_PERIOD_TICKS: + timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; + masked_timestamp = timestamp & decoder->period_mask; + decoder->last_masked_timestamp = masked_timestamp; + break; + case INTEL_PT_PERIOD_NONE: + case INTEL_PT_PERIOD_MTC: + default: + break; + } + + decoder->state.type |= INTEL_PT_INSTRUCTION; +} + +static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, + struct intel_pt_insn *intel_pt_insn, uint64_t ip) +{ + uint64_t max_insn_cnt, insn_cnt = 0; + int err; + + if (!decoder->mtc_insn) + decoder->mtc_insn = true; + + max_insn_cnt = intel_pt_next_sample(decoder); + + err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip, + max_insn_cnt, decoder->data); + + decoder->tot_insn_cnt += insn_cnt; + decoder->timestamp_insn_cnt += insn_cnt; + decoder->period_insn_cnt += insn_cnt; + + if (err) { + decoder->no_progress = 0; + decoder->pkt_state = INTEL_PT_STATE_ERR2; + intel_pt_log_at("ERROR: Failed to get instruction", + decoder->ip); + if (err == -ENOENT) + return -ENOLINK; + return -EILSEQ; + } + + if (ip && decoder->ip == ip) { + err = -EAGAIN; + goto out; + 
} + + if (max_insn_cnt && insn_cnt >= max_insn_cnt) + intel_pt_sample_insn(decoder); + + if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) { + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->ip += intel_pt_insn->length; + err = INTEL_PT_RETURN; + goto out; + } + + if (intel_pt_insn->op == INTEL_PT_OP_CALL) { + /* Zero-length calls are excluded */ + if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL || + intel_pt_insn->rel) { + err = intel_pt_push(&decoder->stack, decoder->ip + + intel_pt_insn->length); + if (err) + goto out; + } + } else if (intel_pt_insn->op == INTEL_PT_OP_RET) { + decoder->ret_addr = intel_pt_pop(&decoder->stack); + } + + if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) { + int cnt = decoder->no_progress++; + + decoder->state.from_ip = decoder->ip; + decoder->ip += intel_pt_insn->length + + intel_pt_insn->rel; + decoder->state.to_ip = decoder->ip; + err = INTEL_PT_RETURN; + + /* + * Check for being stuck in a loop. This can happen if a + * decoder error results in the decoder erroneously setting the + * ip to an address that is itself in an infinite loop that + * consumes no packets. When that happens, there must be an + * unconditional branch. 
+ */ + if (cnt) { + if (cnt == 1) { + decoder->stuck_ip = decoder->state.to_ip; + decoder->stuck_ip_prd = 1; + decoder->stuck_ip_cnt = 1; + } else if (cnt > INTEL_PT_MAX_LOOPS || + decoder->state.to_ip == decoder->stuck_ip) { + intel_pt_log_at("ERROR: Never-ending loop", + decoder->state.to_ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + err = -ELOOP; + goto out; + } else if (!--decoder->stuck_ip_cnt) { + decoder->stuck_ip_prd += 1; + decoder->stuck_ip_cnt = decoder->stuck_ip_prd; + decoder->stuck_ip = decoder->state.to_ip; + } + } + goto out_no_progress; + } +out: + decoder->no_progress = 0; +out_no_progress: + decoder->state.insn_op = intel_pt_insn->op; + decoder->state.insn_len = intel_pt_insn->length; + + if (decoder->tx_flags & INTEL_PT_IN_TX) + decoder->state.flags |= INTEL_PT_IN_TX; + + return err; +} + +static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) +{ + struct intel_pt_insn intel_pt_insn; + uint64_t ip; + int err; + + ip = decoder->last_ip; + + while (1) { + err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip); + if (err == INTEL_PT_RETURN) + return 0; + if (err == -EAGAIN) { + if (decoder->set_fup_tx_flags) { + decoder->set_fup_tx_flags = false; + decoder->tx_flags = decoder->fup_tx_flags; + decoder->state.type = INTEL_PT_TRANSACTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.flags = decoder->fup_tx_flags; + return 0; + } + return err; + } + decoder->set_fup_tx_flags = false; + if (err) + return err; + + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { + intel_pt_log_at("ERROR: Unexpected indirect branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + return -ENOENT; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + intel_pt_log_at("ERROR: Unexpected conditional branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + return -ENOENT; + } + + intel_pt_bug(decoder); + } +} + +static int intel_pt_walk_tip(struct intel_pt_decoder 
*decoder) +{ + struct intel_pt_insn intel_pt_insn; + int err; + + err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); + if (err == INTEL_PT_RETURN) + return 0; + if (err) + return err; + + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { + if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) { + decoder->pge = false; + decoder->continuous_period = false; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + if (decoder->packet.count != 0) + decoder->ip = decoder->last_ip; + } else { + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + decoder->state.to_ip = decoder->last_ip; + decoder->ip = decoder->last_ip; + } + } + return 0; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + return -ENOENT; + } + + return intel_pt_bug(decoder); +} + +static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) +{ + struct intel_pt_insn intel_pt_insn; + int err; + + while (1) { + err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); + if (err == INTEL_PT_RETURN) + return 0; + if (err) + return err; + + if (intel_pt_insn.op == INTEL_PT_OP_RET) { + if (!decoder->return_compression) { + intel_pt_log_at("ERROR: RET when expecting conditional branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + if (!decoder->ret_addr) { + intel_pt_log_at("ERROR: Bad RET compression (stack empty)", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + if (!(decoder->tnt.payload & BIT63)) { + intel_pt_log_at("ERROR: Bad RET compression (TNT=N)", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + return -ENOENT; + } + decoder->tnt.count -= 1; + if (!decoder->tnt.count) + decoder->pkt_state = 
INTEL_PT_STATE_IN_SYNC; + decoder->tnt.payload <<= 1; + decoder->state.from_ip = decoder->ip; + decoder->ip = decoder->ret_addr; + decoder->state.to_ip = decoder->ip; + return 0; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { + /* Handle deferred TIPs */ + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type != INTEL_PT_TIP || + decoder->packet.count == 0) { + intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch", + decoder->ip); + decoder->pkt_state = INTEL_PT_STATE_ERR3; + decoder->pkt_step = 0; + return -ENOENT; + } + intel_pt_set_last_ip(decoder); + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = decoder->last_ip; + decoder->ip = decoder->last_ip; + return 0; + } + + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { + decoder->tnt.count -= 1; + if (!decoder->tnt.count) + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + if (decoder->tnt.payload & BIT63) { + decoder->tnt.payload <<= 1; + decoder->state.from_ip = decoder->ip; + decoder->ip += intel_pt_insn.length + + intel_pt_insn.rel; + decoder->state.to_ip = decoder->ip; + return 0; + } + /* Instruction sample for a non-taken branch */ + if (decoder->state.type & INTEL_PT_INSTRUCTION) { + decoder->tnt.payload <<= 1; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->ip += intel_pt_insn.length; + return 0; + } + decoder->ip += intel_pt_insn.length; + if (!decoder->tnt.count) + return -EAGAIN; + decoder->tnt.payload <<= 1; + continue; + } + + return intel_pt_bug(decoder); + } +} + +static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip) +{ + unsigned int fup_tx_flags; + int err; + + fup_tx_flags = decoder->packet.payload & + (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX); + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type == INTEL_PT_FUP) { + decoder->fup_tx_flags = fup_tx_flags; + 
decoder->set_fup_tx_flags = true; + if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX)) + *no_tip = true; + } else { + intel_pt_log_at("ERROR: Missing FUP after MODE.TSX", + decoder->pos); + intel_pt_update_in_tx(decoder); + } + return 0; +} + +static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp; + + decoder->have_tma = false; + + if (decoder->ref_timestamp) { + timestamp = decoder->packet.payload | + (decoder->ref_timestamp & (0xffULL << 56)); + if (timestamp < decoder->ref_timestamp) { + if (decoder->ref_timestamp - timestamp > (1ULL << 55)) + timestamp += (1ULL << 56); + } else { + if (timestamp - decoder->ref_timestamp > (1ULL << 55)) + timestamp -= (1ULL << 56); + } + decoder->tsc_timestamp = timestamp; + decoder->timestamp = timestamp; + decoder->ref_timestamp = 0; + decoder->timestamp_insn_cnt = 0; + } else if (decoder->timestamp) { + timestamp = decoder->packet.payload | + (decoder->timestamp & (0xffULL << 56)); + decoder->tsc_timestamp = timestamp; + if (timestamp < decoder->timestamp && + decoder->timestamp - timestamp < decoder->tsc_slip) { + intel_pt_log_to("Suppressing backwards timestamp", + timestamp); + timestamp = decoder->timestamp; + } + if (timestamp < decoder->timestamp) { + intel_pt_log_to("Wraparound timestamp", timestamp); + timestamp += (1ULL << 56); + decoder->tsc_timestamp = timestamp; + } + decoder->timestamp = timestamp; + decoder->timestamp_insn_cnt = 0; + } + + if (decoder->last_packet_type == INTEL_PT_CYC) { + decoder->cyc_ref_timestamp = decoder->timestamp; + decoder->cycle_cnt = 0; + decoder->have_calc_cyc_to_tsc = false; + intel_pt_calc_cyc_to_tsc(decoder, false); + } + + intel_pt_log_to("Setting timestamp", decoder->timestamp); +} + +static int intel_pt_overflow(struct intel_pt_decoder *decoder) +{ + intel_pt_log("ERROR: Buffer overflow\n"); + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + decoder->cbr = 0; + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + 
decoder->overflow = true; + return -EOVERFLOW; +} + +static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) +{ + uint32_t ctc = decoder->packet.payload; + uint32_t fc = decoder->packet.count; + uint32_t ctc_rem = ctc & decoder->ctc_rem_mask; + + if (!decoder->tsc_ctc_ratio_d) + return; + + decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; + decoder->ctc_timestamp = decoder->tsc_timestamp - fc; + if (decoder->tsc_ctc_mult) { + decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; + } else { + decoder->ctc_timestamp -= multdiv(ctc_rem, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + decoder->ctc_delta = 0; + decoder->have_tma = true; + intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n", + decoder->ctc_timestamp, decoder->last_mtc, ctc_rem); +} + +static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp; + uint32_t mtc, mtc_delta; + + if (!decoder->have_tma) + return; + + mtc = decoder->packet.payload; + + if (mtc > decoder->last_mtc) + mtc_delta = mtc - decoder->last_mtc; + else + mtc_delta = mtc + 256 - decoder->last_mtc; + + decoder->ctc_delta += mtc_delta << decoder->mtc_shift; + + if (decoder->tsc_ctc_mult) { + timestamp = decoder->ctc_timestamp + + decoder->ctc_delta * decoder->tsc_ctc_mult; + } else { + timestamp = decoder->ctc_timestamp + + multdiv(decoder->ctc_delta, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + if (timestamp < decoder->timestamp) + intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n", + timestamp, decoder->timestamp); + else + decoder->timestamp = timestamp; + + decoder->timestamp_insn_cnt = 0; + decoder->last_mtc = mtc; + + if (decoder->last_packet_type == INTEL_PT_CYC) { + decoder->cyc_ref_timestamp = decoder->timestamp; + decoder->cycle_cnt = 0; + decoder->have_calc_cyc_to_tsc = false; + intel_pt_calc_cyc_to_tsc(decoder, true); + } +} + +static void intel_pt_calc_cbr(struct 
/*
 * Walk PSB+ packets when already in sync.
 *
 * Called after a PSB packet has been seen in the normal packet stream.
 * Consumes packets up to and including the matching PSBEND, applying the
 * state they carry (timing, CBR, exec mode, CR3, TSX state, last IP).
 *
 * Return: 0 once PSBEND is reached; -EAGAIN if a packet that does not belong
 * inside PSB+ is found (that packet is left in decoder->packet so the caller
 * can process it); the result of intel_pt_overflow() on OVF; or the error
 * returned by intel_pt_get_next_packet().
 */
static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
{
	int err;

	while (1) {
		err = intel_pt_get_next_packet(decoder);
		if (err)
			return err;

		switch (decoder->packet.type) {
		case INTEL_PT_PSBEND:
			return 0;

		/* None of these may appear between PSB and PSBEND */
		case INTEL_PT_TIP_PGD:
		case INTEL_PT_TIP_PGE:
		case INTEL_PT_TIP:
		case INTEL_PT_TNT:
		case INTEL_PT_TRACESTOP:
		case INTEL_PT_BAD:
		case INTEL_PT_PSB:
			/* The CTC reference is no longer trusted */
			decoder->have_tma = false;
			intel_pt_log("ERROR: Unexpected packet\n");
			return -EAGAIN;

		case INTEL_PT_OVF:
			return intel_pt_overflow(decoder);

		case INTEL_PT_TSC:
			intel_pt_calc_tsc_timestamp(decoder);
			break;

		case INTEL_PT_TMA:
			intel_pt_calc_tma(decoder);
			break;

		case INTEL_PT_CBR:
			intel_pt_calc_cbr(decoder);
			break;

		case INTEL_PT_MODE_EXEC:
			decoder->exec_mode = decoder->packet.payload;
			break;

		case INTEL_PT_PIP:
			/* Bit 63 of the payload is not part of CR3 */
			decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
			break;

		case INTEL_PT_FUP:
			/* Only last_ip is refreshed here; decoder->ip is left alone */
			decoder->pge = true;
			intel_pt_set_last_ip(decoder);
			break;

		case INTEL_PT_MODE_TSX:
			intel_pt_update_in_tx(decoder);
			break;

		case INTEL_PT_MTC:
			intel_pt_calc_mtc_timestamp(decoder);
			/* When sampling on MTC, flag the pending state as a sample */
			if (decoder->period_type == INTEL_PT_PERIOD_MTC)
				decoder->state.type |= INTEL_PT_INSTRUCTION;
			break;

		/* Carry nothing that needs handling here */
		case INTEL_PT_CYC:
		case INTEL_PT_VMCS:
		case INTEL_PT_MNT:
		case INTEL_PT_PAD:
		default:
			break;
		}
	}
}
decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type == INTEL_PT_PERIOD_MTC) + decoder->state.type |= INTEL_PT_INSTRUCTION; + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + break; + + default: + return intel_pt_bug(decoder); + } + } +} + +static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) +{ + bool no_tip = false; + int err; + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err) + return err; +next: + switch (decoder->packet.type) { + case INTEL_PT_TNT: + if (!decoder->packet.count) + break; + decoder->tnt = decoder->packet; + decoder->pkt_state = INTEL_PT_STATE_TNT; + err = intel_pt_walk_tnt(decoder); + if (err == -EAGAIN) + break; + return err; + + case INTEL_PT_TIP_PGD: + if (decoder->packet.count != 0) + intel_pt_set_last_ip(decoder); + decoder->pkt_state = INTEL_PT_STATE_TIP_PGD; + return intel_pt_walk_tip(decoder); + + case INTEL_PT_TIP_PGE: { + decoder->pge = true; + if (decoder->packet.count == 0) { + intel_pt_log_at("Skipping zero TIP.PGE", + decoder->pos); + break; + } + intel_pt_set_ip(decoder); + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + return 0; + } + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_TIP: + if (decoder->packet.count != 0) + intel_pt_set_last_ip(decoder); + decoder->pkt_state = INTEL_PT_STATE_TIP; + return intel_pt_walk_tip(decoder); + + case INTEL_PT_FUP: + if (decoder->packet.count == 0) { + intel_pt_log_at("Skipping zero FUP", + decoder->pos); + no_tip = false; + break; + } + intel_pt_set_last_ip(decoder); + err = intel_pt_walk_fup(decoder); + if (err != -EAGAIN) { + if (err) + return err; + if (no_tip) + decoder->pkt_state = + INTEL_PT_STATE_FUP_NO_TIP; + else + 
decoder->pkt_state = INTEL_PT_STATE_FUP; + return 0; + } + if (no_tip) { + no_tip = false; + break; + } + return intel_pt_walk_fup_tip(decoder); + + case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + break; + + case INTEL_PT_PSB: + intel_pt_clear_stack(&decoder->stack); + err = intel_pt_walk_psbend(decoder); + if (err == -EAGAIN) + goto next; + if (err) + return err; + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type != INTEL_PT_PERIOD_MTC) + break; + /* + * Ensure that there has been an instruction since the + * last MTC. + */ + if (!decoder->mtc_insn) + break; + decoder->mtc_insn = false; + /* Ensure that there is a timestamp */ + if (!decoder->timestamp) + break; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->mtc_insn = false; + return 0; + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_MODE_TSX: + /* MODE_TSX need not be followed by FUP */ + if (!decoder->pge) { + intel_pt_update_in_tx(decoder); + break; + } + err = intel_pt_mode_tsx(decoder, &no_tip); + if (err) + return err; + goto next; + + case INTEL_PT_BAD: /* Does not happen */ + return intel_pt_bug(decoder); + + case INTEL_PT_PSBEND: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + break; + + default: + return intel_pt_bug(decoder); + } + } +} + +/* Walk PSB+ packets to get in sync. 
/*
 * Walk PSB+ packets to get in sync.
 *
 * Used while the decoder has no trusted IP. Consumes packets up to the
 * PSBEND, applying timing, CBR, CR3, exec mode and TSX state, and tries to
 * pick up an IP from a FUP.
 *
 * Return: 0 at PSBEND; -ENOENT if a TIP*, TNT or TRACESTOP packet turns up
 * inside PSB+ (for TNT and TRACESTOP decoder->pkt_state is also moved to an
 * error state); otherwise whatever intel_pt_get_next_packet(),
 * intel_pt_overflow() or intel_pt_bug() returned.
 */
static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
{
	int err;

	while (1) {
		err = intel_pt_get_next_packet(decoder);
		if (err)
			return err;

		switch (decoder->packet.type) {
		case INTEL_PT_TIP_PGD:
			decoder->continuous_period = false;
			/* Fall through */
		case INTEL_PT_TIP_PGE:
		case INTEL_PT_TIP:
			intel_pt_log("ERROR: Unexpected packet\n");
			return -ENOENT;

		case INTEL_PT_FUP:
			decoder->pge = true;
			/*
			 * Only take the IP if there is a previous last_ip to
			 * combine it with, or the packet count is 6 or 0.
			 * NOTE(review): count 6 is presumably the full-IP
			 * encoding - confirm against the packet decoder.
			 */
			if (decoder->last_ip || decoder->packet.count == 6 ||
			    decoder->packet.count == 0) {
				uint64_t current_ip = decoder->ip;

				intel_pt_set_ip(decoder);
				/* Only log when an IP was already known before */
				if (current_ip)
					intel_pt_log_to("Setting IP",
							decoder->ip);
			}
			break;

		case INTEL_PT_MTC:
			intel_pt_calc_mtc_timestamp(decoder);
			break;

		case INTEL_PT_TSC:
			intel_pt_calc_tsc_timestamp(decoder);
			break;

		case INTEL_PT_TMA:
			intel_pt_calc_tma(decoder);
			break;

		case INTEL_PT_CYC:
			intel_pt_calc_cyc_timestamp(decoder);
			break;

		case INTEL_PT_CBR:
			intel_pt_calc_cbr(decoder);
			break;

		case INTEL_PT_PIP:
			/* Bit 63 of the payload is not part of CR3 */
			decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
			break;

		case INTEL_PT_MODE_EXEC:
			decoder->exec_mode = decoder->packet.payload;
			break;

		case INTEL_PT_MODE_TSX:
			intel_pt_update_in_tx(decoder);
			break;

		case INTEL_PT_TRACESTOP:
			decoder->pge = false;
			decoder->continuous_period = false;
			intel_pt_clear_tx_flags(decoder);
			/* Fall through */
		case INTEL_PT_TNT:
			decoder->have_tma = false;
			intel_pt_log("ERROR: Unexpected packet\n");
			/* Pick the error state according to whether an IP is known */
			if (decoder->ip)
				decoder->pkt_state = INTEL_PT_STATE_ERR4;
			else
				decoder->pkt_state = INTEL_PT_STATE_ERR3;
			return -ENOENT;

		case INTEL_PT_BAD: /* Does not happen */
			return intel_pt_bug(decoder);

		case INTEL_PT_OVF:
			return intel_pt_overflow(decoder);

		case INTEL_PT_PSBEND:
			return 0;

		case INTEL_PT_PSB:
		case INTEL_PT_VMCS:
		case INTEL_PT_MNT:
		case INTEL_PT_PAD:
		default:
			break;
		}
	}
}
intel_pt_get_next_packet(decoder); + if (err) + return err; + + switch (decoder->packet.type) { + case INTEL_PT_TIP_PGD: + decoder->continuous_period = false; + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; + if (decoder->last_ip || decoder->packet.count == 6 || + decoder->packet.count == 0) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; + break; + + case INTEL_PT_FUP: + if (decoder->overflow) { + if (decoder->last_ip || + decoder->packet.count == 6 || + decoder->packet.count == 0) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; + } + if (decoder->packet.count) + intel_pt_set_last_ip(decoder); + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + break; + + case INTEL_PT_TSC: + intel_pt_calc_tsc_timestamp(decoder); + break; + + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + + case INTEL_PT_PIP: + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MODE_EXEC: + decoder->exec_mode = decoder->packet.payload; + break; + + case INTEL_PT_MODE_TSX: + intel_pt_update_in_tx(decoder); + break; + + case INTEL_PT_OVF: + return intel_pt_overflow(decoder); + + case INTEL_PT_BAD: /* Does not happen */ + return intel_pt_bug(decoder); + + case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + break; + + case INTEL_PT_PSB: + err = intel_pt_walk_psb(decoder); + if (err) + return err; + if (decoder->ip) { + /* Do not have a sample */ + decoder->state.type = 0; + return 0; + } + break; + + case INTEL_PT_TNT: + case INTEL_PT_PSBEND: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + default: + break; + } + } +} + +static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) +{ + int err; + + 
intel_pt_log("Scanning for full IP\n"); + err = intel_pt_walk_to_ip(decoder); + if (err) + return err; + + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->overflow = false; + + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + intel_pt_log_to("Setting IP", decoder->ip); + + return 0; +} + +static int intel_pt_part_psb(struct intel_pt_decoder *decoder) +{ + const unsigned char *end = decoder->buf + decoder->len; + size_t i; + + for (i = INTEL_PT_PSB_LEN - 1; i; i--) { + if (i > decoder->len) + continue; + if (!memcmp(end - i, INTEL_PT_PSB_STR, i)) + return i; + } + return 0; +} + +static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb) +{ + size_t rest_psb = INTEL_PT_PSB_LEN - part_psb; + const char *psb = INTEL_PT_PSB_STR; + + if (rest_psb > decoder->len || + memcmp(decoder->buf, psb + part_psb, rest_psb)) + return 0; + + return rest_psb; +} + +static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder, + int part_psb) +{ + int rest_psb, ret; + + decoder->pos += decoder->len; + decoder->len = 0; + + ret = intel_pt_get_next_data(decoder); + if (ret) + return ret; + + rest_psb = intel_pt_rest_psb(decoder, part_psb); + if (!rest_psb) + return 0; + + decoder->pos -= part_psb; + decoder->next_buf = decoder->buf + rest_psb; + decoder->next_len = decoder->len - rest_psb; + memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); + decoder->buf = decoder->temp_buf; + decoder->len = INTEL_PT_PSB_LEN; + + return 0; +} + +static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder) +{ + unsigned char *next; + int ret; + + intel_pt_log("Scanning for PSB\n"); + while (1) { + if (!decoder->len) { + ret = intel_pt_get_next_data(decoder); + if (ret) + return ret; + } + + next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR, + INTEL_PT_PSB_LEN); + if (!next) { + int part_psb; + + part_psb = intel_pt_part_psb(decoder); + if (part_psb) { + ret = intel_pt_get_split_psb(decoder, part_psb); + if (ret) + return ret; 
+ } else { + decoder->pos += decoder->len; + decoder->len = 0; + } + continue; + } + + decoder->pkt_step = next - decoder->buf; + return intel_pt_get_next_packet(decoder); + } +} + +static int intel_pt_sync(struct intel_pt_decoder *decoder) +{ + int err; + + decoder->pge = false; + decoder->continuous_period = false; + decoder->last_ip = 0; + decoder->ip = 0; + intel_pt_clear_stack(&decoder->stack); + + err = intel_pt_scan_for_psb(decoder); + if (err) + return err; + + decoder->pkt_state = INTEL_PT_STATE_NO_IP; + + err = intel_pt_walk_psb(decoder); + if (err) + return err; + + if (decoder->ip) { + decoder->state.type = 0; /* Do not have a sample */ + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + } else { + return intel_pt_sync_ip(decoder); + } + + return 0; +} + +static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t est = decoder->timestamp_insn_cnt << 1; + + if (!decoder->cbr || !decoder->max_non_turbo_ratio) + goto out; + + est *= decoder->max_non_turbo_ratio; + est /= decoder->cbr; +out: + return decoder->timestamp + est; +} + +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) +{ + int err; + + do { + decoder->state.type = INTEL_PT_BRANCH; + decoder->state.flags = 0; + + switch (decoder->pkt_state) { + case INTEL_PT_STATE_NO_PSB: + err = intel_pt_sync(decoder); + break; + case INTEL_PT_STATE_NO_IP: + decoder->last_ip = 0; + /* Fall through */ + case INTEL_PT_STATE_ERR_RESYNC: + err = intel_pt_sync_ip(decoder); + break; + case INTEL_PT_STATE_IN_SYNC: + err = intel_pt_walk_trace(decoder); + break; + case INTEL_PT_STATE_TNT: + err = intel_pt_walk_tnt(decoder); + if (err == -EAGAIN) + err = intel_pt_walk_trace(decoder); + break; + case INTEL_PT_STATE_TIP: + case INTEL_PT_STATE_TIP_PGD: + err = intel_pt_walk_tip(decoder); + break; + case INTEL_PT_STATE_FUP: + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + err = intel_pt_walk_fup(decoder); + if (err == -EAGAIN) + err = intel_pt_walk_fup_tip(decoder); + 
else if (!err) + decoder->pkt_state = INTEL_PT_STATE_FUP; + break; + case INTEL_PT_STATE_FUP_NO_TIP: + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + err = intel_pt_walk_fup(decoder); + if (err == -EAGAIN) + err = intel_pt_walk_trace(decoder); + break; + default: + err = intel_pt_bug(decoder); + break; + } + } while (err == -ENOLINK); + + decoder->state.err = err ? intel_pt_ext_err(err) : 0; + decoder->state.timestamp = decoder->timestamp; + decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); + decoder->state.cr3 = decoder->cr3; + decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; + + if (err) + decoder->state.from_ip = decoder->ip; + + return &decoder->state; +} + +static bool intel_pt_at_psb(unsigned char *buf, size_t len) +{ + if (len < INTEL_PT_PSB_LEN) + return false; + return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR, + INTEL_PT_PSB_LEN); +} + +/** + * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet. + * @buf: pointer to buffer pointer + * @len: size of buffer + * + * Updates the buffer pointer to point to the start of the next PSB packet if + * there is one, otherwise the buffer pointer is unchanged. If @buf is updated, + * @len is adjusted accordingly. + * + * Return: %true if a PSB packet is found, %false otherwise. + */ +static bool intel_pt_next_psb(unsigned char **buf, size_t *len) +{ + unsigned char *next; + + next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); + if (next) { + *len -= next - *buf; + *buf = next; + return true; + } + return false; +} + +/** + * intel_pt_step_psb - move buffer pointer to the start of the following PSB + * packet. + * @buf: pointer to buffer pointer + * @len: size of buffer + * + * Updates the buffer pointer to point to the start of the following PSB packet + * (skipping the PSB at @buf itself) if there is one, otherwise the buffer + * pointer is unchanged. If @buf is updated, @len is adjusted accordingly. 
+ * + * Return: %true if a PSB packet is found, %false otherwise. + */ +static bool intel_pt_step_psb(unsigned char **buf, size_t *len) +{ + unsigned char *next; + + if (!*len) + return false; + + next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); + if (next) { + *len -= next - *buf; + *buf = next; + return true; + } + return false; +} + +/** + * intel_pt_last_psb - find the last PSB packet in a buffer. + * @buf: buffer + * @len: size of buffer + * + * This function finds the last PSB in a buffer. + * + * Return: A pointer to the last PSB in @buf if found, %NULL otherwise. + */ +static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) +{ + const char *n = INTEL_PT_PSB_STR; + unsigned char *p; + size_t k; + + if (len < INTEL_PT_PSB_LEN) + return NULL; + + k = len - INTEL_PT_PSB_LEN + 1; + while (1) { + p = memrchr(buf, n[0], k); + if (!p) + return NULL; + if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1)) + return p; + k = p - buf; + if (!k) + return NULL; + } +} + +/** + * intel_pt_next_tsc - find and return next TSC. + * @buf: buffer + * @len: size of buffer + * @tsc: TSC value returned + * + * Find a TSC packet in @buf and return the TSC value. This function assumes + * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a + * PSBEND packet is found. + * + * Return: %true if TSC is found, false otherwise. + */ +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) +{ + struct intel_pt_pkt packet; + int ret; + + while (len) { + ret = intel_pt_get_packet(buf, len, &packet); + if (ret <= 0) + return false; + if (packet.type == INTEL_PT_TSC) { + *tsc = packet.payload; + return true; + } + if (packet.type == INTEL_PT_PSBEND) + return false; + buf += ret; + len -= ret; + } + return false; +} + +/** + * intel_pt_tsc_cmp - compare 7-byte TSCs. 
/**
 * intel_pt_tsc_cmp - compare 7-byte TSCs.
 * @tsc1: first TSC to compare
 * @tsc2: second TSC to compare
 *
 * A 7-byte TSC can wrap, and a wrap cannot be observed directly. The values
 * are therefore ordered on the assumption that they are less than half the
 * 7-byte range apart: a larger gap is taken to mean that the numerically
 * smaller value is the later one.
 *
 * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
 * after @tsc2.
 */
static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
{
	const uint64_t halfway = (1ULL << 55);
	uint64_t gap;
	int order;

	if (tsc1 == tsc2)
		return 0;

	/* Numerical order and distance, ignoring wrap-around for now */
	if (tsc1 < tsc2) {
		gap = tsc2 - tsc1;
		order = -1;
	} else {
		gap = tsc1 - tsc2;
		order = 1;
	}

	/* Half the range or more apart => assume a wrap and flip the order */
	return gap < halfway ? order : -order;
}
/**
 * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
 *                             using TSC.
 * @buf_a: first buffer
 * @len_a: size of first buffer
 * @buf_b: second buffer
 * @len_b: size of second buffer
 *
 * If the trace contains TSC we can look at the last TSC of @buf_a and the
 * first TSC of @buf_b in order to determine if the buffers overlap, and then
 * walk forward in @buf_b until a later TSC is found.  A precondition is that
 * @buf_a and @buf_b are positioned at a PSB.
 *
 * Note that every return path yields a pointer derived from @buf_b; this
 * function never returns %NULL.
 *
 * Return: A pointer into @buf_b from where non-overlapped data starts, or
 * @buf_b + @len_b if there is no non-overlapped data.
 */
static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
						size_t len_a,
						unsigned char *buf_b,
						size_t len_b)
{
	uint64_t tsc_a, tsc_b;
	unsigned char *p;
	size_t len;

	p = intel_pt_last_psb(buf_a, len_a);
	if (!p)
		return buf_b; /* No PSB in buf_a => no overlap */

	/* Bytes from the last PSB to the end of buf_a */
	len = len_a - (p - buf_a);
	if (!intel_pt_next_tsc(p, len, &tsc_a)) {
		/* The last PSB+ in buf_a is incomplete, so go back one more */
		len_a -= len;
		p = intel_pt_last_psb(buf_a, len_a);
		if (!p)
			return buf_b; /* No full PSB+ => assume no overlap */
		len = len_a - (p - buf_a);
		if (!intel_pt_next_tsc(p, len, &tsc_a))
			return buf_b; /* No TSC in buf_a => assume no overlap */
	}

	/* Step through buf_b one PSB at a time until its TSC is after tsc_a */
	while (1) {
		/* Ignore PSB+ with no TSC */
		if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) &&
		    intel_pt_tsc_cmp(tsc_a, tsc_b) < 0)
			return buf_b; /* tsc_a < tsc_b => no overlap */

		if (!intel_pt_step_psb(&buf_b, &len_b))
			return buf_b + len_b; /* No PSB in buf_b => no data */
	}
}
/**
 * intel_pt_find_overlap - determine start of non-overlapped trace data.
 * @buf_a: first buffer
 * @len_a: size of first buffer
 * @buf_b: second buffer
 * @len_b: size of second buffer
 * @have_tsc: can use TSC packets to detect overlap
 *
 * When trace samples or snapshots are recorded there is the possibility that
 * the data overlaps.  Note that, for the purposes of decoding, data is only
 * useful if it begins with a PSB packet.
 *
 * With @have_tsc set the decision is delegated to
 * intel_pt_find_overlap_tsc().  Otherwise overlap is detected by looking for
 * a tail of @buf_a, starting at a PSB, that is byte-for-byte identical to the
 * head of @buf_b.
 *
 * Return: A pointer into @buf_b from where non-overlapped data starts, or
 * @buf_b + @len_b if there is no non-overlapped data.
 */
unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
				     unsigned char *buf_b, size_t len_b,
				     bool have_tsc)
{
	unsigned char *found;

	/* Buffer 'b' must start at PSB so throw away everything before that */
	if (!intel_pt_next_psb(&buf_b, &len_b))
		return buf_b + len_b; /* No PSB */

	if (!intel_pt_next_psb(&buf_a, &len_a))
		return buf_b; /* No overlap */

	if (have_tsc) {
		/*
		 * NOTE(review): intel_pt_find_overlap_tsc() as written in this
		 * file never returns NULL, so when have_tsc is set the byte
		 * comparison below is not reached.
		 */
		found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b);
		if (found)
			return found;
	}

	/*
	 * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
	 * we can ignore the first part of buffer 'a'.
	 */
	while (len_b < len_a) {
		if (!intel_pt_step_psb(&buf_a, &len_a))
			return buf_b; /* No overlap */
	}

	/* Now len_b >= len_a */
	if (len_b > len_a) {
		/* The leftover buffer 'b' must start at a PSB */
		while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
			if (!intel_pt_step_psb(&buf_a, &len_a))
				return buf_b; /* No overlap */
		}
	}

	while (1) {
		/*
		 * Potential overlap so check the bytes.  Haystack and needle
		 * are both len_a bytes long, so this is an equality test of
		 * the remaining 'a' against the start of 'b'.
		 */
		found = memmem(buf_a, len_a, buf_b, len_a);
		if (found)
			return buf_b + len_a;

		/* Try again at next PSB in buffer 'a' */
		if (!intel_pt_step_psb(&buf_a, &len_a))
			return buf_b; /* No overlap */

		/* The leftover buffer 'b' must start at a PSB */
		while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
			if (!intel_pt_step_psb(&buf_a, &len_a))
				return buf_b; /* No overlap */
		}
	}
}
/**
 * struct intel_pt_state - result of one intel_pt_decode() call.
 * @type: sample type bits (enum intel_pt_sample_type). intel_pt_decode()
 *        starts each pass with INTEL_PT_BRANCH; the decoder may replace it
 *        with, or OR in, INTEL_PT_INSTRUCTION, or clear it to 0 when there is
 *        no sample to report.
 * @err: 0 on success, otherwise the decoder error translated by
 *       intel_pt_ext_err() (NOTE(review): presumably one of the
 *       INTEL_PT_ERR_* codes - confirm).
 * @from_ip: source address of the transfer, 0 if there is none; on error it
 *           is set to the decoder's current IP.
 * @to_ip: destination address of the transfer, 0 if there is none.
 * @cr3: copy of the decoder's CR3 value, taken from PIP packets with bit 63
 *       masked off.
 * @tot_insn_cnt: copy of the decoder's total instruction count.
 * @timestamp: copy of the decoder's current timestamp.
 * @est_timestamp: @timestamp plus an estimate derived from the number of
 *                 instructions decoded since the timestamp last changed.
 * @trace_nr: NOTE(review): not written by the decode paths reviewed here;
 *            presumably mirrors intel_pt_buffer.trace_nr - confirm.
 * @flags: INTEL_PT_IN_TX / INTEL_PT_ABORT_TX / INTEL_PT_ASYNC bits; reset to
 *         0 at the start of each decode pass.
 * @insn_op: NOTE(review): presumably the op of the last walked instruction -
 *           confirm against the instruction walker.
 * @insn_len: NOTE(review): presumably the length of that instruction -
 *            confirm against the instruction walker.
 */
struct intel_pt_state {
	enum intel_pt_sample_type type;
	int err;
	uint64_t from_ip;
	uint64_t to_ip;
	uint64_t cr3;
	uint64_t tot_insn_cnt;
	uint64_t timestamp;
	uint64_t est_timestamp;
	uint64_t trace_nr;
	uint32_t flags;
	enum intel_pt_insn_op insn_op;
	int insn_len;
};
bool return_compression; + uint64_t period; + enum intel_pt_period_type period_type; + unsigned max_non_turbo_ratio; + unsigned int mtc_period; + uint32_t tsc_ctc_ratio_n; + uint32_t tsc_ctc_ratio_d; +}; + +struct intel_pt_decoder; + +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params); +void intel_pt_decoder_free(struct intel_pt_decoder *decoder); + +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); + +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, + unsigned char *buf_b, size_t len_b, + bool have_tsc); + +int intel_pt__strerror(int code, char *buf, size_t buflen); + +#endif diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c new file mode 100644 index 000000000000..d23138c06665 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -0,0 +1,249 @@ +/* + * intel_pt_insn_decoder.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include <stdio.h> +#include <string.h> +#include <endian.h> +#include <byteswap.h> + +#include "event.h" + +#include "insn.h" + +#include "inat.c" +#include "insn.c" + +#include "intel-pt-insn-decoder.h" + +/* Based on branch_type() from perf_event_intel_lbr.c */ +static void intel_pt_insn_decoder(struct insn *insn, + struct intel_pt_insn *intel_pt_insn) +{ + enum intel_pt_insn_op op = INTEL_PT_OP_OTHER; + enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH; + int ext; + + if (insn_is_avx(insn)) { + intel_pt_insn->op = INTEL_PT_OP_OTHER; + intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH; + intel_pt_insn->length = insn->length; + return; + } + + switch (insn->opcode.bytes[0]) { + case 0xf: + switch (insn->opcode.bytes[1]) { + case 0x05: /* syscall */ + case 0x34: /* sysenter */ + op = INTEL_PT_OP_SYSCALL; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0x07: /* sysret */ + case 0x35: /* sysexit */ + op = INTEL_PT_OP_SYSRET; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0x80 ... 0x8f: /* jcc */ + op = INTEL_PT_OP_JCC; + branch = INTEL_PT_BR_CONDITIONAL; + break; + default: + break; + } + break; + case 0x70 ... 0x7f: /* jcc */ + op = INTEL_PT_OP_JCC; + branch = INTEL_PT_BR_CONDITIONAL; + break; + case 0xc2: /* near ret */ + case 0xc3: /* near ret */ + case 0xca: /* far ret */ + case 0xcb: /* far ret */ + op = INTEL_PT_OP_RET; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xcf: /* iret */ + op = INTEL_PT_OP_IRET; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xcc ... 0xce: /* int */ + op = INTEL_PT_OP_INT; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xe8: /* call near rel */ + op = INTEL_PT_OP_CALL; + branch = INTEL_PT_BR_UNCONDITIONAL; + break; + case 0x9a: /* call far absolute */ + op = INTEL_PT_OP_CALL; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xe0 ... 
0xe2: /* loop */ + op = INTEL_PT_OP_LOOP; + branch = INTEL_PT_BR_CONDITIONAL; + break; + case 0xe3: /* jcc */ + op = INTEL_PT_OP_JCC; + branch = INTEL_PT_BR_CONDITIONAL; + break; + case 0xe9: /* jmp */ + case 0xeb: /* jmp */ + op = INTEL_PT_OP_JMP; + branch = INTEL_PT_BR_UNCONDITIONAL; + break; + case 0xea: /* far jmp */ + op = INTEL_PT_OP_JMP; + branch = INTEL_PT_BR_INDIRECT; + break; + case 0xff: /* call near absolute, call far absolute ind */ + ext = (insn->modrm.bytes[0] >> 3) & 0x7; + switch (ext) { + case 2: /* near ind call */ + case 3: /* far ind call */ + op = INTEL_PT_OP_CALL; + branch = INTEL_PT_BR_INDIRECT; + break; + case 4: + case 5: + op = INTEL_PT_OP_JMP; + branch = INTEL_PT_BR_INDIRECT; + break; + default: + break; + } + break; + default: + break; + } + + intel_pt_insn->op = op; + intel_pt_insn->branch = branch; + intel_pt_insn->length = insn->length; + + if (branch == INTEL_PT_BR_CONDITIONAL || + branch == INTEL_PT_BR_UNCONDITIONAL) { +#if __BYTE_ORDER == __BIG_ENDIAN + switch (insn->immediate.nbytes) { + case 1: + intel_pt_insn->rel = insn->immediate.value; + break; + case 2: + intel_pt_insn->rel = + bswap_16((short)insn->immediate.value); + break; + case 4: + intel_pt_insn->rel = bswap_32(insn->immediate.value); + break; + default: + intel_pt_insn->rel = 0; + break; + } +#else + intel_pt_insn->rel = insn->immediate.value; +#endif + } +} + +int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, + struct intel_pt_insn *intel_pt_insn) +{ + struct insn insn; + + insn_init(&insn, buf, len, x86_64); + insn_get_length(&insn); + if (!insn_complete(&insn) || insn.length > len) + return -1; + intel_pt_insn_decoder(&insn, intel_pt_insn); + if (insn.length < INTEL_PT_INSN_DBG_BUF_SZ) + memcpy(intel_pt_insn->buf, buf, insn.length); + else + memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_DBG_BUF_SZ); + return 0; +} + +const char *branch_name[] = { + [INTEL_PT_OP_OTHER] = "Other", + [INTEL_PT_OP_CALL] = "Call", + [INTEL_PT_OP_RET] = "Ret", + 
[INTEL_PT_OP_JCC] = "Jcc", + [INTEL_PT_OP_JMP] = "Jmp", + [INTEL_PT_OP_LOOP] = "Loop", + [INTEL_PT_OP_IRET] = "IRet", + [INTEL_PT_OP_INT] = "Int", + [INTEL_PT_OP_SYSCALL] = "Syscall", + [INTEL_PT_OP_SYSRET] = "Sysret", +}; + +const char *intel_pt_insn_name(enum intel_pt_insn_op op) +{ + return branch_name[op]; +} + +int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf, + size_t buf_len) +{ + switch (intel_pt_insn->branch) { + case INTEL_PT_BR_CONDITIONAL: + case INTEL_PT_BR_UNCONDITIONAL: + return snprintf(buf, buf_len, "%s %s%d", + intel_pt_insn_name(intel_pt_insn->op), + intel_pt_insn->rel > 0 ? "+" : "", + intel_pt_insn->rel); + case INTEL_PT_BR_NO_BRANCH: + case INTEL_PT_BR_INDIRECT: + return snprintf(buf, buf_len, "%s", + intel_pt_insn_name(intel_pt_insn->op)); + default: + break; + } + return 0; +} + +size_t intel_pt_insn_max_size(void) +{ + return MAX_INSN_SIZE; +} + +int intel_pt_insn_type(enum intel_pt_insn_op op) +{ + switch (op) { + case INTEL_PT_OP_OTHER: + return 0; + case INTEL_PT_OP_CALL: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL; + case INTEL_PT_OP_RET: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN; + case INTEL_PT_OP_JCC: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL; + case INTEL_PT_OP_JMP: + return PERF_IP_FLAG_BRANCH; + case INTEL_PT_OP_LOOP: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL; + case INTEL_PT_OP_IRET: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_INTERRUPT; + case INTEL_PT_OP_INT: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_INTERRUPT; + case INTEL_PT_OP_SYSCALL: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_SYSCALLRET; + case INTEL_PT_OP_SYSRET: + return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_SYSCALLRET; + default: + return 0; + } +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h new file mode 100644 index 
/**
 * struct intel_pt_insn - decoded instruction summary used by the trace decoder.
 * @op: instruction classification (call, ret, jcc, ...); INTEL_PT_OP_OTHER for
 *      anything that is not recognised as a branch.
 * @branch: how the branch target is determined (none, indirect, conditional,
 *          unconditional).
 * @length: instruction length in bytes, as reported by the x86 instruction
 *          decoder.
 * @rel: immediate displacement of the branch. Only written for
 *       INTEL_PT_BR_CONDITIONAL and INTEL_PT_BR_UNCONDITIONAL branches; for
 *       other instructions the instruction decoder leaves it untouched.
 * @buf: copy of the first min(@length, INTEL_PT_INSN_DBG_BUF_SZ) instruction
 *       bytes, filled in by intel_pt_get_insn(); the buffer size constant's
 *       name suggests it is for debug output.
 */
struct intel_pt_insn {
	enum intel_pt_insn_op		op;
	enum intel_pt_insn_branch	branch;
	int				length;
	int32_t				rel;
	unsigned char			buf[INTEL_PT_INSN_DBG_BUF_SZ];
};
/dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -0,0 +1,156 @@ +/* + * intel_pt_log.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <stdio.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> + +#include "intel-pt-log.h" +#include "intel-pt-insn-decoder.h" + +#include "intel-pt-pkt-decoder.h" + +#define MAX_LOG_NAME 256 + +static FILE *f; +static char log_name[MAX_LOG_NAME]; +bool intel_pt_enable_logging; + +void intel_pt_log_enable(void) +{ + intel_pt_enable_logging = true; +} + +void intel_pt_log_disable(void) +{ + if (f) + fflush(f); + intel_pt_enable_logging = false; +} + +void intel_pt_log_set_name(const char *name) +{ + /* At most 251 name bytes + ".log"; always NUL-terminated, no strcat. */ + snprintf(log_name, MAX_LOG_NAME, "%.*s.log", MAX_LOG_NAME - 5, name); +} + +static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos, + int indent) +{ + int i; + + for (i = 0; i < indent; i++) + fprintf(f, " "); + + fprintf(f, " %08" PRIx64 ": ", pos); + for (i = 0; i < len; i++) + fprintf(f, " %02x", buf[i]); + for (; i < 16; i++) + fprintf(f, " "); + fprintf(f, " "); +} + +static void intel_pt_print_no_data(uint64_t pos, int indent) +{ + int i; + + for (i = 0; i < indent; i++) + fprintf(f, " "); + + fprintf(f, " %08" PRIx64 ": ", pos); + for (i = 0; i < 16; i++) + fprintf(f, " "); + fprintf(f, " "); +} + +static int intel_pt_log_open(void) +{ + if (!intel_pt_enable_logging) + return -1; + + if (f) + return 0; + + if (!log_name[0]) + return
-1; + + f = fopen(log_name, "w+"); + if (!f) { + intel_pt_enable_logging = false; + return -1; + } + + return 0; +} + +void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, + uint64_t pos, const unsigned char *buf) +{ + char desc[INTEL_PT_PKT_DESC_MAX]; + + if (intel_pt_log_open()) + return; + + intel_pt_print_data(buf, pkt_len, pos, 0); + intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX); + fprintf(f, "%s\n", desc); +} + +void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) +{ + char desc[INTEL_PT_INSN_DESC_MAX]; + size_t len = intel_pt_insn->length; + + if (intel_pt_log_open()) + return; + + if (len > INTEL_PT_INSN_DBG_BUF_SZ) + len = INTEL_PT_INSN_DBG_BUF_SZ; + intel_pt_print_data(intel_pt_insn->buf, len, ip, 8); + if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0) + fprintf(f, "%s\n", desc); + else + fprintf(f, "Bad instruction!\n"); +} + +void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, + uint64_t ip) +{ + char desc[INTEL_PT_INSN_DESC_MAX]; + + if (intel_pt_log_open()) + return; + + intel_pt_print_no_data(ip, 8); + if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0) + fprintf(f, "%s\n", desc); + else + fprintf(f, "Bad instruction!\n"); +} + +void __intel_pt_log(const char *fmt, ...) +{ + va_list args; + + if (intel_pt_log_open()) + return; + + va_start(args, fmt); + vfprintf(f, fmt, args); + va_end(args); +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h new file mode 100644 index 000000000000..debe751dc3d6 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -0,0 +1,78 @@ +/* + * intel_pt_log.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__INTEL_PT_LOG_H__ +#define INCLUDE__INTEL_PT_LOG_H__ + +#include <stdint.h> +#include <inttypes.h> + +struct intel_pt_pkt; + +void intel_pt_log_enable(void); +void intel_pt_log_disable(void); +void intel_pt_log_set_name(const char *name); + +void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, + uint64_t pos, const unsigned char *buf); + +struct intel_pt_insn; + +void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); +void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, + uint64_t ip); + +__attribute__((format(printf, 1, 2))) +void __intel_pt_log(const char *fmt, ...); + +#define intel_pt_log(fmt, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log(fmt, ##__VA_ARGS__); \ + } while (0) + +#define intel_pt_log_packet(arg, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log_packet(arg, ##__VA_ARGS__); \ + } while (0) + +#define intel_pt_log_insn(arg, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log_insn(arg, ##__VA_ARGS__); \ + } while (0) + +#define intel_pt_log_insn_no_data(arg, ...) 
\ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log_insn_no_data(arg, ##__VA_ARGS__); \ + } while (0) + +#define x64_fmt "0x%" PRIx64 + +extern bool intel_pt_enable_logging; + +static inline void intel_pt_log_at(const char *msg, uint64_t u) +{ + intel_pt_log("%s at " x64_fmt "\n", msg, u); +} + +static inline void intel_pt_log_to(const char *msg, uint64_t u) +{ + intel_pt_log("%s to " x64_fmt "\n", msg, u); +} + +#endif diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c new file mode 100644 index 000000000000..b1257c816310 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -0,0 +1,518 @@ +/* + * intel_pt_pkt_decoder.c: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include <stdio.h> +#include <string.h> +#include <endian.h> +#include <byteswap.h> + +#include "intel-pt-pkt-decoder.h" + +#define BIT(n) (1 << (n)) + +#define BIT63 ((uint64_t)1 << 63) + +#define NR_FLAG BIT63 + +#if __BYTE_ORDER == __BIG_ENDIAN +#define le16_to_cpu bswap_16 +#define le32_to_cpu bswap_32 +#define le64_to_cpu bswap_64 +#define memcpy_le64(d, s, n) do { \ + memcpy((d), (s), (n)); \ + *(d) = le64_to_cpu(*(d)); \ +} while (0) +#else +#define le16_to_cpu +#define le32_to_cpu +#define le64_to_cpu +#define memcpy_le64 memcpy +#endif + +static const char * const packet_name[] = { + [INTEL_PT_BAD] = "Bad Packet!", + [INTEL_PT_PAD] = "PAD", + [INTEL_PT_TNT] = "TNT", + [INTEL_PT_TIP_PGD] = "TIP.PGD", + [INTEL_PT_TIP_PGE] = "TIP.PGE", + [INTEL_PT_TSC] = "TSC", + [INTEL_PT_TMA] = "TMA", + [INTEL_PT_MODE_EXEC] = "MODE.Exec", + [INTEL_PT_MODE_TSX] = "MODE.TSX", + [INTEL_PT_MTC] = "MTC", + [INTEL_PT_TIP] = "TIP", + [INTEL_PT_FUP] = "FUP", + [INTEL_PT_CYC] = "CYC", + [INTEL_PT_VMCS] = "VMCS", + [INTEL_PT_PSB] = "PSB", + [INTEL_PT_PSBEND] = "PSBEND", + [INTEL_PT_CBR] = "CBR", + [INTEL_PT_TRACESTOP] = "TraceSTOP", + [INTEL_PT_PIP] = "PIP", + [INTEL_PT_OVF] = "OVF", + [INTEL_PT_MNT] = "MNT", +}; + +const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) +{ + return packet_name[type]; +} + +static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + uint64_t payload; + int count; + + if (len < 8) + return INTEL_PT_NEED_MORE_BYTES; + + memcpy_le64(&payload, buf, 8); /* buf may be unaligned */ + + for (count = 47; count; count--) { + if (payload & BIT63) + break; + payload <<= 1; + } + + packet->type = INTEL_PT_TNT; + packet->count = count; + packet->payload = payload << 1; + return 8; +} + +static int intel_pt_get_pip(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + uint64_t payload = 0; + + if (len < 8) + return INTEL_PT_NEED_MORE_BYTES; + + packet->type = INTEL_PT_PIP; + memcpy_le64(&payload, buf
+ 2, 6); + packet->payload = payload >> 1; + if (payload & 1) + packet->payload |= NR_FLAG; + + return 8; +} + +static int intel_pt_get_tracestop(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_TRACESTOP; + return 2; +} + +static int intel_pt_get_cbr(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 4) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_CBR; + packet->payload = buf[2]; + return 4; +} + +static int intel_pt_get_vmcs(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + unsigned int count = (52 - 5) >> 3; + + if (count < 1 || count > 7) + return INTEL_PT_BAD_PACKET; + + if (len < count + 2) + return INTEL_PT_NEED_MORE_BYTES; + + packet->type = INTEL_PT_VMCS; + packet->count = count; + memcpy_le64(&packet->payload, buf + 2, count); + + return count + 2; +} + +static int intel_pt_get_ovf(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_OVF; + return 2; +} + +static int intel_pt_get_psb(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + int i; + + if (len < 16) + return INTEL_PT_NEED_MORE_BYTES; + + for (i = 2; i < 16; i += 2) { + if (buf[i] != 2 || buf[i + 1] != 0x82) + return INTEL_PT_BAD_PACKET; + } + + packet->type = INTEL_PT_PSB; + return 16; +} + +static int intel_pt_get_psbend(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_PSBEND; + return 2; +} + +static int intel_pt_get_tma(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 7) + return INTEL_PT_NEED_MORE_BYTES; + + packet->type = INTEL_PT_TMA; + packet->payload = buf[2] | (buf[3] << 8); + packet->count = buf[5] | ((buf[6] & BIT(0)) << 8); + return 7; +} + +static int intel_pt_get_pad(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_PAD; + return 1; +} + +static int intel_pt_get_mnt(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 11) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_MNT; + 
memcpy_le64(&packet->payload, buf + 3, 8); + return 11 +; +} + +static int intel_pt_get_3byte(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 3) + return INTEL_PT_NEED_MORE_BYTES; + + switch (buf[2]) { + case 0x88: /* MNT */ + return intel_pt_get_mnt(buf, len, packet); + default: + return INTEL_PT_BAD_PACKET; + } +} + +static int intel_pt_get_ext(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 2) + return INTEL_PT_NEED_MORE_BYTES; + + switch (buf[1]) { + case 0xa3: /* Long TNT */ + return intel_pt_get_long_tnt(buf, len, packet); + case 0x43: /* PIP */ + return intel_pt_get_pip(buf, len, packet); + case 0x83: /* TraceStop */ + return intel_pt_get_tracestop(packet); + case 0x03: /* CBR */ + return intel_pt_get_cbr(buf, len, packet); + case 0xc8: /* VMCS */ + return intel_pt_get_vmcs(buf, len, packet); + case 0xf3: /* OVF */ + return intel_pt_get_ovf(packet); + case 0x82: /* PSB */ + return intel_pt_get_psb(buf, len, packet); + case 0x23: /* PSBEND */ + return intel_pt_get_psbend(packet); + case 0x73: /* TMA */ + return intel_pt_get_tma(buf, len, packet); + case 0xC3: /* 3-byte header */ + return intel_pt_get_3byte(buf, len, packet); + default: + return INTEL_PT_BAD_PACKET; + } +} + +static int intel_pt_get_short_tnt(unsigned int byte, + struct intel_pt_pkt *packet) +{ + int count; + + for (count = 6; count; count--) { + if (byte & BIT(7)) + break; + byte <<= 1; + } + + packet->type = INTEL_PT_TNT; + packet->count = count; + packet->payload = (uint64_t)byte << 57; + + return 1; +} + +static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf, + size_t len, struct intel_pt_pkt *packet) +{ + unsigned int offs = 1, shift; + uint64_t payload = byte >> 3; + + byte >>= 2; + len -= 1; + for (shift = 5; byte & 1; shift += 7) { + if (offs > 9) + return INTEL_PT_BAD_PACKET; + if (len < offs) + return INTEL_PT_NEED_MORE_BYTES; + byte = buf[offs++]; + payload |= ((uint64_t)byte >> 1) << shift; /* shift up to 61 */ + } + +
packet->type = INTEL_PT_CYC; + packet->payload = payload; + return offs; +} + +static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte, + const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + switch (byte >> 5) { + case 0: + packet->count = 0; + break; + case 1: + if (len < 3) + return INTEL_PT_NEED_MORE_BYTES; + packet->count = 2; + memcpy_le64(&packet->payload, buf + 1, 2); /* unaligned-safe */ + break; + case 2: + if (len < 5) + return INTEL_PT_NEED_MORE_BYTES; + packet->count = 4; + memcpy_le64(&packet->payload, buf + 1, 4); /* unaligned-safe */ + break; + case 3: + case 6: + if (len < 7) + return INTEL_PT_NEED_MORE_BYTES; + packet->count = 6; + memcpy_le64(&packet->payload, buf + 1, 6); + break; + default: + return INTEL_PT_BAD_PACKET; + } + + packet->type = type; + + return packet->count + 1; +} + +static int intel_pt_get_mode(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 2) + return INTEL_PT_NEED_MORE_BYTES; + + switch (buf[1] >> 5) { + case 0: + packet->type = INTEL_PT_MODE_EXEC; + switch (buf[1] & 3) { + case 0: + packet->payload = 16; + break; + case 1: + packet->payload = 64; + break; + case 2: + packet->payload = 32; + break; + default: + return INTEL_PT_BAD_PACKET; + } + break; + case 1: + packet->type = INTEL_PT_MODE_TSX; + if ((buf[1] & 3) == 3) + return INTEL_PT_BAD_PACKET; + packet->payload = buf[1] & 3; + break; + default: + return INTEL_PT_BAD_PACKET; + } + + return 2; +} + +static int intel_pt_get_tsc(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 8) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_TSC; + memcpy_le64(&packet->payload, buf + 1, 7); + return 8; +} + +static int intel_pt_get_mtc(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 2) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_MTC; + packet->payload = buf[1]; + return 2; +} + +static int intel_pt_do_get_packet(const unsigned char
*buf, size_t len, + struct intel_pt_pkt *packet) +{ + unsigned int byte; + + memset(packet, 0, sizeof(struct intel_pt_pkt)); + + if (!len) + return INTEL_PT_NEED_MORE_BYTES; + + byte = buf[0]; + if (!(byte & BIT(0))) { + if (byte == 0) + return intel_pt_get_pad(packet); + if (byte == 2) + return intel_pt_get_ext(buf, len, packet); + return intel_pt_get_short_tnt(byte, packet); + } + + if ((byte & 2)) + return intel_pt_get_cyc(byte, buf, len, packet); + + switch (byte & 0x1f) { + case 0x0D: + return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet); + case 0x11: + return intel_pt_get_ip(INTEL_PT_TIP_PGE, byte, buf, len, + packet); + case 0x01: + return intel_pt_get_ip(INTEL_PT_TIP_PGD, byte, buf, len, + packet); + case 0x1D: + return intel_pt_get_ip(INTEL_PT_FUP, byte, buf, len, packet); + case 0x19: + switch (byte) { + case 0x99: + return intel_pt_get_mode(buf, len, packet); + case 0x19: + return intel_pt_get_tsc(buf, len, packet); + case 0x59: + return intel_pt_get_mtc(buf, len, packet); + default: + return INTEL_PT_BAD_PACKET; + } + default: + return INTEL_PT_BAD_PACKET; + } +} + +int intel_pt_get_packet(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + int ret; + + ret = intel_pt_do_get_packet(buf, len, packet); + if (ret > 0) { + while (ret < 8 && len > (size_t)ret && !buf[ret]) + ret += 1; + } + return ret; +} + +int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, + size_t buf_len) +{ + int ret, i, nr; + unsigned long long payload = packet->payload; + const char *name = intel_pt_pkt_name(packet->type); + + switch (packet->type) { + case INTEL_PT_BAD: + case INTEL_PT_PAD: + case INTEL_PT_PSB: + case INTEL_PT_PSBEND: + case INTEL_PT_TRACESTOP: + case INTEL_PT_OVF: + return snprintf(buf, buf_len, "%s", name); + case INTEL_PT_TNT: { + size_t blen = buf_len; + + ret = snprintf(buf, blen, "%s ", name); + if (ret < 0) + return ret; + buf += ret; + blen -= ret; + for (i = 0; i < packet->count; i++) { + if (payload & BIT63) + 
ret = snprintf(buf, blen, "T"); + else + ret = snprintf(buf, blen, "N"); + if (ret < 0) + return ret; + buf += ret; + blen -= ret; + payload <<= 1; + } + ret = snprintf(buf, blen, " (%d)", packet->count); + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + case INTEL_PT_TIP_PGD: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + case INTEL_PT_FUP: + if (!(packet->count)) + return snprintf(buf, buf_len, "%s no ip", name); + case INTEL_PT_CYC: + case INTEL_PT_VMCS: + case INTEL_PT_MTC: + case INTEL_PT_MNT: + case INTEL_PT_CBR: + case INTEL_PT_TSC: + return snprintf(buf, buf_len, "%s 0x%llx", name, payload); + case INTEL_PT_TMA: + return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name, + (unsigned)payload, packet->count); + case INTEL_PT_MODE_EXEC: + return snprintf(buf, buf_len, "%s %lld", name, payload); + case INTEL_PT_MODE_TSX: + return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u", + name, (unsigned)(payload >> 1) & 1, + (unsigned)payload & 1); + case INTEL_PT_PIP: + nr = packet->payload & NR_FLAG ? 1 : 0; + payload &= ~NR_FLAG; + ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)", + name, payload, nr); + return ret; + default: + break; + } + return snprintf(buf, buf_len, "%s 0x%llx (%d)", + name, payload, packet->count); +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h new file mode 100644 index 000000000000..781bb79883bd --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h @@ -0,0 +1,70 @@ +/* + * intel_pt_pkt_decoder.h: Intel Processor Trace support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__ +#define INCLUDE__INTEL_PT_PKT_DECODER_H__ + +#include <stddef.h> +#include <stdint.h> + +#define INTEL_PT_PKT_DESC_MAX 256 + +#define INTEL_PT_NEED_MORE_BYTES -1 +#define INTEL_PT_BAD_PACKET -2 + +#define INTEL_PT_PSB_STR "\002\202\002\202\002\202\002\202" \ + "\002\202\002\202\002\202\002\202" +#define INTEL_PT_PSB_LEN 16 + +#define INTEL_PT_PKT_MAX_SZ 16 + +enum intel_pt_pkt_type { + INTEL_PT_BAD, + INTEL_PT_PAD, + INTEL_PT_TNT, + INTEL_PT_TIP_PGD, + INTEL_PT_TIP_PGE, + INTEL_PT_TSC, + INTEL_PT_TMA, + INTEL_PT_MODE_EXEC, + INTEL_PT_MODE_TSX, + INTEL_PT_MTC, + INTEL_PT_TIP, + INTEL_PT_FUP, + INTEL_PT_CYC, + INTEL_PT_VMCS, + INTEL_PT_PSB, + INTEL_PT_PSBEND, + INTEL_PT_CBR, + INTEL_PT_TRACESTOP, + INTEL_PT_PIP, + INTEL_PT_OVF, + INTEL_PT_MNT, +}; + +struct intel_pt_pkt { + enum intel_pt_pkt_type type; + int count; + uint64_t payload; +}; + +const char *intel_pt_pkt_name(enum intel_pt_pkt_type); + +int intel_pt_get_packet(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet); + +int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len); + +#endif diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt new file mode 100644 index 000000000000..d388de72eaca --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt @@ -0,0 +1,984 @@ +# x86 Opcode Maps +# +# This is (mostly) based on following documentations. 
+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C +# (#326018-047US, June 2013) +# +#<Opcode maps> +# Table: table-name +# Referrer: escaped-name +# AVXcode: avx-code +# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# (or) +# opcode: escape # escaped-name +# EndTable +# +#<group maps> +# GrpTable: GrpXXX +# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# EndTable +# +# AVX Superscripts +# (v): this opcode requires VEX prefix. +# (v1): this opcode only supports 128bit VEX. +# +# Last Prefix Superscripts +# - (66): the last prefix is 0x66 +# - (F3): the last prefix is 0xF3 +# - (F2): the last prefix is 0xF2 +# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) +# - (66&F2): Both 0x66 and 0xF2 prefixes are specified. + +Table: one byte opcode +Referrer: +AVXcode: +# 0x00 - 0x0f +00: ADD Eb,Gb +01: ADD Ev,Gv +02: ADD Gb,Eb +03: ADD Gv,Ev +04: ADD AL,Ib +05: ADD rAX,Iz +06: PUSH ES (i64) +07: POP ES (i64) +08: OR Eb,Gb +09: OR Ev,Gv +0a: OR Gb,Eb +0b: OR Gv,Ev +0c: OR AL,Ib +0d: OR rAX,Iz +0e: PUSH CS (i64) +0f: escape # 2-byte escape +# 0x10 - 0x1f +10: ADC Eb,Gb +11: ADC Ev,Gv +12: ADC Gb,Eb +13: ADC Gv,Ev +14: ADC AL,Ib +15: ADC rAX,Iz +16: PUSH SS (i64) +17: POP SS (i64) +18: SBB Eb,Gb +19: SBB Ev,Gv +1a: SBB Gb,Eb +1b: SBB Gv,Ev +1c: SBB AL,Ib +1d: SBB rAX,Iz +1e: PUSH DS (i64) +1f: POP DS (i64) +# 0x20 - 0x2f +20: AND Eb,Gb +21: AND Ev,Gv +22: AND Gb,Eb +23: AND Gv,Ev +24: AND AL,Ib +25: AND rAx,Iz +26: SEG=ES (Prefix) +27: DAA (i64) +28: SUB Eb,Gb +29: SUB Ev,Gv +2a: SUB Gb,Eb +2b: SUB Gv,Ev +2c: SUB AL,Ib +2d: SUB rAX,Iz +2e: SEG=CS (Prefix) +2f: DAS (i64) +# 0x30 - 0x3f +30: XOR Eb,Gb +31: XOR Ev,Gv +32: XOR Gb,Eb +33: XOR Gv,Ev +34: XOR AL,Ib +35: XOR rAX,Iz +36: SEG=SS (Prefix) +37: AAA (i64) +38: CMP Eb,Gb +39: CMP Ev,Gv +3a: CMP Gb,Eb +3b: CMP Gv,Ev +3c: CMP AL,Ib +3d: CMP rAX,Iz +3e: SEG=DS (Prefix) +3f: AAS (i64) +# 0x40 - 
0x4f +40: INC eAX (i64) | REX (o64) +41: INC eCX (i64) | REX.B (o64) +42: INC eDX (i64) | REX.X (o64) +43: INC eBX (i64) | REX.XB (o64) +44: INC eSP (i64) | REX.R (o64) +45: INC eBP (i64) | REX.RB (o64) +46: INC eSI (i64) | REX.RX (o64) +47: INC eDI (i64) | REX.RXB (o64) +48: DEC eAX (i64) | REX.W (o64) +49: DEC eCX (i64) | REX.WB (o64) +4a: DEC eDX (i64) | REX.WX (o64) +4b: DEC eBX (i64) | REX.WXB (o64) +4c: DEC eSP (i64) | REX.WR (o64) +4d: DEC eBP (i64) | REX.WRB (o64) +4e: DEC eSI (i64) | REX.WRX (o64) +4f: DEC eDI (i64) | REX.WRXB (o64) +# 0x50 - 0x5f +50: PUSH rAX/r8 (d64) +51: PUSH rCX/r9 (d64) +52: PUSH rDX/r10 (d64) +53: PUSH rBX/r11 (d64) +54: PUSH rSP/r12 (d64) +55: PUSH rBP/r13 (d64) +56: PUSH rSI/r14 (d64) +57: PUSH rDI/r15 (d64) +58: POP rAX/r8 (d64) +59: POP rCX/r9 (d64) +5a: POP rDX/r10 (d64) +5b: POP rBX/r11 (d64) +5c: POP rSP/r12 (d64) +5d: POP rBP/r13 (d64) +5e: POP rSI/r14 (d64) +5f: POP rDI/r15 (d64) +# 0x60 - 0x6f +60: PUSHA/PUSHAD (i64) +61: POPA/POPAD (i64) +62: BOUND Gv,Ma (i64) +63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +64: SEG=FS (Prefix) +65: SEG=GS (Prefix) +66: Operand-Size (Prefix) +67: Address-Size (Prefix) +68: PUSH Iz (d64) +69: IMUL Gv,Ev,Iz +6a: PUSH Ib (d64) +6b: IMUL Gv,Ev,Ib +6c: INS/INSB Yb,DX +6d: INS/INSW/INSD Yz,DX +6e: OUTS/OUTSB DX,Xb +6f: OUTS/OUTSW/OUTSD DX,Xz +# 0x70 - 0x7f +70: JO Jb +71: JNO Jb +72: JB/JNAE/JC Jb +73: JNB/JAE/JNC Jb +74: JZ/JE Jb +75: JNZ/JNE Jb +76: JBE/JNA Jb +77: JNBE/JA Jb +78: JS Jb +79: JNS Jb +7a: JP/JPE Jb +7b: JNP/JPO Jb +7c: JL/JNGE Jb +7d: JNL/JGE Jb +7e: JLE/JNG Jb +7f: JNLE/JG Jb +# 0x80 - 0x8f +80: Grp1 Eb,Ib (1A) +81: Grp1 Ev,Iz (1A) +82: Grp1 Eb,Ib (1A),(i64) +83: Grp1 Ev,Ib (1A) +84: TEST Eb,Gb +85: TEST Ev,Gv +86: XCHG Eb,Gb +87: XCHG Ev,Gv +88: MOV Eb,Gb +89: MOV Ev,Gv +8a: MOV Gb,Eb +8b: MOV Gv,Ev +8c: MOV Ev,Sw +8d: LEA Gv,M +8e: MOV Sw,Ew +8f: Grp1A (1A) | POP Ev (d64) +# 0x90 - 0x9f +90: NOP | PAUSE (F3) | XCHG r8,rAX +91: XCHG rCX/r9,rAX +92: XCHG rDX/r10,rAX +93: XCHG 
rBX/r11,rAX +94: XCHG rSP/r12,rAX +95: XCHG rBP/r13,rAX +96: XCHG rSI/r14,rAX +97: XCHG rDI/r15,rAX +98: CBW/CWDE/CDQE +99: CWD/CDQ/CQO +9a: CALLF Ap (i64) +9b: FWAIT/WAIT +9c: PUSHF/D/Q Fv (d64) +9d: POPF/D/Q Fv (d64) +9e: SAHF +9f: LAHF +# 0xa0 - 0xaf +a0: MOV AL,Ob +a1: MOV rAX,Ov +a2: MOV Ob,AL +a3: MOV Ov,rAX +a4: MOVS/B Yb,Xb +a5: MOVS/W/D/Q Yv,Xv +a6: CMPS/B Xb,Yb +a7: CMPS/W/D Xv,Yv +a8: TEST AL,Ib +a9: TEST rAX,Iz +aa: STOS/B Yb,AL +ab: STOS/W/D/Q Yv,rAX +ac: LODS/B AL,Xb +ad: LODS/W/D/Q rAX,Xv +ae: SCAS/B AL,Yb +# Note: The May 2011 Intel manual shows Xv for the second parameter of the +# next instruction but Yv is correct +af: SCAS/W/D/Q rAX,Yv +# 0xb0 - 0xbf +b0: MOV AL/R8L,Ib +b1: MOV CL/R9L,Ib +b2: MOV DL/R10L,Ib +b3: MOV BL/R11L,Ib +b4: MOV AH/R12L,Ib +b5: MOV CH/R13L,Ib +b6: MOV DH/R14L,Ib +b7: MOV BH/R15L,Ib +b8: MOV rAX/r8,Iv +b9: MOV rCX/r9,Iv +ba: MOV rDX/r10,Iv +bb: MOV rBX/r11,Iv +bc: MOV rSP/r12,Iv +bd: MOV rBP/r13,Iv +be: MOV rSI/r14,Iv +bf: MOV rDI/r15,Iv +# 0xc0 - 0xcf +c0: Grp2 Eb,Ib (1A) +c1: Grp2 Ev,Ib (1A) +c2: RETN Iw (f64) +c3: RETN +c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) +c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) +c6: Grp11A Eb,Ib (1A) +c7: Grp11B Ev,Iz (1A) +c8: ENTER Iw,Ib +c9: LEAVE (d64) +ca: RETF Iw +cb: RETF +cc: INT3 +cd: INT Ib +ce: INTO (i64) +cf: IRET/D/Q +# 0xd0 - 0xdf +d0: Grp2 Eb,1 (1A) +d1: Grp2 Ev,1 (1A) +d2: Grp2 Eb,CL (1A) +d3: Grp2 Ev,CL (1A) +d4: AAM Ib (i64) +d5: AAD Ib (i64) +d6: +d7: XLAT/XLATB +d8: ESC +d9: ESC +da: ESC +db: ESC +dc: ESC +dd: ESC +de: ESC +df: ESC +# 0xe0 - 0xef +# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix +# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation +# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. 
+e0: LOOPNE/LOOPNZ Jb (f64) +e1: LOOPE/LOOPZ Jb (f64) +e2: LOOP Jb (f64) +e3: JrCXZ Jb (f64) +e4: IN AL,Ib +e5: IN eAX,Ib +e6: OUT Ib,AL +e7: OUT Ib,eAX +# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset +# in "near" jumps and calls is 16-bit. For CALL, +# push of return address is 16-bit wide, RSP is decremented by 2 +# but is not truncated to 16 bits, unlike RIP. +e8: CALL Jz (f64) +e9: JMP-near Jz (f64) +ea: JMP-far Ap (i64) +eb: JMP-short Jb (f64) +ec: IN AL,DX +ed: IN eAX,DX +ee: OUT DX,AL +ef: OUT DX,eAX +# 0xf0 - 0xff +f0: LOCK (Prefix) +f1: +f2: REPNE (Prefix) | XACQUIRE (Prefix) +f3: REP/REPE (Prefix) | XRELEASE (Prefix) +f4: HLT +f5: CMC +f6: Grp3_1 Eb (1A) +f7: Grp3_2 Ev (1A) +f8: CLC +f9: STC +fa: CLI +fb: STI +fc: CLD +fd: STD +fe: Grp4 (1A) +ff: Grp5 (1A) +EndTable + +Table: 2-byte opcode (0x0f) +Referrer: 2-byte escape +AVXcode: 1 +# 0x0f 0x00-0x0f +00: Grp6 (1A) +01: Grp7 (1A) +02: LAR Gv,Ew +03: LSL Gv,Ew +04: +05: SYSCALL (o64) +06: CLTS +07: SYSRET (o64) +08: INVD +09: WBINVD +0a: +0b: UD2 (1B) +0c: +# AMD's prefetch group. Intel supports prefetchw(/1) only. +0d: GrpP +0e: FEMMS +# 3DNow! uses the last imm byte as opcode extension. +0f: 3DNow! Pq,Qq,Ib +# 0x0f 0x10-0x1f +# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands +# but it actually has operands. And also, vmovss and vmovsd only accept 128bit. +# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. 
+# Many AVX instructions lack v1 superscript, according to Intel AVX-Programming +# Reference A.1 +10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) +11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) +12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) +13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) +14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) +15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) +16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) +17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) +18: Grp16 (1A) +19: +# Intel SDM opcode map does not list MPX instructions. For now using Gv for +# bnd registers and Ev for everything else is OK because the instruction +# decoder does not use the information except as an indication that there is +# a ModR/M byte.
+1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev +1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv +1c: +1d: +1e: +1f: NOP Ev +# 0x0f 0x20-0x2f +20: MOV Rd,Cd +21: MOV Rd,Dd +22: MOV Cd,Rd +23: MOV Dd,Rd +24: +25: +26: +27: +28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) +29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) +2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) +2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) +2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) +2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) +2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) +2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) +# 0x0f 0x30-0x3f +30: WRMSR +31: RDTSC +32: RDMSR +33: RDPMC +34: SYSENTER +35: SYSEXIT +36: +37: GETSEC +38: escape # 3-byte escape 1 +39: +3a: escape # 3-byte escape 2 +3b: +3c: +3d: +3e: +3f: +# 0x0f 0x40-0x4f +40: CMOVO Gv,Ev +41: CMOVNO Gv,Ev +42: CMOVB/C/NAE Gv,Ev +43: CMOVAE/NB/NC Gv,Ev +44: CMOVE/Z Gv,Ev +45: CMOVNE/NZ Gv,Ev +46: CMOVBE/NA Gv,Ev +47: CMOVA/NBE Gv,Ev +48: CMOVS Gv,Ev +49: CMOVNS Gv,Ev +4a: CMOVP/PE Gv,Ev +4b: CMOVNP/PO Gv,Ev +4c: CMOVL/NGE Gv,Ev +4d: CMOVNL/GE Gv,Ev +4e: CMOVLE/NG Gv,Ev +4f: CMOVNLE/G Gv,Ev +# 0x0f 0x50-0x5f +50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) +51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) +52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) +53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) +54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) +55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) +56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) +57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) +58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) +59: vmulps Vps,Hps,Wps | 
vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) +5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) +5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) +5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) +5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) +5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) +# 0x0f 0x60-0x6f +60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) +61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) +62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) +63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) +64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) +65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) +66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) +67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) +68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) +69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) +6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) +6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) +6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) +6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) +6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) +# 0x0f 0x70-0x7f +70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) +71: Grp12 (1A) +72: Grp13 (1A) +73: Grp14 (1A) +74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) +75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) +76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) +# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. 
+77: emms | vzeroupper | vzeroall +78: VMREAD Ey,Gy +79: VMWRITE Gy,Ey +7a: +7b: +7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) +7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) +7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) +# 0x0f 0x80-0x8f +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). +80: JO Jz (f64) +81: JNO Jz (f64) +82: JB/JC/JNAE Jz (f64) +83: JAE/JNB/JNC Jz (f64) +84: JE/JZ Jz (f64) +85: JNE/JNZ Jz (f64) +86: JBE/JNA Jz (f64) +87: JA/JNBE Jz (f64) +88: JS Jz (f64) +89: JNS Jz (f64) +8a: JP/JPE Jz (f64) +8b: JNP/JPO Jz (f64) +8c: JL/JNGE Jz (f64) +8d: JNL/JGE Jz (f64) +8e: JLE/JNG Jz (f64) +8f: JNLE/JG Jz (f64) +# 0x0f 0x90-0x9f +90: SETO Eb +91: SETNO Eb +92: SETB/C/NAE Eb +93: SETAE/NB/NC Eb +94: SETE/Z Eb +95: SETNE/NZ Eb +96: SETBE/NA Eb +97: SETA/NBE Eb +98: SETS Eb +99: SETNS Eb +9a: SETP/PE Eb +9b: SETNP/PO Eb +9c: SETL/NGE Eb +9d: SETNL/GE Eb +9e: SETLE/NG Eb +9f: SETNLE/G Eb +# 0x0f 0xa0-0xaf +a0: PUSH FS (d64) +a1: POP FS (d64) +a2: CPUID +a3: BT Ev,Gv +a4: SHLD Ev,Gv,Ib +a5: SHLD Ev,Gv,CL +a6: GrpPDLK +a7: GrpRNG +a8: PUSH GS (d64) +a9: POP GS (d64) +aa: RSM +ab: BTS Ev,Gv +ac: SHRD Ev,Gv,Ib +ad: SHRD Ev,Gv,CL +ae: Grp15 (1A),(1C) +af: IMUL Gv,Ev +# 0x0f 0xb0-0xbf +b0: CMPXCHG Eb,Gb +b1: CMPXCHG Ev,Gv +b2: LSS Gv,Mp +b3: BTR Ev,Gv +b4: LFS Gv,Mp +b5: LGS Gv,Mp +b6: MOVZX Gv,Eb +b7: MOVZX Gv,Ew +b8: JMPE (!F3) | POPCNT Gv,Ev (F3) +b9: Grp10 (1A) +ba: Grp8 Ev,Ib (1A) +bb: BTC Ev,Gv +bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) +bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) +be: MOVSX Gv,Eb +bf: MOVSX Gv,Ew +# 0x0f 0xc0-0xcf +c0: XADD Eb,Gb +c1: XADD Ev,Gv +c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) +c3: movnti My,Gy +c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) +c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) +c6: vshufps Vps,Hps,Wps,Ib | 
vshufpd Vpd,Hpd,Wpd,Ib (66) +c7: Grp9 (1A) +c8: BSWAP RAX/EAX/R8/R8D +c9: BSWAP RCX/ECX/R9/R9D +ca: BSWAP RDX/EDX/R10/R10D +cb: BSWAP RBX/EBX/R11/R11D +cc: BSWAP RSP/ESP/R12/R12D +cd: BSWAP RBP/EBP/R13/R13D +ce: BSWAP RSI/ESI/R14/R14D +cf: BSWAP RDI/EDI/R15/R15D +# 0x0f 0xd0-0xdf +d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) +d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) +d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) +d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) +d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) +d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) +d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) +d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) +d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) +da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) +dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) +de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) +# 0x0f 0xe0-0xef +e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) +e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) +e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) +e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) +e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) +e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) +e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) +e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) +ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) +ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) +ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) +# 0x0f 0xf0-0xff +f0: vlddqu Vx,Mx (F2) +f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) +f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) +f3: psllq Pq,Qq | vpsllq 
Vx,Hx,Wx (66),(v1) +f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) +f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) +f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) +f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) +f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) +f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) +fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) +fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) +fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) +fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) +fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) +ff: +EndTable + +Table: 3-byte opcode 1 (0x0f 0x38) +Referrer: 3-byte escape 1 +AVXcode: 2 +# 0x0f 0x38 0x00-0x0f +00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) +01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) +02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) +03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) +04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) +05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) +06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) +07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) +08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) +09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) +0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) +0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) +0c: vpermilps Vx,Hx,Wx (66),(v) +0d: vpermilpd Vx,Hx,Wx (66),(v) +0e: vtestps Vx,Wx (66),(v) +0f: vtestpd Vx,Wx (66),(v) +# 0x0f 0x38 0x10-0x1f +10: pblendvb Vdq,Wdq (66) +11: +12: +13: vcvtph2ps Vx,Wx,Ib (66),(v) +14: blendvps Vdq,Wdq (66) +15: blendvpd Vdq,Wdq (66) +16: vpermps Vqq,Hqq,Wqq (66),(v) +17: vptest Vx,Wx (66) +18: vbroadcastss Vx,Wd (66),(v) +19: vbroadcastsd Vqq,Wq (66),(v) +1a: vbroadcastf128 Vqq,Mdq (66),(v) +1b: +1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) +1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) +1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) +1f: +# 0x0f 0x38 0x20-0x2f +20: vpmovsxbw Vx,Ux/Mq (66),(v1) +21: vpmovsxbd Vx,Ux/Md (66),(v1) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) +24: vpmovsxwq Vx,Ux/Md (66),(v1) +25: vpmovsxdq 
Vx,Ux/Mq (66),(v1) +26: +27: +28: vpmuldq Vx,Hx,Wx (66),(v1) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) +2a: vmovntdqa Vx,Mx (66),(v1) +2b: vpackusdw Vx,Hx,Wx (66),(v1) +2c: vmaskmovps Vx,Hx,Mx (66),(v) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2e: vmaskmovps Mx,Hx,Vx (66),(v) +2f: vmaskmovpd Mx,Hx,Vx (66),(v) +# 0x0f 0x38 0x30-0x3f +30: vpmovzxbw Vx,Ux/Mq (66),(v1) +31: vpmovzxbd Vx,Ux/Md (66),(v1) +32: vpmovzxbq Vx,Ux/Mw (66),(v1) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) +34: vpmovzxwq Vx,Ux/Md (66),(v1) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) +36: vpermd Vqq,Hqq,Wqq (66),(v) +37: vpcmpgtq Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) +39: vpminsd Vx,Hx,Wx (66),(v1) +3a: vpminuw Vx,Hx,Wx (66),(v1) +3b: vpminud Vx,Hx,Wx (66),(v1) +3c: vpmaxsb Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3e: vpmaxuw Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) +# 0x0f 0x38 0x40-0x8f +40: vpmulld Vx,Hx,Wx (66),(v1) +41: vphminposuw Vdq,Wdq (66),(v1) +42: +43: +44: +45: vpsrlvd/q Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) +47: vpsllvd/q Vx,Hx,Wx (66),(v) +# Skip 0x48-0x57 +58: vpbroadcastd Vx,Wx (66),(v) +59: vpbroadcastq Vx,Wx (66),(v) +5a: vbroadcasti128 Vqq,Mdq (66),(v) +# Skip 0x5b-0x77 +78: vpbroadcastb Vx,Wx (66),(v) +79: vpbroadcastw Vx,Wx (66),(v) +# Skip 0x7a-0x7f +80: INVEPT Gy,Mdq (66) +81: INVPID Gy,Mdq (66) +82: INVPCID Gy,Mdq (66) +8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) +# 0x0f 0x38 0x90-0xbf (FMA) +90: vgatherdd/q Vx,Hx,Wx (66),(v) +91: vgatherqd/q Vx,Hx,Wx (66),(v) +92: vgatherdps/d Vx,Hx,Wx (66),(v) +93: vgatherqps/d Vx,Hx,Wx (66),(v) +94: +95: +96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) +97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) +98: vfmadd132ps/d Vx,Hx,Wx (66),(v) +99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) +9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) +9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) +9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a6: 
vfmaddsub213ps/d Vx,Hx,Wx (66),(v) +a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) +a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) +a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) +ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) +ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) +af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) +b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) +b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) +b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) +bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) +bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) +bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +# 0x0f 0x38 0xc0-0xff +c8: sha1nexte Vdq,Wdq +c9: sha1msg1 Vdq,Wdq +ca: sha1msg2 Vdq,Wdq +cb: sha256rnds2 Vdq,Wdq +cc: sha256msg1 Vdq,Wdq +cd: sha256msg2 Vdq,Wdq +db: VAESIMC Vdq,Wdq (66),(v1) +dc: VAESENC Vdq,Hdq,Wdq (66),(v1) +dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) +de: VAESDEC Vdq,Hdq,Wdq (66),(v1) +df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) +f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) +f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) +f2: ANDN Gy,By,Ey (v) +f3: Grp17 (1A) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) +EndTable + +Table: 3-byte opcode 2 (0x0f 0x3a) +Referrer: 3-byte escape 2 +AVXcode: 3 +# 0x0f 0x3a 0x00-0xff +00: vpermq Vqq,Wqq,Ib (66),(v) +01: vpermpd Vqq,Wqq,Ib (66),(v) +02: vpblendd Vx,Hx,Wx,Ib (66),(v) +03: +04: vpermilps Vx,Wx,Ib (66),(v) +05: vpermilpd Vx,Wx,Ib (66),(v) +06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) +07: +08: vroundps Vx,Wx,Ib (66) +09: vroundpd Vx,Wx,Ib (66) +0a: vroundss Vss,Wss,Ib (66),(v1) +0b: 
vroundsd Vsd,Wsd,Ib (66),(v1) +0c: vblendps Vx,Hx,Wx,Ib (66) +0d: vblendpd Vx,Hx,Wx,Ib (66) +0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) +0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) +14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) +15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) +16: vpextrd/q Ey,Vdq,Ib (66),(v1) +17: vextractps Ed,Vdq,Ib (66),(v1) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) +19: vextractf128 Wdq,Vqq,Ib (66),(v) +1d: vcvtps2ph Wx,Vx,Ib (66),(v) +20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) +21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) +22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) +39: vextracti128 Wdq,Vqq,Ib (66),(v) +40: vdpps Vx,Hx,Wx,Ib (66) +41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) +46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) +4a: vblendvps Vx,Hx,Wx,Lx (66),(v) +4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) +4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) +61: vpcmpestri Vdq,Wdq,Ib (66),(v1) +62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) +63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +cc: sha1rnds4 Vdq,Wdq,Ib +df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) +f0: RORX Gy,Ey,Ib (F2),(v) +EndTable + +GrpTable: Grp1 +0: ADD +1: OR +2: ADC +3: SBB +4: AND +5: SUB +6: XOR +7: CMP +EndTable + +GrpTable: Grp1A +0: POP +EndTable + +GrpTable: Grp2 +0: ROL +1: ROR +2: RCL +3: RCR +4: SHL/SAL +5: SHR +6: +7: SAR +EndTable + +GrpTable: Grp3_1 +0: TEST Eb,Ib +1: +2: NOT Eb +3: NEG Eb +4: MUL AL,Eb +5: IMUL AL,Eb +6: DIV AL,Eb +7: IDIV AL,Eb +EndTable + +GrpTable: Grp3_2 +0: TEST Ev,Iz +1: +2: NOT Ev +3: NEG Ev +4: MUL rAX,Ev +5: IMUL rAX,Ev +6: DIV rAX,Ev +7: IDIV rAX,Ev +EndTable + +GrpTable: Grp4 +0: INC Eb +1: DEC Eb +EndTable + +GrpTable: Grp5 +0: INC Ev +1: DEC Ev +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
+2: CALLN Ev (f64) +3: CALLF Ep +4: JMPN Ev (f64) +5: JMPF Mp +6: PUSH Ev (d64) +7: +EndTable + +GrpTable: Grp6 +0: SLDT Rv/Mw +1: STR Rv/Mw +2: LLDT Ew +3: LTR Ew +4: VERR Ew +5: VERW Ew +EndTable + +GrpTable: Grp7 +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) +3: LIDT Ms +4: SMSW Mw/Rv +5: rdpkru (110),(11B) | wrpkru (111),(11B) +6: LMSW Ew +7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +EndTable + +GrpTable: Grp8 +4: BT +5: BTS +6: BTR +7: BTC +EndTable + +GrpTable: Grp9 +1: CMPXCHG8B/16B Mq/Mdq +3: xrstors +4: xsavec +5: xsaves +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) +EndTable + +GrpTable: Grp10 +EndTable + +# Grp11A and Grp11B are expressed as Grp11 in Intel SDM +GrpTable: Grp11A +0: MOV Eb,Ib +7: XABORT Ib (000),(11B) +EndTable + +GrpTable: Grp11B +0: MOV Eb,Iz +7: XBEGIN Jz (000),(11B) +EndTable + +GrpTable: Grp12 +2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) +4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) +6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp13 +2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp14 +2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) +3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) +6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) +7: vpslldq Hx,Ux,Ib (66),(11B),(v1) +EndTable + +GrpTable: Grp15 +0: fxsave | RDFSBASE Ry (F3),(11B) +1: fxstor | RDGSBASE Ry (F3),(11B) +2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) +3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) +4: XSAVE +5: XRSTOR | lfence (11B) +6: 
XSAVEOPT | clwb (66) | mfence (11B) +7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) +EndTable + +GrpTable: Grp16 +0: prefetch NTA +1: prefetch T0 +2: prefetch T1 +3: prefetch T2 +EndTable + +GrpTable: Grp17 +1: BLSR By,Ey (v) +2: BLSMSK By,Ey (v) +3: BLSI By,Ey (v) +EndTable + +# AMD's Prefetch Group +GrpTable: GrpP +0: PREFETCH +1: PREFETCHW +EndTable + +GrpTable: GrpPDLK +0: MONTMUL +1: XSHA1 +2: XSHA2 +EndTable + +GrpTable: GrpRNG +0: xstore-rng +1: xcrypt-ecb +2: xcrypt-cbc +4: xcrypt-cfb +5: xcrypt-ofb +EndTable diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c new file mode 100644 index 000000000000..97f963a3dcb9 --- /dev/null +++ b/tools/perf/util/intel-pt.c @@ -0,0 +1,2164 @@ +/* + * intel_pt.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include <stdio.h> +#include <stdbool.h> +#include <errno.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#include "../perf.h" +#include "session.h" +#include "machine.h" +#include "sort.h" +#include "tool.h" +#include "event.h" +#include "evlist.h" +#include "evsel.h" +#include "map.h" +#include "color.h" +#include "util.h" +#include "thread.h" +#include "thread-stack.h" +#include "symbol.h" +#include "callchain.h" +#include "dso.h" +#include "debug.h" +#include "auxtrace.h" +#include "tsc.h" +#include "intel-pt.h" + +#include "intel-pt-decoder/intel-pt-log.h" +#include "intel-pt-decoder/intel-pt-decoder.h" +#include "intel-pt-decoder/intel-pt-insn-decoder.h" +#include "intel-pt-decoder/intel-pt-pkt-decoder.h" + +#define MAX_TIMESTAMP (~0ULL) + +struct intel_pt { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + struct perf_evsel *switch_evsel; + struct thread *unknown_thread; + bool timeless_decoding; + bool sampling_mode; + bool snapshot_mode; + bool per_cpu_mmaps; + bool have_tsc; + bool data_queued; + bool est_tsc; + bool sync_switch; + bool mispred_all; + int have_sched_switch; + u32 pmu_type; + u64 kernel_start; + u64 switch_ip; + u64 ptss_ip; + + struct perf_tsc_conversion tc; + bool cap_user_time_zero; + + struct itrace_synth_opts synth_opts; + + bool sample_instructions; + u64 instructions_sample_type; + u64 instructions_sample_period; + u64 instructions_id; + + bool sample_branches; + u32 branches_filter; + u64 branches_sample_type; + u64 branches_id; + + bool sample_transactions; + u64 transactions_sample_type; + u64 transactions_id; + + bool synth_needs_swap; + + u64 tsc_bit; + u64 mtc_bit; + u64 mtc_freq_bits; + u32 tsc_ctc_ratio_n; + u32 tsc_ctc_ratio_d; + u64 cyc_bit; + u64 noretcomp_bit; + unsigned max_non_turbo_ratio; +}; + +enum switch_state { + INTEL_PT_SS_NOT_TRACING, + INTEL_PT_SS_UNKNOWN, + 
INTEL_PT_SS_TRACING, + INTEL_PT_SS_EXPECTING_SWITCH_EVENT, + INTEL_PT_SS_EXPECTING_SWITCH_IP, +}; + +struct intel_pt_queue { + struct intel_pt *pt; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + void *decoder; + const struct intel_pt_state *state; + struct ip_callchain *chain; + struct branch_stack *last_branch; + struct branch_stack *last_branch_rb; + size_t last_branch_pos; + union perf_event *event_buf; + bool on_heap; + bool stop; + bool step_through_buffers; + bool use_buffer_pid_tid; + pid_t pid, tid; + int cpu; + int switch_state; + pid_t next_tid; + struct thread *thread; + bool exclude_kernel; + bool have_sample; + u64 time; + u64 timestamp; + u32 flags; + u16 insn_len; + u64 last_insn_cnt; +}; + +static void intel_pt_dump(struct intel_pt *pt __maybe_unused, + unsigned char *buf, size_t len) +{ + struct intel_pt_pkt packet; + size_t pos = 0; + int ret, pkt_len, i; + char desc[INTEL_PT_PKT_DESC_MAX]; + const char *color = PERF_COLOR_BLUE; + + color_fprintf(stdout, color, + ". ... 
Intel Processor Trace data: size %zu bytes\n", + len); + + while (len) { + ret = intel_pt_get_packet(buf, len, &packet); + if (ret > 0) + pkt_len = ret; + else + pkt_len = 1; + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + for (i = 0; i < pkt_len; i++) + color_fprintf(stdout, color, " %02x", buf[i]); + for (; i < 16; i++) + color_fprintf(stdout, color, " "); + if (ret > 0) { + ret = intel_pt_pkt_desc(&packet, desc, + INTEL_PT_PKT_DESC_MAX); + if (ret > 0) + color_fprintf(stdout, color, " %s\n", desc); + } else { + color_fprintf(stdout, color, " Bad packet!\n"); + } + pos += pkt_len; + buf += pkt_len; + len -= pkt_len; + } +} + +static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, + size_t len) +{ + printf(".\n"); + intel_pt_dump(pt, buf, len); +} + +static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, + struct auxtrace_buffer *b) +{ + void *start; + + start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, + pt->have_tsc); + if (!start) + return -EINVAL; + b->use_size = b->data + b->size - start; + b->use_data = start; + return 0; +} + +static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq, + struct auxtrace_queue *queue, + struct auxtrace_buffer *buffer) +{ + if (queue->cpu == -1 && buffer->cpu != -1) + ptq->cpu = buffer->cpu; + + ptq->pid = buffer->pid; + ptq->tid = buffer->tid; + + intel_pt_log("queue %u cpu %d pid %d tid %d\n", + ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); + + thread__zput(ptq->thread); + + if (ptq->tid != -1) { + if (ptq->pid != -1) + ptq->thread = machine__findnew_thread(ptq->pt->machine, + ptq->pid, + ptq->tid); + else + ptq->thread = machine__find_thread(ptq->pt->machine, -1, + ptq->tid); + } +} + +/* This function assumes data is processed sequentially only */ +static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) +{ + struct intel_pt_queue *ptq = data; + struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer; + struct 
auxtrace_queue *queue; + + if (ptq->stop) { + b->len = 0; + return 0; + } + + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; + + buffer = auxtrace_buffer__next(queue, buffer); + if (!buffer) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + b->len = 0; + return 0; + } + + ptq->buffer = buffer; + + if (!buffer->data) { + int fd = perf_data_file__fd(ptq->pt->session->file); + + buffer->data = auxtrace_buffer__get_data(buffer, fd); + if (!buffer->data) + return -ENOMEM; + } + + if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer && + intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer)) + return -ENOMEM; + + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + + if (buffer->use_data) { + b->len = buffer->use_size; + b->buf = buffer->use_data; + } else { + b->len = buffer->size; + b->buf = buffer->data; + } + b->ref_timestamp = buffer->reference; + + if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode && + !buffer->consecutive)) { + b->consecutive = false; + b->trace_nr = buffer->buffer_nr + 1; + } else { + b->consecutive = true; + } + + if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid || + ptq->tid != buffer->tid)) + intel_pt_use_buffer_pid_tid(ptq, queue, buffer); + + if (ptq->step_through_buffers) + ptq->stop = true; + + if (!b->len) + return intel_pt_get_trace(b, data); + + return 0; +} + +struct intel_pt_cache_entry { + struct auxtrace_cache_entry entry; + u64 insn_cnt; + u64 byte_cnt; + enum intel_pt_insn_op op; + enum intel_pt_insn_branch branch; + int length; + int32_t rel; +}; + +static int intel_pt_config_div(const char *var, const char *value, void *data) +{ + int *d = data; + long val; + + if (!strcmp(var, "intel-pt.cache-divisor")) { + val = strtol(value, NULL, 0); + if (val > 0 && val <= INT_MAX) + *d = val; + } + + return 0; +} + +static int intel_pt_cache_divisor(void) +{ + static int d; + + if (d) + return d; + + perf_config(intel_pt_config_div, &d); + + if (!d) + d = 64; + + return d; +} 
+ +static unsigned int intel_pt_cache_size(struct dso *dso, + struct machine *machine) +{ + off_t size; + + size = dso__data_size(dso, machine); + size /= intel_pt_cache_divisor(); + if (size < 1000) + return 10; + if (size > (1 << 21)) + return 21; + return 32 - __builtin_clz(size); +} + +static struct auxtrace_cache *intel_pt_cache(struct dso *dso, + struct machine *machine) +{ + struct auxtrace_cache *c; + unsigned int bits; + + if (dso->auxtrace_cache) + return dso->auxtrace_cache; + + bits = intel_pt_cache_size(dso, machine); + + /* Ignoring cache creation failure */ + c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200); + + dso->auxtrace_cache = c; + + return c; +} + +static int intel_pt_cache_add(struct dso *dso, struct machine *machine, + u64 offset, u64 insn_cnt, u64 byte_cnt, + struct intel_pt_insn *intel_pt_insn) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + struct intel_pt_cache_entry *e; + int err; + + if (!c) + return -ENOMEM; + + e = auxtrace_cache__alloc_entry(c); + if (!e) + return -ENOMEM; + + e->insn_cnt = insn_cnt; + e->byte_cnt = byte_cnt; + e->op = intel_pt_insn->op; + e->branch = intel_pt_insn->branch; + e->length = intel_pt_insn->length; + e->rel = intel_pt_insn->rel; + + err = auxtrace_cache__add(c, offset, &e->entry); + if (err) + auxtrace_cache__free_entry(c, e); + + return err; +} + +static struct intel_pt_cache_entry * +intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + + if (!c) + return NULL; + + return auxtrace_cache__lookup(dso->auxtrace_cache, offset); +} + +static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, + uint64_t *insn_cnt_ptr, uint64_t *ip, + uint64_t to_ip, uint64_t max_insn_cnt, + void *data) +{ + struct intel_pt_queue *ptq = data; + struct machine *machine = ptq->pt->machine; + struct thread *thread; + struct addr_location al; + unsigned char buf[1024]; + size_t bufsz; + 
ssize_t len; + int x86_64; + u8 cpumode; + u64 offset, start_offset, start_ip; + u64 insn_cnt = 0; + bool one_map = true; + + if (to_ip && *ip == to_ip) + goto out_no_cache; + + bufsz = intel_pt_insn_max_size(); + + if (*ip >= ptq->pt->kernel_start) + cpumode = PERF_RECORD_MISC_KERNEL; + else + cpumode = PERF_RECORD_MISC_USER; + + thread = ptq->thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_KERNEL) + return -EINVAL; + thread = ptq->pt->unknown_thread; + } + + while (1) { + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al); + if (!al.map || !al.map->dso) + return -EINVAL; + + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && + dso__data_status_seen(al.map->dso, + DSO_DATA_STATUS_SEEN_ITRACE)) + return -ENOENT; + + offset = al.map->map_ip(al.map, *ip); + + if (!to_ip && one_map) { + struct intel_pt_cache_entry *e; + + e = intel_pt_cache_lookup(al.map->dso, machine, offset); + if (e && + (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) { + *insn_cnt_ptr = e->insn_cnt; + *ip += e->byte_cnt; + intel_pt_insn->op = e->op; + intel_pt_insn->branch = e->branch; + intel_pt_insn->length = e->length; + intel_pt_insn->rel = e->rel; + intel_pt_log_insn_no_data(intel_pt_insn, *ip); + return 0; + } + } + + start_offset = offset; + start_ip = *ip; + + /* Load maps to ensure dso->is_64_bit has been updated */ + map__load(al.map, machine->symbol_filter); + + x86_64 = al.map->dso->is_64_bit; + + while (1) { + len = dso__data_read_offset(al.map->dso, machine, + offset, buf, bufsz); + if (len <= 0) + return -EINVAL; + + if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) + return -EINVAL; + + intel_pt_log_insn(intel_pt_insn, *ip); + + insn_cnt += 1; + + if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) + goto out; + + if (max_insn_cnt && insn_cnt >= max_insn_cnt) + goto out_no_cache; + + *ip += intel_pt_insn->length; + + if (to_ip && *ip == to_ip) + goto out_no_cache; + + if (*ip >= al.map->end) + break; + + offset += intel_pt_insn->length; + } + 
one_map = false; + } +out: + *insn_cnt_ptr = insn_cnt; + + if (!one_map) + goto out_no_cache; + + /* + * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate + * entries. + */ + if (to_ip) { + struct intel_pt_cache_entry *e; + + e = intel_pt_cache_lookup(al.map->dso, machine, start_offset); + if (e) + return 0; + } + + /* Ignore cache errors */ + intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt, + *ip - start_ip, intel_pt_insn); + + return 0; + +out_no_cache: + *insn_cnt_ptr = insn_cnt; + return 0; +} + +static bool intel_pt_get_config(struct intel_pt *pt, + struct perf_event_attr *attr, u64 *config) +{ + if (attr->type == pt->pmu_type) { + if (config) + *config = attr->config; + return true; + } + + return false; +} + +static bool intel_pt_exclude_kernel(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, NULL) && + !evsel->attr.exclude_kernel) + return false; + } + return true; +} + +static bool intel_pt_return_compression(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + u64 config; + + if (!pt->noretcomp_bit) + return true; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config) && + (config & pt->noretcomp_bit)) + return false; + } + return true; +} + +static unsigned int intel_pt_mtc_period(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + unsigned int shift; + u64 config; + + if (!pt->mtc_freq_bits) + return 0; + + for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++) + config >>= 1; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config)) + return (config & pt->mtc_freq_bits) >> shift; + } + return 0; +} + +static bool intel_pt_timeless_decoding(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + bool timeless_decoding = true; + u64 config; + + if (!pt->tsc_bit || !pt->cap_user_time_zero) + return true; + + 
evlist__for_each(pt->session->evlist, evsel) { + if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME)) + return true; + if (intel_pt_get_config(pt, &evsel->attr, &config)) { + if (config & pt->tsc_bit) + timeless_decoding = false; + else + return true; + } + } + return timeless_decoding; +} + +static bool intel_pt_tracing_kernel(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, NULL) && + !evsel->attr.exclude_kernel) + return true; + } + return false; +} + +static bool intel_pt_have_tsc(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + bool have_tsc = false; + u64 config; + + if (!pt->tsc_bit) + return false; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config)) { + if (config & pt->tsc_bit) + have_tsc = true; + else + return false; + } + } + return have_tsc; +} + +static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) +{ + u64 quot, rem; + + quot = ns / pt->tc.time_mult; + rem = ns % pt->tc.time_mult; + return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) / + pt->tc.time_mult; +} + +static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, + unsigned int queue_nr) +{ + struct intel_pt_params params = { .get_trace = 0, }; + struct intel_pt_queue *ptq; + + ptq = zalloc(sizeof(struct intel_pt_queue)); + if (!ptq) + return NULL; + + if (pt->synth_opts.callchain) { + size_t sz = sizeof(struct ip_callchain); + + sz += pt->synth_opts.callchain_sz * sizeof(u64); + ptq->chain = zalloc(sz); + if (!ptq->chain) + goto out_free; + } + + if (pt->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + sz += pt->synth_opts.last_branch_sz * + sizeof(struct branch_entry); + ptq->last_branch = zalloc(sz); + if (!ptq->last_branch) + goto out_free; + ptq->last_branch_rb = zalloc(sz); + if (!ptq->last_branch_rb) + goto out_free; + } + + ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); + 
if (!ptq->event_buf) + goto out_free; + + ptq->pt = pt; + ptq->queue_nr = queue_nr; + ptq->exclude_kernel = intel_pt_exclude_kernel(pt); + ptq->pid = -1; + ptq->tid = -1; + ptq->cpu = -1; + ptq->next_tid = -1; + + params.get_trace = intel_pt_get_trace; + params.walk_insn = intel_pt_walk_next_insn; + params.data = ptq; + params.return_compression = intel_pt_return_compression(pt); + params.max_non_turbo_ratio = pt->max_non_turbo_ratio; + params.mtc_period = intel_pt_mtc_period(pt); + params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; + params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; + + if (pt->synth_opts.instructions) { + if (pt->synth_opts.period) { + switch (pt->synth_opts.period_type) { + case PERF_ITRACE_PERIOD_INSTRUCTIONS: + params.period_type = + INTEL_PT_PERIOD_INSTRUCTIONS; + params.period = pt->synth_opts.period; + break; + case PERF_ITRACE_PERIOD_TICKS: + params.period_type = INTEL_PT_PERIOD_TICKS; + params.period = pt->synth_opts.period; + break; + case PERF_ITRACE_PERIOD_NANOSECS: + params.period_type = INTEL_PT_PERIOD_TICKS; + params.period = intel_pt_ns_to_ticks(pt, + pt->synth_opts.period); + break; + default: + break; + } + } + + if (!params.period) { + params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS; + params.period = 1; + } + } + + ptq->decoder = intel_pt_decoder_new(¶ms); + if (!ptq->decoder) + goto out_free; + + return ptq; + +out_free: + zfree(&ptq->event_buf); + zfree(&ptq->last_branch); + zfree(&ptq->last_branch_rb); + zfree(&ptq->chain); + free(ptq); + return NULL; +} + +static void intel_pt_free_queue(void *priv) +{ + struct intel_pt_queue *ptq = priv; + + if (!ptq) + return; + thread__zput(ptq->thread); + intel_pt_decoder_free(ptq->decoder); + zfree(&ptq->event_buf); + zfree(&ptq->last_branch); + zfree(&ptq->last_branch_rb); + zfree(&ptq->chain); + free(ptq); +} + +static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, + struct auxtrace_queue *queue) +{ + struct intel_pt_queue *ptq = queue->priv; + + if (queue->tid == -1 || 
pt->have_sched_switch) { + ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu); + thread__zput(ptq->thread); + } + + if (!ptq->thread && ptq->tid != -1) + ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid); + + if (ptq->thread) { + ptq->pid = ptq->thread->pid_; + if (queue->cpu == -1) + ptq->cpu = ptq->thread->cpu; + } +} + +static void intel_pt_sample_flags(struct intel_pt_queue *ptq) +{ + if (ptq->state->flags & INTEL_PT_ABORT_TX) { + ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; + } else if (ptq->state->flags & INTEL_PT_ASYNC) { + if (ptq->state->to_ip) + ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT; + else + ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_END; + ptq->insn_len = 0; + } else { + if (ptq->state->from_ip) + ptq->flags = intel_pt_insn_type(ptq->state->insn_op); + else + ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_BEGIN; + if (ptq->state->flags & INTEL_PT_IN_TX) + ptq->flags |= PERF_IP_FLAG_IN_TX; + ptq->insn_len = ptq->state->insn_len; + } +} + +static int intel_pt_setup_queue(struct intel_pt *pt, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct intel_pt_queue *ptq = queue->priv; + + if (list_empty(&queue->head)) + return 0; + + if (!ptq) { + ptq = intel_pt_alloc_queue(pt, queue_nr); + if (!ptq) + return -ENOMEM; + queue->priv = ptq; + + if (queue->cpu != -1) + ptq->cpu = queue->cpu; + ptq->tid = queue->tid; + + if (pt->sampling_mode) { + if (pt->timeless_decoding) + ptq->step_through_buffers = true; + if (pt->timeless_decoding || !pt->have_sched_switch) + ptq->use_buffer_pid_tid = true; + } + } + + if (!ptq->on_heap && + (!pt->sync_switch || + ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) { + const struct intel_pt_state *state; + int ret; + + if (pt->timeless_decoding) + return 0; + + intel_pt_log("queue %u getting timestamp\n", queue_nr); + intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", + queue_nr, 
ptq->cpu, ptq->pid, ptq->tid); + while (1) { + state = intel_pt_decode(ptq->decoder); + if (state->err) { + if (state->err == INTEL_PT_ERR_NODATA) { + intel_pt_log("queue %u has no timestamp\n", + queue_nr); + return 0; + } + continue; + } + if (state->timestamp) + break; + } + + ptq->timestamp = state->timestamp; + intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n", + queue_nr, ptq->timestamp); + ptq->state = state; + ptq->have_sample = true; + intel_pt_sample_flags(ptq); + ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); + if (ret) + return ret; + ptq->on_heap = true; + } + + return 0; +} + +static int intel_pt_setup_queues(struct intel_pt *pt) +{ + unsigned int i; + int ret; + + for (i = 0; i < pt->queues.nr_queues; i++) { + ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i); + if (ret) + return ret; + } + return 0; +} + +static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) +{ + struct branch_stack *bs_src = ptq->last_branch_rb; + struct branch_stack *bs_dst = ptq->last_branch; + size_t nr = 0; + + bs_dst->nr = bs_src->nr; + + if (!bs_src->nr) + return; + + nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos; + memcpy(&bs_dst->entries[0], + &bs_src->entries[ptq->last_branch_pos], + sizeof(struct branch_entry) * nr); + + if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { + memcpy(&bs_dst->entries[nr], + &bs_src->entries[0], + sizeof(struct branch_entry) * ptq->last_branch_pos); + } +} + +static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) +{ + ptq->last_branch_pos = 0; + ptq->last_branch_rb->nr = 0; +} + +static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) +{ + const struct intel_pt_state *state = ptq->state; + struct branch_stack *bs = ptq->last_branch_rb; + struct branch_entry *be; + + if (!ptq->last_branch_pos) + ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; + + ptq->last_branch_pos -= 1; + + be = &bs->entries[ptq->last_branch_pos]; + 
be->from = state->from_ip; + be->to = state->to_ip; + be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); + be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); + /* No support for mispredict */ + be->flags.mispred = ptq->pt->mispred_all; + + if (bs->nr < ptq->pt->synth_opts.last_branch_sz) + bs->nr += 1; +} + +static int intel_pt_inject_event(union perf_event *event, + struct perf_sample *sample, u64 type, + bool swapped) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample, swapped); +} + +static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) +{ + int ret; + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct dummy_branch_stack { + u64 nr; + struct branch_entry entries; + } dummy_bs; + + if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) + return 0; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample.ip = ptq->state->from_ip; + sample.pid = ptq->pid; + sample.tid = ptq->tid; + sample.addr = ptq->state->to_ip; + sample.id = ptq->pt->branches_id; + sample.stream_id = ptq->pt->branches_id; + sample.period = 1; + sample.cpu = ptq->cpu; + sample.flags = ptq->flags; + sample.insn_len = ptq->insn_len; + + /* + * perf report cannot handle events without a branch stack when using + * SORT_MODE__BRANCH so make a dummy one. 
+ */ + if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { + dummy_bs = (struct dummy_branch_stack){ + .nr = 1, + .entries = { + .from = sample.ip, + .to = sample.addr, + }, + }; + sample.branch_stack = (struct branch_stack *)&dummy_bs; + } + + if (pt->synth_opts.inject) { + ret = intel_pt_inject_event(event, &sample, + pt->branches_sample_type, + pt->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(pt->session, event, &sample); + if (ret) + pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n", + ret); + + return ret; +} + +static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) +{ + int ret; + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample.ip = ptq->state->from_ip; + sample.pid = ptq->pid; + sample.tid = ptq->tid; + sample.addr = ptq->state->to_ip; + sample.id = ptq->pt->instructions_id; + sample.stream_id = ptq->pt->instructions_id; + sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; + sample.cpu = ptq->cpu; + sample.flags = ptq->flags; + sample.insn_len = ptq->insn_len; + + ptq->last_insn_cnt = ptq->state->tot_insn_cnt; + + if (pt->synth_opts.callchain) { + thread_stack__sample(ptq->thread, ptq->chain, + pt->synth_opts.callchain_sz, sample.ip); + sample.callchain = ptq->chain; + } + + if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample.branch_stack = ptq->last_branch; + } + + if (pt->synth_opts.inject) { + ret = intel_pt_inject_event(event, &sample, + pt->instructions_sample_type, + pt->synth_needs_swap); + if (ret) + return ret; + } + + ret = 
perf_session__deliver_synth_event(pt->session, event, &sample);
+	if (ret)
+		pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
+		       ret);
+
+	if (pt->synth_opts.last_branch)
+		intel_pt_reset_last_branch_rb(ptq);
+
+	return ret;
+}
+
+/*
+ * Build a PERF_RECORD_SAMPLE for the 'transactions' synthesized event from
+ * the queue's current decoder state and deliver it to the session.
+ *
+ * The callchain and branch stack are attached only when the corresponding
+ * synth option is set. When injecting, the event is first serialized with
+ * pt->transactions_sample_type.
+ *
+ * Returns 0 on success, otherwise the error returned by the inject or
+ * deliver step.
+ */
+static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
+{
+	int ret;
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.size = sizeof(struct perf_event_header);
+
+	if (!pt->timeless_decoding)
+		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+
+	sample.ip = ptq->state->from_ip;
+	sample.pid = ptq->pid;
+	sample.tid = ptq->tid;
+	sample.addr = ptq->state->to_ip;
+	sample.id = ptq->pt->transactions_id;
+	sample.stream_id = ptq->pt->transactions_id;
+	sample.period = 1;
+	sample.cpu = ptq->cpu;
+	sample.flags = ptq->flags;
+	sample.insn_len = ptq->insn_len;
+
+	if (pt->synth_opts.callchain) {
+		thread_stack__sample(ptq->thread, ptq->chain,
+				     pt->synth_opts.callchain_sz, sample.ip);
+		sample.callchain = ptq->chain;
+	}
+
+	if (pt->synth_opts.last_branch) {
+		intel_pt_copy_last_branch_rb(ptq);
+		sample.branch_stack = ptq->last_branch;
+	}
+
+	if (pt->synth_opts.inject) {
+		ret = intel_pt_inject_event(event, &sample,
+					    pt->transactions_sample_type,
+					    pt->synth_needs_swap);
+		if (ret)
+			return ret;
+	}
+
+	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
+	if (ret)
+		pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
+		       ret);
+
+	/*
+	 * The ring buffer exists only when last_branch is enabled (it is
+	 * allocated under that option in intel_pt_alloc_queue()), so the
+	 * reset must be keyed on last_branch, not callchain. Keying it on
+	 * callchain dereferences a NULL last_branch_rb.
+	 */
+	if (pt->synth_opts.last_branch)
+		intel_pt_reset_last_branch_rb(ptq);
+
+	return ret;
+}
+
+static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
+				pid_t pid, pid_t tid, u64 ip)
+{
+	union perf_event event;
+	char msg[MAX_AUXTRACE_ERROR_MSG];
+	int err;
+
+	intel_pt__strerror(code, msg,
MAX_AUXTRACE_ERROR_MSG); + + auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, + code, cpu, pid, tid, ip, msg); + + err = perf_session__deliver_synth_event(pt->session, &event, NULL); + if (err) + pr_err("Intel Processor Trace: failed to deliver error event, error %d\n", + err); + + return err; +} + +static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) +{ + struct auxtrace_queue *queue; + pid_t tid = ptq->next_tid; + int err; + + if (tid == -1) + return 0; + + intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid); + + err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid); + + queue = &pt->queues.queue_array[ptq->queue_nr]; + intel_pt_set_pid_tid_cpu(pt, queue); + + ptq->next_tid = -1; + + return err; +} + +static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) +{ + struct intel_pt *pt = ptq->pt; + + return ip == pt->switch_ip && + (ptq->flags & PERF_IP_FLAG_BRANCH) && + !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); +} + +static int intel_pt_sample(struct intel_pt_queue *ptq) +{ + const struct intel_pt_state *state = ptq->state; + struct intel_pt *pt = ptq->pt; + int err; + + if (!ptq->have_sample) + return 0; + + ptq->have_sample = false; + + if (pt->sample_instructions && + (state->type & INTEL_PT_INSTRUCTION)) { + err = intel_pt_synth_instruction_sample(ptq); + if (err) + return err; + } + + if (pt->sample_transactions && + (state->type & INTEL_PT_TRANSACTION)) { + err = intel_pt_synth_transaction_sample(ptq); + if (err) + return err; + } + + if (!(state->type & INTEL_PT_BRANCH)) + return 0; + + if (pt->synth_opts.callchain) + thread_stack__event(ptq->thread, ptq->flags, state->from_ip, + state->to_ip, ptq->insn_len, + state->trace_nr); + else + thread_stack__set_trace_nr(ptq->thread, state->trace_nr); + + if (pt->sample_branches) { + err = intel_pt_synth_branch_sample(ptq); + if (err) + return err; + } + + if 
(pt->synth_opts.last_branch) + intel_pt_update_last_branch_rb(ptq); + + if (!pt->sync_switch) + return 0; + + if (intel_pt_is_switch_ip(ptq, state->to_ip)) { + switch (ptq->switch_state) { + case INTEL_PT_SS_UNKNOWN: + case INTEL_PT_SS_EXPECTING_SWITCH_IP: + err = intel_pt_next_tid(pt, ptq); + if (err) + return err; + ptq->switch_state = INTEL_PT_SS_TRACING; + break; + default: + ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT; + return 1; + } + } else if (!state->to_ip) { + ptq->switch_state = INTEL_PT_SS_NOT_TRACING; + } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) { + ptq->switch_state = INTEL_PT_SS_UNKNOWN; + } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN && + state->to_ip == pt->ptss_ip && + (ptq->flags & PERF_IP_FLAG_CALL)) { + ptq->switch_state = INTEL_PT_SS_TRACING; + } + + return 0; +} + +static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) +{ + struct machine *machine = pt->machine; + struct map *map; + struct symbol *sym, *start; + u64 ip, switch_ip = 0; + const char *ptss; + + if (ptss_ip) + *ptss_ip = 0; + + map = machine__kernel_map(machine); + if (!map) + return 0; + + if (map__load(map, machine->symbol_filter)) + return 0; + + start = dso__first_symbol(map->dso, MAP__FUNCTION); + + for (sym = start; sym; sym = dso__next_symbol(sym)) { + if (sym->binding == STB_GLOBAL && + !strcmp(sym->name, "__switch_to")) { + ip = map->unmap_ip(map, sym->start); + if (ip >= map->start && ip < map->end) { + switch_ip = ip; + break; + } + } + } + + if (!switch_ip || !ptss_ip) + return 0; + + if (pt->have_sched_switch == 1) + ptss = "perf_trace_sched_switch"; + else + ptss = "__perf_event_task_sched_out"; + + for (sym = start; sym; sym = dso__next_symbol(sym)) { + if (!strcmp(sym->name, ptss)) { + ip = map->unmap_ip(map, sym->start); + if (ip >= map->start && ip < map->end) { + *ptss_ip = ip; + break; + } + } + } + + return switch_ip; +} + +static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) +{ + const struct 
intel_pt_state *state = ptq->state; + struct intel_pt *pt = ptq->pt; + int err; + + if (!pt->kernel_start) { + pt->kernel_start = machine__kernel_start(pt->machine); + if (pt->per_cpu_mmaps && + (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) && + !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && + !pt->sampling_mode) { + pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip); + if (pt->switch_ip) { + intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", + pt->switch_ip, pt->ptss_ip); + pt->sync_switch = true; + } + } + } + + intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", + ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); + while (1) { + err = intel_pt_sample(ptq); + if (err) + return err; + + state = intel_pt_decode(ptq->decoder); + if (state->err) { + if (state->err == INTEL_PT_ERR_NODATA) + return 1; + if (pt->sync_switch && + state->from_ip >= pt->kernel_start) { + pt->sync_switch = false; + intel_pt_next_tid(pt, ptq); + } + if (pt->synth_opts.errors) { + err = intel_pt_synth_error(pt, state->err, + ptq->cpu, ptq->pid, + ptq->tid, + state->from_ip); + if (err) + return err; + } + continue; + } + + ptq->state = state; + ptq->have_sample = true; + intel_pt_sample_flags(ptq); + + /* Use estimated TSC upon return to user space */ + if (pt->est_tsc && + (state->from_ip >= pt->kernel_start || !state->from_ip) && + state->to_ip && state->to_ip < pt->kernel_start) { + intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", + state->timestamp, state->est_timestamp); + ptq->timestamp = state->est_timestamp; + /* Use estimated TSC in unknown switch state */ + } else if (pt->sync_switch && + ptq->switch_state == INTEL_PT_SS_UNKNOWN && + intel_pt_is_switch_ip(ptq, state->to_ip) && + ptq->next_tid == -1) { + intel_pt_log("TSC %"PRIx64" est. 
TSC %"PRIx64"\n", + state->timestamp, state->est_timestamp); + ptq->timestamp = state->est_timestamp; + } else if (state->timestamp > ptq->timestamp) { + ptq->timestamp = state->timestamp; + } + + if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { + *timestamp = ptq->timestamp; + return 0; + } + } + return 0; +} + +static inline int intel_pt_update_queues(struct intel_pt *pt) +{ + if (pt->queues.new_data) { + pt->queues.new_data = false; + return intel_pt_setup_queues(pt); + } + return 0; +} + +static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp) +{ + unsigned int queue_nr; + u64 ts; + int ret; + + while (1) { + struct auxtrace_queue *queue; + struct intel_pt_queue *ptq; + + if (!pt->heap.heap_cnt) + return 0; + + if (pt->heap.heap_array[0].ordinal >= timestamp) + return 0; + + queue_nr = pt->heap.heap_array[0].queue_nr; + queue = &pt->queues.queue_array[queue_nr]; + ptq = queue->priv; + + intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n", + queue_nr, pt->heap.heap_array[0].ordinal, + timestamp); + + auxtrace_heap__pop(&pt->heap); + + if (pt->heap.heap_cnt) { + ts = pt->heap.heap_array[0].ordinal + 1; + if (ts > timestamp) + ts = timestamp; + } else { + ts = timestamp; + } + + intel_pt_set_pid_tid_cpu(pt, queue); + + ret = intel_pt_run_decoder(ptq, &ts); + + if (ret < 0) { + auxtrace_heap__add(&pt->heap, queue_nr, ts); + return ret; + } + + if (!ret) { + ret = auxtrace_heap__add(&pt->heap, queue_nr, ts); + if (ret < 0) + return ret; + } else { + ptq->on_heap = false; + } + } + + return 0; +} + +static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, + u64 time_) +{ + struct auxtrace_queues *queues = &pt->queues; + unsigned int i; + u64 ts = 0; + + for (i = 0; i < queues->nr_queues; i++) { + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; + struct intel_pt_queue *ptq = queue->priv; + + if (ptq && (tid == -1 || ptq->tid == tid)) { + ptq->time = time_; + intel_pt_set_pid_tid_cpu(pt, queue); 
+ intel_pt_run_decoder(ptq, &ts); + } + } + return 0; +} + +static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) +{ + return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, + sample->pid, sample->tid, 0); +} + +static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) +{ + unsigned i, j; + + if (cpu < 0 || !pt->queues.nr_queues) + return NULL; + + if ((unsigned)cpu >= pt->queues.nr_queues) + i = pt->queues.nr_queues - 1; + else + i = cpu; + + if (pt->queues.queue_array[i].cpu == cpu) + return pt->queues.queue_array[i].priv; + + for (j = 0; i > 0; j++) { + if (pt->queues.queue_array[--i].cpu == cpu) + return pt->queues.queue_array[i].priv; + } + + for (; j < pt->queues.nr_queues; j++) { + if (pt->queues.queue_array[j].cpu == cpu) + return pt->queues.queue_array[j].priv; + } + + return NULL; +} + +static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, + u64 timestamp) +{ + struct intel_pt_queue *ptq; + int err; + + if (!pt->sync_switch) + return 1; + + ptq = intel_pt_cpu_to_ptq(pt, cpu); + if (!ptq) + return 1; + + switch (ptq->switch_state) { + case INTEL_PT_SS_NOT_TRACING: + ptq->next_tid = -1; + break; + case INTEL_PT_SS_UNKNOWN: + case INTEL_PT_SS_TRACING: + ptq->next_tid = tid; + ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP; + return 0; + case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: + if (!ptq->on_heap) { + ptq->timestamp = perf_time_to_tsc(timestamp, + &pt->tc); + err = auxtrace_heap__add(&pt->heap, ptq->queue_nr, + ptq->timestamp); + if (err) + return err; + ptq->on_heap = true; + } + ptq->switch_state = INTEL_PT_SS_TRACING; + break; + case INTEL_PT_SS_EXPECTING_SWITCH_IP: + ptq->next_tid = tid; + intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); + break; + default: + break; + } + + return 1; +} + +static int intel_pt_process_switch(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct perf_evsel *evsel; + pid_t tid; + int cpu, ret; + + evsel = 
perf_evlist__id2evsel(pt->session->evlist, sample->id); + if (evsel != pt->switch_evsel) + return 0; + + tid = perf_evsel__intval(evsel, sample, "next_pid"); + cpu = sample->cpu; + + intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + cpu, tid, sample->time, perf_time_to_tsc(sample->time, + &pt->tc)); + + ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); + if (ret <= 0) + return ret; + + return machine__set_current_tid(pt->machine, cpu, -1, tid); +} + +static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, + struct perf_sample *sample) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + pid_t pid, tid; + int cpu, ret; + + cpu = sample->cpu; + + if (pt->have_sched_switch == 3) { + if (!out) + return 0; + if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) { + pr_err("Expecting CPU-wide context switch event\n"); + return -EINVAL; + } + pid = event->context_switch.next_prev_pid; + tid = event->context_switch.next_prev_tid; + } else { + if (out) + return 0; + pid = sample->pid; + tid = sample->tid; + } + + if (tid == -1) { + pr_err("context_switch event has no tid\n"); + return -EINVAL; + } + + intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time, + &pt->tc)); + + ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); + if (ret <= 0) + return ret; + + return machine__set_current_tid(pt->machine, cpu, pid, tid); +} + +static int intel_pt_process_itrace_start(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample) +{ + if (!pt->per_cpu_mmaps) + return 0; + + intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + sample->cpu, event->itrace_start.pid, + event->itrace_start.tid, sample->time, + perf_time_to_tsc(sample->time, &pt->tc)); + + return machine__set_current_tid(pt->machine, sample->cpu, + event->itrace_start.pid, + event->itrace_start.tid); +} + 
+static int intel_pt_process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + u64 timestamp; + int err = 0; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("Intel Processor Trace requires ordered events\n"); + return -EINVAL; + } + + if (sample->time && sample->time != (u64)-1) + timestamp = perf_time_to_tsc(sample->time, &pt->tc); + else + timestamp = 0; + + if (timestamp || pt->timeless_decoding) { + err = intel_pt_update_queues(pt); + if (err) + return err; + } + + if (pt->timeless_decoding) { + if (event->header.type == PERF_RECORD_EXIT) { + err = intel_pt_process_timeless_queues(pt, + event->fork.tid, + sample->time); + } + } else if (timestamp) { + err = intel_pt_process_queues(pt, timestamp); + } + if (err) + return err; + + if (event->header.type == PERF_RECORD_AUX && + (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && + pt->synth_opts.errors) { + err = intel_pt_lost(pt, sample); + if (err) + return err; + } + + if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) + err = intel_pt_process_switch(pt, sample); + else if (event->header.type == PERF_RECORD_ITRACE_START) + err = intel_pt_process_itrace_start(pt, event, sample); + else if (event->header.type == PERF_RECORD_SWITCH || + event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) + err = intel_pt_context_switch(pt, event, sample); + + intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", + perf_event__name(event->header.type), event->header.type, + sample->cpu, sample->time, timestamp); + + return err; +} + +static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + int ret; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) + return -EINVAL; + + ret = 
intel_pt_update_queues(pt); + if (ret < 0) + return ret; + + if (pt->timeless_decoding) + return intel_pt_process_timeless_queues(pt, -1, + MAX_TIMESTAMP - 1); + + return intel_pt_process_queues(pt, MAX_TIMESTAMP); +} + +static void intel_pt_free_events(struct perf_session *session) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + struct auxtrace_queues *queues = &pt->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + intel_pt_free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + intel_pt_log_disable(); + auxtrace_queues__free(queues); +} + +static void intel_pt_free(struct perf_session *session) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + auxtrace_heap__free(&pt->heap); + intel_pt_free_events(session); + session->auxtrace = NULL; + thread__delete(pt->unknown_thread); + free(pt); +} + +static int intel_pt_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + if (pt->sampling_mode) + return 0; + + if (!pt->data_queued) { + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data_file__fd(session->file); + int err; + + if (perf_data_file__is_pipe(session->file)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&pt->queues, session, event, + data_offset, &buffer); + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + intel_pt_dump_event(pt, buffer->data, + buffer->size); + auxtrace_buffer__put_data(buffer); + } + } + } + + return 0; +} + +struct intel_pt_synth { + struct perf_tool dummy_tool; + struct perf_session *session; +}; + +static int 
intel_pt_event_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct intel_pt_synth *intel_pt_synth = + container_of(tool, struct intel_pt_synth, dummy_tool); + + return perf_session__deliver_synth_event(intel_pt_synth->session, event, + NULL); +} + +static int intel_pt_synth_event(struct perf_session *session, + struct perf_event_attr *attr, u64 id) +{ + struct intel_pt_synth intel_pt_synth; + + memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); + intel_pt_synth.session = session; + + return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, + &id, intel_pt_event_synth); +} + +static int intel_pt_synth_events(struct intel_pt *pt, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == pt->pmu_type && evsel->ids) { + found = true; + break; + } + } + + if (!found) { + pr_debug("There are no selected events with Intel Processor Trace data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = PERF_TYPE_HARDWARE; + attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_PERIOD; + if (pt->timeless_decoding) + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; + else + attr.sample_type |= PERF_SAMPLE_TIME; + if (!pt->per_cpu_mmaps) + attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; + attr.exclude_user = evsel->attr.exclude_user; + attr.exclude_kernel = evsel->attr.exclude_kernel; + attr.exclude_hv = evsel->attr.exclude_hv; + attr.exclude_host = evsel->attr.exclude_host; + attr.exclude_guest = evsel->attr.exclude_guest; + attr.sample_id_all = evsel->attr.sample_id_all; + attr.read_format = evsel->attr.read_format; + + 
id = evsel->id[0] + 1000000000;
+	/* An id of 0 is avoided; fall back to 1 if the addition wrapped */
+	if (!id)
+		id = 1;
+
+	if (pt->synth_opts.instructions) {
+		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+		if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
+			attr.sample_period =
+				intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
+		else
+			attr.sample_period = pt->synth_opts.period;
+		pt->instructions_sample_period = attr.sample_period;
+		if (pt->synth_opts.callchain)
+			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+		if (pt->synth_opts.last_branch)
+			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+		pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+			 id, (u64)attr.sample_type);
+		err = intel_pt_synth_event(session, &attr, id);
+		if (err) {
+			pr_err("%s: failed to synthesize 'instructions' event type\n",
+			       __func__);
+			return err;
+		}
+		pt->sample_instructions = true;
+		pt->instructions_sample_type = attr.sample_type;
+		pt->instructions_id = id;
+		id += 1;
+	}
+
+	if (pt->synth_opts.transactions) {
+		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+		attr.sample_period = 1;
+		if (pt->synth_opts.callchain)
+			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+		if (pt->synth_opts.last_branch)
+			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+		pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+			 id, (u64)attr.sample_type);
+		err = intel_pt_synth_event(session, &attr, id);
+		if (err) {
+			pr_err("%s: failed to synthesize 'transactions' event type\n",
+			       __func__);
+			return err;
+		}
+		pt->sample_transactions = true;
+		/*
+		 * Record the sample type, as the instructions and branches
+		 * cases do: intel_pt_synth_transaction_sample() passes it to
+		 * intel_pt_inject_event() to lay out the injected sample.
+		 */
+		pt->transactions_sample_type = attr.sample_type;
+		pt->transactions_id = id;
+		id += 1;
+		/* Give the newly synthesized evsel a readable name */
+		evlist__for_each(evlist, evsel) {
+			if (evsel->id && evsel->id[0] == pt->transactions_id) {
+				if (evsel->name)
+					zfree(&evsel->name);
+				evsel->name = strdup("transactions");
+				break;
+			}
+		}
+	}
+
+	if (pt->synth_opts.branches) {
+		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+		attr.sample_period = 1;
+		attr.sample_type |= PERF_SAMPLE_ADDR;
+		attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
+		attr.sample_type
&= ~(u64)PERF_SAMPLE_BRANCH_STACK; + pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_pt_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'branches' event type\n", + __func__); + return err; + } + pt->sample_branches = true; + pt->branches_sample_type = attr.sample_type; + pt->branches_id = id; + } + + pt->synth_needs_swap = evsel->needs_swap; + + return 0; +} + +static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_reverse(evlist, evsel) { + const char *name = perf_evsel__name(evsel); + + if (!strcmp(name, "sched:sched_switch")) + return evsel; + } + + return NULL; +} + +static bool intel_pt_find_switch(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.context_switch) + return true; + } + + return false; +} + +static int intel_pt_perf_config(const char *var, const char *value, void *data) +{ + struct intel_pt *pt = data; + + if (!strcmp(var, "intel-pt.mispred-all")) + pt->mispred_all = perf_config_bool(var, value); + + return 0; +} + +static const char * const intel_pt_info_fmts[] = { + [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", + [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", + [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", + [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", + [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n", + [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n", + [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", + [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", + [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", + [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", + [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", + 
[INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
+};
+
+/*
+ * Dump auxtrace info values arr[start..finish] (inclusive) to stdout using
+ * the per-index format strings in intel_pt_info_fmts[]. Does nothing unless
+ * dump_trace is set.
+ */
+static void intel_pt_print_info(u64 *arr, int start, int finish)
+{
+	int i;
+
+	if (!dump_trace)
+		return;
+
+	for (i = start; i <= finish; i++) {
+		const char *fmt = intel_pt_info_fmts[i];
+
+		/*
+		 * The table uses designated initializers, so an index with no
+		 * entry (e.g. INTEL_PT_MTC_FREQ_BITS) is NULL. Skip it rather
+		 * than pass a NULL format to fprintf().
+		 */
+		if (fmt)
+			fprintf(stdout, fmt, arr[i]);
+	}
+}
+
+int intel_pt_process_auxtrace_info(union perf_event *event,
+				   struct perf_session *session)
+{
+	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+	/*
+	 * priv[0..INTEL_PT_PER_CPU_MMAPS] inclusive are read below, so that
+	 * many entries plus one must be present.
+	 * NOTE(review): assumes sizeof(struct auxtrace_info_event) does not
+	 * include priv[] - confirm against the struct definition.
+	 */
+	size_t min_sz = sizeof(u64) * (INTEL_PT_PER_CPU_MMAPS + 1);
+	struct intel_pt *pt;
+	int err;
+
+	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
+					min_sz)
+		return -EINVAL;
+
+	pt = zalloc(sizeof(struct intel_pt));
+	if (!pt)
+		return -ENOMEM;
+
+	perf_config(intel_pt_perf_config, pt);
+
+	err = auxtrace_queues__init(&pt->queues);
+	if (err)
+		goto err_free;
+
+	intel_pt_log_set_name(INTEL_PT_PMU_NAME);
+
+	pt->session = session;
+	pt->machine = &session->machines.host; /* No kvm support */
+	pt->auxtrace_type = auxtrace_info->type;
+	pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
+	pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
+	pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
+	pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
+	pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
+	pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
+	pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
+	pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
+	pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
+	pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
+	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
+			    INTEL_PT_PER_CPU_MMAPS);
+
+	/* Newer fields: only present if priv[] extends through CYC_BIT */
+	if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
+					(sizeof(u64) * (INTEL_PT_CYC_BIT + 1))) {
+		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
+		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
+		pt->tsc_ctc_ratio_n =
auxtrace_info->priv[INTEL_PT_TSC_CTC_N]; + pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D]; + pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT]; + intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT, + INTEL_PT_CYC_BIT); + } + + pt->timeless_decoding = intel_pt_timeless_decoding(pt); + pt->have_tsc = intel_pt_have_tsc(pt); + pt->sampling_mode = false; + pt->est_tsc = !pt->timeless_decoding; + + pt->unknown_thread = thread__new(999999999, 999999999); + if (!pt->unknown_thread) { + err = -ENOMEM; + goto err_free_queues; + } + err = thread__set_comm(pt->unknown_thread, "unknown", 0); + if (err) + goto err_delete_thread; + if (thread__init_map_groups(pt->unknown_thread, pt->machine)) { + err = -ENOMEM; + goto err_delete_thread; + } + + pt->auxtrace.process_event = intel_pt_process_event; + pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event; + pt->auxtrace.flush_events = intel_pt_flush; + pt->auxtrace.free_events = intel_pt_free_events; + pt->auxtrace.free = intel_pt_free; + session->auxtrace = &pt->auxtrace; + + if (dump_trace) + return 0; + + if (pt->have_sched_switch == 1) { + pt->switch_evsel = intel_pt_find_sched_switch(session->evlist); + if (!pt->switch_evsel) { + pr_err("%s: missing sched_switch event\n", __func__); + goto err_delete_thread; + } + } else if (pt->have_sched_switch == 2 && + !intel_pt_find_switch(session->evlist)) { + pr_err("%s: missing context_switch attribute flag\n", __func__); + goto err_delete_thread; + } + + if (session->itrace_synth_opts && session->itrace_synth_opts->set) { + pt->synth_opts = *session->itrace_synth_opts; + } else { + itrace_synth_opts__set_default(&pt->synth_opts); + if (use_browser != -1) { + pt->synth_opts.branches = false; + pt->synth_opts.callchain = true; + } + } + + if (pt->synth_opts.log) + intel_pt_log_enable(); + + /* Maximum non-turbo ratio is TSC freq / 100 MHz */ + if (pt->tc.time_mult) { + u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000); + + 
pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000; + intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); + intel_pt_log("Maximum non-turbo ratio %u\n", + pt->max_non_turbo_ratio); + } + + if (pt->synth_opts.calls) + pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_TRACE_END; + if (pt->synth_opts.returns) + pt->branches_filter |= PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_TRACE_BEGIN; + + if (pt->synth_opts.callchain && !symbol_conf.use_callchain) { + symbol_conf.use_callchain = true; + if (callchain_register_param(&callchain_param) < 0) { + symbol_conf.use_callchain = false; + pt->synth_opts.callchain = false; + } + } + + err = intel_pt_synth_events(pt, session); + if (err) + goto err_delete_thread; + + err = auxtrace_queues__process_index(&pt->queues, session); + if (err) + goto err_delete_thread; + + if (pt->queues.populated) + pt->data_queued = true; + + if (pt->timeless_decoding) + pr_debug2("Intel PT decoding without timestamps\n"); + + return 0; + +err_delete_thread: + thread__delete(pt->unknown_thread); +err_free_queues: + intel_pt_log_disable(); + auxtrace_queues__free(&pt->queues); + session->auxtrace = NULL; +err_free: + free(pt); + return err; +} diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h new file mode 100644 index 000000000000..0065949df693 --- /dev/null +++ b/tools/perf/util/intel-pt.h @@ -0,0 +1,56 @@ +/* + * intel_pt.h: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#ifndef INCLUDE__PERF_INTEL_PT_H__ +#define INCLUDE__PERF_INTEL_PT_H__ + +#define INTEL_PT_PMU_NAME "intel_pt" + +enum { + INTEL_PT_PMU_TYPE, + INTEL_PT_TIME_SHIFT, + INTEL_PT_TIME_MULT, + INTEL_PT_TIME_ZERO, + INTEL_PT_CAP_USER_TIME_ZERO, + INTEL_PT_TSC_BIT, + INTEL_PT_NORETCOMP_BIT, + INTEL_PT_HAVE_SCHED_SWITCH, + INTEL_PT_SNAPSHOT_MODE, + INTEL_PT_PER_CPU_MMAPS, + INTEL_PT_MTC_BIT, + INTEL_PT_MTC_FREQ_BITS, + INTEL_PT_TSC_CTC_N, + INTEL_PT_TSC_CTC_D, + INTEL_PT_CYC_BIT, + INTEL_PT_AUXTRACE_PRIV_MAX, +}; + +#define INTEL_PT_AUXTRACE_PRIV_SIZE (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) + +struct auxtrace_record; +struct perf_tool; +union perf_event; +struct perf_session; +struct perf_event_attr; +struct perf_pmu; + +struct auxtrace_record *intel_pt_recording_init(int *err); + +int intel_pt_process_auxtrace_info(union perf_event *event, + struct perf_session *session); + +struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu); + +#endif diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c new file mode 100644 index 000000000000..4f6a4780bd5f --- /dev/null +++ b/tools/perf/util/llvm-utils.c @@ -0,0 +1,408 @@ +/* + * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015, Huawei Inc. 
+ */ + +#include <stdio.h> +#include <sys/utsname.h> +#include "util.h" +#include "debug.h" +#include "llvm-utils.h" +#include "cache.h" + +#define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ + "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS " \ + "$KERNEL_INC_OPTIONS -Wno-unused-value " \ + "-Wno-pointer-sign -working-directory " \ + "$WORKING_DIR -c \"$CLANG_SOURCE\" -target bpf -O2 -o -" + +struct llvm_param llvm_param = { + .clang_path = "clang", + .clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE, + .clang_opt = NULL, + .kbuild_dir = NULL, + .kbuild_opts = NULL, + .user_set_param = false, +}; + +/* + * Config callback for "llvm." variables. Variables without that prefix are + * ignored (returns 0). A recognised key stores a copy of value in llvm_param + * and sets user_set_param; an unknown key, or a key given without a value, + * returns -1. + */ +int perf_llvm_config(const char *var, const char *value) +{ + if (prefixcmp(var, "llvm.")) + return 0; + var += sizeof("llvm.") - 1; + + /* every key handled here takes a string; strdup(NULL) is undefined */ + if (!value) + return -1; + + if (!strcmp(var, "clang-path")) + llvm_param.clang_path = strdup(value); + else if (!strcmp(var, "clang-bpf-cmd-template")) + llvm_param.clang_bpf_cmd_template = strdup(value); + else if (!strcmp(var, "clang-opt")) + llvm_param.clang_opt = strdup(value); + else if (!strcmp(var, "kbuild-dir")) + llvm_param.kbuild_dir = strdup(value); + else if (!strcmp(var, "kbuild-opts")) + llvm_param.kbuild_opts = strdup(value); + else + return -1; + llvm_param.user_set_param = true; + return 0; +} + +/* + * Locate an executable and copy its path into output (a PATH_MAX buffer). + * An absolute def is used directly if it exists; a relative, non-empty def + * replaces name. Otherwise each $PATH entry is tried in turn. Returns 0 on + * success, -ENOENT if nothing was found, -1 if $PATH is unset or cannot be + * duplicated. + */ +static int +search_program(const char *def, const char *name, + char *output) +{ + char *env, *path, *tmp = NULL; + char buf[PATH_MAX]; + int ret; + + output[0] = '\0'; + if (def && def[0] != '\0') { + if (def[0] == '/') { + if (access(def, F_OK) == 0) { + strlcpy(output, def, PATH_MAX); + return 0; + } + } else if (def[0] != '\0') + name = def; + } + + env = getenv("PATH"); + if (!env) + return -1; + env = strdup(env); + if (!env) + return -1; + + ret = -ENOENT; + path = strtok_r(env, ":", &tmp); + while (path) { + scnprintf(buf, sizeof(buf), "%s/%s", path, name); + if (access(buf, F_OK) == 0) { + strlcpy(output, buf, PATH_MAX); + ret = 0; + break; + } + path = strtok_r(NULL, ":", &tmp); + } + + free(env); + return ret; +} + +#define READ_SIZE 4096 +static int
+read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) +{ + int err = 0; + void *buf = NULL; + FILE *file = NULL; + size_t read_sz = 0, buf_sz = 0; + + /* + * Run cmd through popen() and collect everything it writes to stdout. + * On success returns 0, hands the buffer to *p_buf (freed here when + * p_buf is NULL) and stores the byte count in *p_read_sz. On failure + * returns a negative error code and clears both outputs. + */ + file = popen(cmd, "r"); + if (!file) { + pr_err("ERROR: unable to popen cmd: %s\n", + strerror(errno)); + return -EINVAL; + } + + while (!feof(file) && !ferror(file)) { + /* + * Make buf_sz always have one byte extra space so we + * can put '\0' there. + */ + if (buf_sz - read_sz < READ_SIZE + 1) { + void *new_buf; + + buf_sz = read_sz + READ_SIZE + 1; + new_buf = realloc(buf, buf_sz); + + if (!new_buf) { + pr_err("ERROR: failed to realloc memory\n"); + err = -ENOMEM; + goto errout; + } + + buf = new_buf; + } + read_sz += fread((char *)buf + read_sz, 1, READ_SIZE, file); + } + + if (buf_sz - read_sz < 1) { + pr_err("ERROR: internal error\n"); + err = -EINVAL; + goto errout; + } + + if (ferror(file)) { + pr_err("ERROR: error occurred when reading from pipe: %s\n", + strerror(errno)); + err = -EIO; + goto errout; + } + + /* + * pclose() returns the wait status, or -1 on failure. WEXITSTATUS() + * is only meaningful when WIFEXITED() is true, so a command killed + * by a signal must be treated as a failure too. + */ + err = pclose(file); + file = NULL; + if (err == -1 || !WIFEXITED(err) || WEXITSTATUS(err)) { + err = -EINVAL; + goto errout; + } + + /* + * If buf is string, give it terminal '\0' to make our life + * easier. If buf is not string, that '\0' is out of space + * indicated by read_sz so caller won't even notice it. + */ + ((char *)buf)[read_sz] = '\0'; + + if (!p_buf) + free(buf); + else + *p_buf = buf; + + if (p_read_sz) + *p_read_sz = read_sz; + return 0; + +errout: + if (file) + pclose(file); + free(buf); + if (p_buf) + *p_buf = NULL; + if (p_read_sz) + *p_read_sz = 0; + return err; +} + +/* Set environment variable var to value, or unset it when value is NULL. */ +static inline void +force_set_env(const char *var, const char *value) +{ + if (value) { + setenv(var, value, 1); + pr_debug("set env: %s=%s\n", var, value); + } else { + unsetenv(var); + pr_debug("unset env: %s\n", var); + } +} + +static void +version_notice(void) +{ + pr_err( +" \tLLVM 3.7 or newer is required.
Which can be found from http://llvm.org\n" +" \tYou may want to try git trunk:\n" +" \t\tgit clone http://llvm.org/git/llvm.git\n" +" \t\t and\n" +" \t\tgit clone http://llvm.org/git/clang.git\n\n" +" \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n" +" \tdebian/ubuntu:\n" +" \t\thttp://llvm.org/apt\n\n" +" \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n" +" \toption in [llvm] section of ~/.perfconfig to:\n\n" +" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \\\n" +" \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n" +" \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n" +" \t(Replace /path/to/llc with path to your llc)\n\n" +); +} + +static int detect_kbuild_dir(char **kbuild_dir) +{ + const char *test_dir = llvm_param.kbuild_dir; + const char *prefix_dir = ""; + const char *suffix_dir = ""; + + char *autoconf_path; + struct utsname utsname; + + int err; + + if (!test_dir) { + err = uname(&utsname); + if (err) { + pr_warning("uname failed: %s\n", strerror(errno)); + return -EINVAL; + } + + test_dir = utsname.release; + prefix_dir = "/lib/modules/"; + suffix_dir = "/build"; + } + + err = asprintf(&autoconf_path, "%s%s%s/include/generated/autoconf.h", + prefix_dir, test_dir, suffix_dir); + if (err < 0) + return -ENOMEM; + + if (access(autoconf_path, R_OK) == 0) { + free(autoconf_path); + + err = asprintf(kbuild_dir, "%s%s%s", prefix_dir, test_dir, + suffix_dir); + if (err < 0) + return -ENOMEM; + return 0; + } + free(autoconf_path); + return -ENOENT; +} + +static const char *kinc_fetch_script = +"#!/usr/bin/env sh\n" +"if ! test -d \"$KBUILD_DIR\"\n" +"then\n" +" exit -1\n" +"fi\n" +"if ! 
test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n" +"then\n" +" exit -1\n" +"fi\n" +"TMPDIR=`mktemp -d`\n" +"if test -z \"$TMPDIR\"\n" +"then\n" +" exit -1\n" +"fi\n" +"cat << EOF > $TMPDIR/Makefile\n" +"obj-y := dummy.o\n" +"\\$(obj)/%.o: \\$(src)/%.c\n" +"\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n" +"EOF\n" +"touch $TMPDIR/dummy.c\n" +"make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n" +"RET=$?\n" +"rm -rf $TMPDIR\n" +"exit $RET\n"; + +static inline void +get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) +{ + int err; + + if (!kbuild_dir || !kbuild_include_opts) + return; + + *kbuild_dir = NULL; + *kbuild_include_opts = NULL; + + if (llvm_param.kbuild_dir && !llvm_param.kbuild_dir[0]) { + pr_debug("[llvm.kbuild-dir] is set to \"\" deliberately.\n"); + pr_debug("Skip kbuild options detection.\n"); + return; + } + + err = detect_kbuild_dir(kbuild_dir); + if (err) { + pr_warning( +"WARNING:\tunable to get correct kernel building directory.\n" +"Hint:\tSet correct kbuild directory using 'kbuild-dir' option in [llvm]\n" +" \tsection of ~/.perfconfig or set it to \"\" to suppress kbuild\n" +" \tdetection.\n\n"); + return; + } + + pr_debug("Kernel build dir is set to %s\n", *kbuild_dir); + force_set_env("KBUILD_DIR", *kbuild_dir); + force_set_env("KBUILD_OPTS", llvm_param.kbuild_opts); + err = read_from_pipe(kinc_fetch_script, + (void **)kbuild_include_opts, + NULL); + if (err) { + pr_warning( +"WARNING:\tunable to get kernel include directories from '%s'\n" +"Hint:\tTry set clang include options using 'clang-bpf-cmd-template'\n" +" \toption in [llvm] section of ~/.perfconfig and set 'kbuild-dir'\n" +" \toption in [llvm] to \"\" to suppress this detection.\n\n", + *kbuild_dir); + + free(*kbuild_dir); + *kbuild_dir = NULL; + return; + } + + pr_debug("include option is set to %s\n", *kbuild_include_opts); +} + +int llvm__compile_bpf(const char *path, void **p_obj_buf, + size_t *p_obj_buf_sz) +{ + int err; + 
char clang_path[PATH_MAX]; + const char *clang_opt = llvm_param.clang_opt; + const char *template = llvm_param.clang_bpf_cmd_template; + char *kbuild_dir = NULL, *kbuild_include_opts = NULL; + void *obj_buf = NULL; + size_t obj_buf_sz; + + if (!template) + template = CLANG_BPF_CMD_DEFAULT_TEMPLATE; + + err = search_program(llvm_param.clang_path, + "clang", clang_path); + if (err) { + pr_err( +"ERROR:\tunable to find clang.\n" +"Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n" +" \tand 'clang-path' option in [llvm] section of ~/.perfconfig.\n"); + version_notice(); + return -ENOENT; + } + + /* + * This is an optional work. Even it fail we can continue our + * work. Needn't to check error return. + */ + get_kbuild_opts(&kbuild_dir, &kbuild_include_opts); + + force_set_env("CLANG_EXEC", clang_path); + force_set_env("CLANG_OPTIONS", clang_opt); + force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts); + force_set_env("WORKING_DIR", kbuild_dir ? : "."); + + /* + * Since we may reset clang's working dir, path of source file + * should be transferred into absolute path, except we want + * stdin to be source file (testing). + */ + force_set_env("CLANG_SOURCE", + (path[0] == '-') ? 
path : + make_nonrelative_path(path)); + + pr_debug("llvm compiling command template: %s\n", template); + err = read_from_pipe(template, &obj_buf, &obj_buf_sz); + if (err) { + pr_err("ERROR:\tunable to compile %s\n", path); + pr_err("Hint:\tCheck error message shown above.\n"); + pr_err("Hint:\tYou can also pre-compile it into .o using:\n"); + pr_err(" \t\tclang -target bpf -O2 -c %s\n", path); + pr_err(" \twith proper -I and -D options.\n"); + goto errout; + } + + free(kbuild_dir); + free(kbuild_include_opts); + if (!p_obj_buf) + free(obj_buf); + else + *p_obj_buf = obj_buf; + + if (p_obj_buf_sz) + *p_obj_buf_sz = obj_buf_sz; + return 0; +errout: + free(kbuild_dir); + free(kbuild_include_opts); + free(obj_buf); + if (p_obj_buf) + *p_obj_buf = NULL; + if (p_obj_buf_sz) + *p_obj_buf_sz = 0; + return err; +} + +int llvm__search_clang(void) +{ + char clang_path[PATH_MAX]; + + return search_program(llvm_param.clang_path, "clang", clang_path); +} diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h new file mode 100644 index 000000000000..5b3cf1c229e2 --- /dev/null +++ b/tools/perf/util/llvm-utils.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015, Huawei Inc. + */ +#ifndef __LLVM_UTILS_H +#define __LLVM_UTILS_H + +#include "debug.h" + +struct llvm_param { + /* Path of clang executable */ + const char *clang_path; + /* + * Template of clang bpf compiling. 5 env variables + * can be used: + * $CLANG_EXEC: Path to clang. + * $CLANG_OPTIONS: Extra options to clang. + * $KERNEL_INC_OPTIONS: Kernel include directories. + * $WORKING_DIR: Kernel source directory. + * $CLANG_SOURCE: Source file to be compiled. + */ + const char *clang_bpf_cmd_template; + /* Will be filled in $CLANG_OPTIONS */ + const char *clang_opt; + /* Where to find kbuild system */ + const char *kbuild_dir; + /* + * Arguments passed to make, like 'ARCH=arm' if doing cross + * compiling. Should not be used for dynamic compiling. 
+ */ + const char *kbuild_opts; + /* + * Default is false. If one of the above fields is set by user + * explicitly then user_set_param is set to true. This is used + * for perf test. If user doesn't set anything in .perfconfig + * and clang is not found, don't trigger llvm test. + */ + bool user_set_param; +}; + +extern struct llvm_param llvm_param; +extern int perf_llvm_config(const char *var, const char *value); + +extern int llvm__compile_bpf(const char *path, void **p_obj_buf, + size_t *p_obj_buf_sz); + +/* This function is for test__llvm() use only */ +extern int llvm__search_clang(void); +#endif diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7ff682770fdb..5ef90be2a249 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -35,6 +35,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->last_match = NULL; machine->vdso_info = NULL; + machine->env = NULL; machine->pid = pid; @@ -250,7 +251,7 @@ struct machine *machines__findnew(struct machines *machines, pid_t pid) static struct strlist *seen; if (!seen) - seen = strlist__new(true, NULL); + seen = strlist__new(NULL, NULL); if (!strlist__has_entry(seen, path)) { pr_err("Can't access file %s\n", path); @@ -550,6 +551,14 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused, return 0; } +int machine__process_switch_event(struct machine *machine __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_switch(event, stdout); + return 0; +} + struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename) { @@ -616,7 +625,7 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) { int i; size_t printed = 0; - struct dso *kdso = machine->vmlinux_maps[MAP__FUNCTION]->dso; + struct dso *kdso = machine__kernel_map(machine)->dso; if (kdso->has_build_id) { char filename[PATH_MAX]; @@ -732,6 +741,7 @@ int
__machine__create_kernel_maps(struct machine *machine, struct dso *kernel) for (type = 0; type < MAP__NR_TYPES; ++type) { struct kmap *kmap; + struct map *map; machine->vmlinux_maps[type] = map__new2(start, kernel, type); if (machine->vmlinux_maps[type] == NULL) @@ -740,13 +750,13 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) machine->vmlinux_maps[type]->map_ip = machine->vmlinux_maps[type]->unmap_ip = identity__map_ip; - kmap = map__kmap(machine->vmlinux_maps[type]); + map = __machine__kernel_map(machine, type); + kmap = map__kmap(map); if (!kmap) return -1; kmap->kmaps = &machine->kmaps; - map_groups__insert(&machine->kmaps, - machine->vmlinux_maps[type]); + map_groups__insert(&machine->kmaps, map); } return 0; @@ -758,13 +768,13 @@ void machine__destroy_kernel_maps(struct machine *machine) for (type = 0; type < MAP__NR_TYPES; ++type) { struct kmap *kmap; + struct map *map = __machine__kernel_map(machine, type); - if (machine->vmlinux_maps[type] == NULL) + if (map == NULL) continue; - kmap = map__kmap(machine->vmlinux_maps[type]); - map_groups__remove(&machine->kmaps, - machine->vmlinux_maps[type]); + kmap = map__kmap(map); + map_groups__remove(&machine->kmaps, map); if (kmap && kmap->ref_reloc_sym) { /* * ref_reloc_sym is shared among all maps, so free just @@ -858,7 +868,7 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid) int machine__load_kallsyms(struct machine *machine, const char *filename, enum map_type type, symbol_filter_t filter) { - struct map *map = machine->vmlinux_maps[type]; + struct map *map = __machine__kernel_map(machine, type); int ret = dso__load_kallsyms(map->dso, filename, map, filter); if (ret > 0) { @@ -877,7 +887,7 @@ int machine__load_kallsyms(struct machine *machine, const char *filename, int machine__load_vmlinux_path(struct machine *machine, enum map_type type, symbol_filter_t filter) { - struct map *map = machine->vmlinux_maps[type]; + struct map *map = __machine__kernel_map(machine, type);
int ret = dso__load_vmlinux_path(map->dso, map, filter); if (ret > 0) @@ -1235,8 +1245,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, /* * preload dso of guest kernel and modules */ - dso__load(kernel, machine->vmlinux_maps[MAP__FUNCTION], - NULL); + dso__load(kernel, machine__kernel_map(machine), NULL); } } return 0; @@ -1387,6 +1396,24 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event event->fork.ptid); int err = 0; + if (dump_trace) + perf_event__fprintf_task(event, stdout); + + /* + * There may be an existing thread that is not actually the parent, + * either because we are processing events out of order, or because the + * (fork) event that would have removed the thread was lost. Assume the + * latter case and continue on as best we can. + */ + if (parent->pid_ != (pid_t)event->fork.ppid) { + dump_printf("removing erroneous parent thread %d/%d\n", + parent->pid_, parent->tid); + machine__remove_thread(machine, parent); + thread__put(parent); + parent = machine__findnew_thread(machine, event->fork.ppid, + event->fork.ptid); + } + /* if a thread currently exists for the thread id remove it */ if (thread != NULL) { machine__remove_thread(machine, thread); @@ -1395,8 +1422,6 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); - if (dump_trace) - perf_event__fprintf_task(event, stdout); if (thread == NULL || parent == NULL || thread__fork(thread, parent, sample->time) < 0) { @@ -1451,6 +1476,9 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_itrace_start_event(machine, event); break; case PERF_RECORD_LOST_SAMPLES: ret = machine__process_lost_samples_event(machine, event, sample); break; + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + ret = machine__process_switch_event(machine, event); break; default: ret = -1; break; @@ 
-1803,7 +1831,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: - if (chain->nr > PERF_MAX_STACK_DEPTH) { + if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) { pr_warning("corrupted callchain. skipping...\n"); return 0; } @@ -1969,7 +1997,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, int machine__get_kernel_start(struct machine *machine) { - struct map *map = machine__kernel_map(machine, MAP__FUNCTION); + struct map *map = machine__kernel_map(machine); int err = 0; /* @@ -1993,3 +2021,17 @@ struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { return dsos__findnew(&machine->dsos, filename); } + +char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) +{ + struct machine *machine = vmachine; + struct map *map; + struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map, NULL); + + if (sym == NULL) + return NULL; + + *modp = __map__is_kmodule(map) ? 
(char *)map->dso->short_name : NULL; + *addrp = map->unmap_ip(map, sym->start); + return sym->name; +} diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 887798e511e9..2c2b443df5ba 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -34,6 +34,7 @@ struct machine { struct list_head dead_threads; struct thread *last_match; struct vdso_info *vdso_info; + struct perf_env *env; struct dsos dsos; struct map_groups kmaps; struct map *vmlinux_maps[MAP__NR_TYPES]; @@ -47,11 +48,17 @@ struct machine { }; static inline -struct map *machine__kernel_map(struct machine *machine, enum map_type type) +struct map *__machine__kernel_map(struct machine *machine, enum map_type type) { return machine->vmlinux_maps[type]; } +static inline +struct map *machine__kernel_map(struct machine *machine) +{ + return __machine__kernel_map(machine, MAP__FUNCTION); +} + int machine__get_kernel_start(struct machine *machine); static inline u64 machine__kernel_start(struct machine *machine) @@ -87,6 +94,8 @@ int machine__process_aux_event(struct machine *machine, union perf_event *event); int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); +int machine__process_switch_event(struct machine *machine __maybe_unused, + union perf_event *event); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); int machine__process_mmap2_event(struct machine *machine, union perf_event *event, @@ -237,5 +246,9 @@ int machine__synthesize_threads(struct machine *machine, struct target *target, pid_t machine__get_current_tid(struct machine *machine, int cpu); int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid); +/* + * For use with libtraceevent's pevent_set_function_resolver() + */ +char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp); #endif /* __PERF_MACHINE_H */ diff --git a/tools/perf/util/map.c 
b/tools/perf/util/map.c index b5a5e9c02437..4e38c396a897 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -224,6 +224,20 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) return map; } +/* + * Use this and __map__is_kmodule() for map instances that are in + * machine->kmaps, and thus have map->groups->machine all properly set, to + * disambiguate between the kernel and modules. + * + * When the need arises, introduce map__is_{kernel,kmodule}() that + * checks (map->groups != NULL && map->groups->machine != NULL && + * map->dso->kernel) before calling __map__is_{kernel,kmodule}(). + */ +bool __map__is_kernel(const struct map *map) +{ + return __machine__kernel_map(map->groups->machine, map->type) == map; +} + static void map__exit(struct map *map) { BUG_ON(!RB_EMPTY_NODE(&map->rb_node)); @@ -334,9 +348,18 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name, return dso__find_symbol_by_name(map->dso, map->type, name); } -struct map *map__clone(struct map *map) +struct map *map__clone(struct map *from) { - return memdup(map, sizeof(*map)); + struct map *map = memdup(from, sizeof(*map)); + + if (map != NULL) { + atomic_set(&map->refcnt, 1); + RB_CLEAR_NODE(&map->rb_node); + dso__get(map->dso); + map->groups = NULL; + } + + return map; } int map__overlap(struct map *l, struct map *r) @@ -530,13 +553,9 @@ struct symbol *map_groups__find_symbol(struct map_groups *mg, return NULL; } -struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, - enum map_type type, - const char *name, - struct map **mapp, - symbol_filter_t filter) +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, + struct map **mapp, symbol_filter_t filter) { - struct maps *maps = &mg->maps[type]; struct symbol *sym; struct rb_node *nd; @@ -560,6 +579,17 @@ out: return sym; } +struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, + enum map_type type, + const char *name, + struct map
**mapp, + symbol_filter_t filter) +{ + struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp, filter); + + return sym; +} + int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter) { if (ams->addr < ams->map->start || ams->addr >= ams->map->end) { diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index d73e687b224e..7309d64ce39e 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -190,6 +190,8 @@ void maps__remove(struct maps *maps, struct map *map); struct map *maps__find(struct maps *maps, u64 addr); struct map *maps__first(struct maps *maps); struct map *map__next(struct map *map); +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, + struct map **mapp, symbol_filter_t filter); void map_groups__init(struct map_groups *mg, struct machine *machine); void map_groups__exit(struct map_groups *mg); int map_groups__clone(struct map_groups *mg, @@ -256,4 +258,11 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, struct map *map_groups__find_by_name(struct map_groups *mg, enum map_type type, const char *name); +bool __map__is_kernel(const struct map *map); + +static inline bool __map__is_kmodule(const struct map *map) +{ + return !__map__is_kernel(map); +} + #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 52be201b9b25..b1b9e2385f4b 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -220,6 +220,9 @@ static int __ordered_events__flush(struct ordered_events *oe) else if (last_ts <= limit) oe->last = list_entry(head->prev, struct ordered_event, list); + if (show_progress) + ui_progress__finish(); + return 0; } diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index a3b1e13a05c0..355eecf6bf59 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -27,6 +27,7 @@ static 
const struct branch_mode branch_modes[] = { BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX), BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND), BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP), + BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL), BRANCH_END }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 09f8d2357108..bee60583839a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,4 +1,5 @@ #include <linux/hw_breakpoint.h> +#include <linux/err.h> #include "util.h" #include "../perf.h" #include "evlist.h" @@ -10,8 +11,9 @@ #include "symbol.h" #include "cache.h" #include "header.h" +#include "bpf-loader.h" #include "debug.h" -#include <api/fs/debugfs.h> +#include <api/fs/tracing_path.h> #include "parse-events-bison.h" #define YY_EXTRA_TYPE int #include "parse-events-flex.h" @@ -26,6 +28,8 @@ extern int parse_events_debug; #endif int parse_events_parse(void *data, void *scanner); +static int get_config_terms(struct list_head *head_config, + struct list_head *head_terms __maybe_unused); static struct perf_pmu_event_symbol *perf_pmu_events_list; /* @@ -276,7 +280,8 @@ const char *event_type(int type) static struct perf_evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, - char *name, struct cpu_map *cpus) + char *name, struct cpu_map *cpus, + struct list_head *config_terms) { struct perf_evsel *evsel; @@ -286,19 +291,24 @@ __add_event(struct list_head *list, int *idx, if (!evsel) return NULL; - if (cpus) - evsel->cpus = cpu_map__get(cpus); + evsel->cpus = cpu_map__get(cpus); + evsel->own_cpus = cpu_map__get(cpus); if (name) evsel->name = strdup(name); + + if (config_terms) + list_splice(config_terms, &evsel->config_terms); + list_add_tail(&evsel->node, list); return evsel; } static int add_event(struct list_head *list, int *idx, - struct perf_event_attr *attr, char *name) + struct perf_event_attr *attr, char *name, + struct list_head *config_terms) { - return __add_event(list, 
idx, attr, name, NULL) ? 0 : -ENOMEM; + return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM; } static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) @@ -377,35 +387,75 @@ int parse_events_add_cache(struct list_head *list, int *idx, memset(&attr, 0, sizeof(attr)); attr.config = cache_type | (cache_op << 8) | (cache_result << 16); attr.type = PERF_TYPE_HW_CACHE; - return add_event(list, idx, &attr, name); + return add_event(list, idx, &attr, name, NULL); +} + +static void tracepoint_error(struct parse_events_error *e, int err, + char *sys, char *name) +{ + char help[BUFSIZ]; + + /* + * We get error directly from syscall errno ( > 0), + * or from encoded pointer's error ( < 0). + */ + err = abs(err); + + switch (err) { + case EACCES: + e->str = strdup("can't access trace events"); + break; + case ENOENT: + e->str = strdup("unknown tracepoint"); + break; + default: + e->str = strdup("failed to add tracepoint"); + break; + } + + tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name); + e->help = strdup(help); } static int add_tracepoint(struct list_head *list, int *idx, - char *sys_name, char *evt_name) + char *sys_name, char *evt_name, + struct parse_events_error *err, + struct list_head *head_config) { struct perf_evsel *evsel; evsel = perf_evsel__newtp_idx(sys_name, evt_name, (*idx)++); - if (!evsel) - return -ENOMEM; + if (IS_ERR(evsel)) { + tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name); + return PTR_ERR(evsel); + } - list_add_tail(&evsel->node, list); + if (head_config) { + LIST_HEAD(config_terms); + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + list_splice(&config_terms, &evsel->config_terms); + } + + list_add_tail(&evsel->node, list); return 0; } static int add_tracepoint_multi_event(struct list_head *list, int *idx, - char *sys_name, char *evt_name) + char *sys_name, char *evt_name, + struct parse_events_error *err, + struct list_head *head_config) { 
char evt_path[MAXPATHLEN]; struct dirent *evt_ent; DIR *evt_dir; - int ret = 0; + int ret = 0, found = 0; snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name); evt_dir = opendir(evt_path); if (!evt_dir) { - perror("Can't open event dir"); + tracepoint_error(err, errno, sys_name, evt_name); return -1; } @@ -419,7 +469,15 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, if (!strglobmatch(evt_ent->d_name, evt_name)) continue; - ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name); + found++; + + ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name, + err, head_config); + } + + if (!found) { + tracepoint_error(err, ENOENT, sys_name, evt_name); + ret = -1; } closedir(evt_dir); @@ -427,15 +485,21 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, } static int add_tracepoint_event(struct list_head *list, int *idx, - char *sys_name, char *evt_name) + char *sys_name, char *evt_name, + struct parse_events_error *err, + struct list_head *head_config) { return strpbrk(evt_name, "*?") ? 
- add_tracepoint_multi_event(list, idx, sys_name, evt_name) : - add_tracepoint(list, idx, sys_name, evt_name); + add_tracepoint_multi_event(list, idx, sys_name, evt_name, + err, head_config) : + add_tracepoint(list, idx, sys_name, evt_name, + err, head_config); } static int add_tracepoint_multi_sys(struct list_head *list, int *idx, - char *sys_name, char *evt_name) + char *sys_name, char *evt_name, + struct parse_events_error *err, + struct list_head *head_config) { struct dirent *events_ent; DIR *events_dir; @@ -443,7 +507,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, events_dir = opendir(tracing_events_path); if (!events_dir) { - perror("Can't open event dir"); + tracepoint_error(err, errno, sys_name, evt_name); return -1; } @@ -459,20 +523,135 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, continue; ret = add_tracepoint_event(list, idx, events_ent->d_name, - evt_name); + evt_name, err, head_config); } closedir(events_dir); return ret; } -int parse_events_add_tracepoint(struct list_head *list, int *idx, - char *sys, char *event) +struct __add_bpf_event_param { + struct parse_events_evlist *data; + struct list_head *list; +}; + +static int add_bpf_event(struct probe_trace_event *tev, int fd, + void *_param) { - if (strpbrk(sys, "*?")) - return add_tracepoint_multi_sys(list, idx, sys, event); - else - return add_tracepoint_event(list, idx, sys, event); + LIST_HEAD(new_evsels); + struct __add_bpf_event_param *param = _param; + struct parse_events_evlist *evlist = param->data; + struct list_head *list = param->list; + struct perf_evsel *pos; + int err; + + pr_debug("add bpf event %s:%s and attach bpf program %d\n", + tev->group, tev->event, fd); + + err = parse_events_add_tracepoint(&new_evsels, &evlist->idx, tev->group, + tev->event, evlist->error, NULL); + if (err) { + struct perf_evsel *evsel, *tmp; + + pr_debug("Failed to add BPF event %s:%s\n", + tev->group, tev->event); + list_for_each_entry_safe(evsel, 
tmp, &new_evsels, node) { + list_del(&evsel->node); + perf_evsel__delete(evsel); + } + return err; + } + pr_debug("adding %s:%s\n", tev->group, tev->event); + + list_for_each_entry(pos, &new_evsels, node) { + pr_debug("adding %s:%s to %p\n", + tev->group, tev->event, pos); + pos->bpf_fd = fd; + } + list_splice(&new_evsels, list); + return 0; +} + +int parse_events_load_bpf_obj(struct parse_events_evlist *data, + struct list_head *list, + struct bpf_object *obj) +{ + int err; + char errbuf[BUFSIZ]; + struct __add_bpf_event_param param = {data, list}; + static bool registered_unprobe_atexit = false; + + if (IS_ERR(obj) || !obj) { + snprintf(errbuf, sizeof(errbuf), + "Internal error: load bpf obj with NULL"); + err = -EINVAL; + goto errout; + } + + /* + * Register atexit handler before calling bpf__probe() so + * bpf__probe() don't need to unprobe probe points its already + * created when failure. + */ + if (!registered_unprobe_atexit) { + atexit(bpf__clear); + registered_unprobe_atexit = true; + } + + err = bpf__probe(obj); + if (err) { + bpf__strerror_probe(obj, err, errbuf, sizeof(errbuf)); + goto errout; + } + + err = bpf__load(obj); + if (err) { + bpf__strerror_load(obj, err, errbuf, sizeof(errbuf)); + goto errout; + } + + err = bpf__foreach_tev(obj, add_bpf_event, &param); + if (err) { + snprintf(errbuf, sizeof(errbuf), + "Attach events in BPF object failed"); + goto errout; + } + + return 0; +errout: + data->error->help = strdup("(add -v to see detail)"); + data->error->str = strdup(errbuf); + return err; +} + +int parse_events_load_bpf(struct parse_events_evlist *data, + struct list_head *list, + char *bpf_file_name, + bool source) +{ + struct bpf_object *obj; + + obj = bpf__prepare_load(bpf_file_name, source); + if (IS_ERR(obj) || !obj) { + char errbuf[BUFSIZ]; + int err; + + err = obj ? 
PTR_ERR(obj) : -EINVAL; + + if (err == -ENOTSUP) + snprintf(errbuf, sizeof(errbuf), + "BPF support is not compiled"); + else + snprintf(errbuf, sizeof(errbuf), + "BPF object file '%s' is invalid", + bpf_file_name); + + data->error->help = strdup("(add -v to see detail)"); + data->error->str = strdup(errbuf); + return err; + } + + return parse_events_load_bpf_obj(data, list, obj); } static int @@ -539,7 +718,7 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, attr.type = PERF_TYPE_BREAKPOINT; attr.sample_period = 1; - return add_event(list, idx, &attr, NULL); + return add_event(list, idx, &attr, NULL, NULL); } static int check_type_val(struct parse_events_term *term, @@ -559,9 +738,13 @@ static int check_type_val(struct parse_events_term *term, return -EINVAL; } -static int config_term(struct perf_event_attr *attr, - struct parse_events_term *term, - struct parse_events_error *err) +typedef int config_term_func_t(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err); + +static int config_term_common(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) { #define CHECK_TYPE_VAL(type) \ do { \ @@ -570,12 +753,6 @@ do { \ } while (0) switch (term->type_term) { - case PARSE_EVENTS__TERM_TYPE_USER: - /* - * Always succeed for sysfs terms, as we dont know - * at this point what type they need to have. 
- */ - return 0; case PARSE_EVENTS__TERM_TYPE_CONFIG: CHECK_TYPE_VAL(NUM); attr->config = term->val.num; @@ -590,7 +767,9 @@ do { \ break; case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: CHECK_TYPE_VAL(NUM); - attr->sample_period = term->val.num; + break; + case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: + CHECK_TYPE_VAL(NUM); break; case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: /* @@ -598,10 +777,33 @@ do { \ * attr->branch_sample_type = term->val.num; */ break; + case PARSE_EVENTS__TERM_TYPE_TIME: + CHECK_TYPE_VAL(NUM); + if (term->val.num > 1) { + err->str = strdup("expected 0 or 1"); + err->idx = term->err_val; + return -EINVAL; + } + break; + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: + CHECK_TYPE_VAL(STR); + break; + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + CHECK_TYPE_VAL(NUM); + break; + case PARSE_EVENTS__TERM_TYPE_INHERIT: + CHECK_TYPE_VAL(NUM); + break; + case PARSE_EVENTS__TERM_TYPE_NOINHERIT: + CHECK_TYPE_VAL(NUM); + break; case PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; default: + err->str = strdup("unknown term"); + err->idx = term->err_term; + err->help = parse_events_formats_error_string(NULL); return -EINVAL; } @@ -609,9 +811,46 @@ do { \ #undef CHECK_TYPE_VAL } +static int config_term_pmu(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) + /* + * Always succeed for sysfs terms, as we dont know + * at this point what type they need to have. 
+ */ + return 0; + else + return config_term_common(attr, term, err); +} + +static int config_term_tracepoint(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + case PARSE_EVENTS__TERM_TYPE_INHERIT: + case PARSE_EVENTS__TERM_TYPE_NOINHERIT: + return config_term_common(attr, term, err); + default: + if (err) { + err->idx = term->err_term; + err->str = strdup("unknown term"); + err->help = strdup("valid terms: call-graph,stack-size\n"); + } + return -EINVAL; + } + + return 0; +} + static int config_attr(struct perf_event_attr *attr, struct list_head *head, - struct parse_events_error *err) + struct parse_events_error *err, + config_term_func_t config_term) { struct parse_events_term *term; @@ -622,22 +861,99 @@ static int config_attr(struct perf_event_attr *attr, return 0; } +static int get_config_terms(struct list_head *head_config, + struct list_head *head_terms __maybe_unused) +{ +#define ADD_CONFIG_TERM(__type, __name, __val) \ +do { \ + struct perf_evsel_config_term *__t; \ + \ + __t = zalloc(sizeof(*__t)); \ + if (!__t) \ + return -ENOMEM; \ + \ + INIT_LIST_HEAD(&__t->list); \ + __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ + __t->val.__name = __val; \ + list_add_tail(&__t->list, head_terms); \ +} while (0) + + struct parse_events_term *term; + + list_for_each_entry(term, head_config, list) { + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: + ADD_CONFIG_TERM(PERIOD, period, term->val.num); + break; + case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: + ADD_CONFIG_TERM(FREQ, freq, term->val.num); + break; + case PARSE_EVENTS__TERM_TYPE_TIME: + ADD_CONFIG_TERM(TIME, time, term->val.num); + break; + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: + ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str); + break; + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + ADD_CONFIG_TERM(STACK_USER, 
stack_user, term->val.num); + break; + case PARSE_EVENTS__TERM_TYPE_INHERIT: + ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 1 : 0); + break; + case PARSE_EVENTS__TERM_TYPE_NOINHERIT: + ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1); + break; + default: + break; + } + } +#undef ADD_EVSEL_CONFIG + return 0; +} + +int parse_events_add_tracepoint(struct list_head *list, int *idx, + char *sys, char *event, + struct parse_events_error *err, + struct list_head *head_config) +{ + if (head_config) { + struct perf_event_attr attr; + + if (config_attr(&attr, head_config, err, + config_term_tracepoint)) + return -EINVAL; + } + + if (strpbrk(sys, "*?")) + return add_tracepoint_multi_sys(list, idx, sys, event, + err, head_config); + else + return add_tracepoint_event(list, idx, sys, event, + err, head_config); +} + int parse_events_add_numeric(struct parse_events_evlist *data, struct list_head *list, u32 type, u64 config, struct list_head *head_config) { struct perf_event_attr attr; + LIST_HEAD(config_terms); memset(&attr, 0, sizeof(attr)); attr.type = type; attr.config = config; - if (head_config && - config_attr(&attr, head_config, data->error)) - return -EINVAL; + if (head_config) { + if (config_attr(&attr, head_config, data->error, + config_term_common)) + return -EINVAL; + + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + } - return add_event(list, &data->idx, &attr, NULL); + return add_event(list, &data->idx, &attr, NULL, &config_terms); } static int parse_events__is_name_term(struct parse_events_term *term) @@ -664,6 +980,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, struct perf_pmu_info info; struct perf_pmu *pmu; struct perf_evsel *evsel; + LIST_HEAD(config_terms); pmu = perf_pmu__find(name); if (!pmu) @@ -678,7 +995,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus); + evsel = __add_event(list, 
&data->idx, &attr, NULL, pmu->cpus, NULL); return evsel ? 0 : -ENOMEM; } @@ -689,14 +1006,18 @@ int parse_events_add_pmu(struct parse_events_evlist *data, * Configure hardcoded terms first, no need to check * return value when called with fail == 0 ;) */ - if (config_attr(&attr, head_config, data->error)) + if (config_attr(&attr, head_config, data->error, config_term_pmu)) return -EINVAL; + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + if (perf_pmu__config(pmu, &attr, head_config, data->error)) return -EINVAL; evsel = __add_event(list, &data->idx, &attr, - pmu_event_name(head_config), pmu->cpus); + pmu_event_name(head_config), pmu->cpus, + &config_terms); if (evsel) { evsel->unit = info.unit; evsel->scale = info.scale; @@ -717,6 +1038,11 @@ void parse_events__set_leader(char *name, struct list_head *list) { struct perf_evsel *leader; + if (list_empty(list)) { + WARN_ONCE(true, "WARNING: failed to set leader: empty list"); + return; + } + __perf_evlist__set_leader(list); leader = list_entry(list->next, struct perf_evsel, node); leader->group_name = name ? strdup(name) : NULL; @@ -743,6 +1069,7 @@ struct event_modifier { int eG; int eI; int precise; + int precise_max; int exclude_GH; int sample_read; int pinned; @@ -758,6 +1085,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int eG = evsel ? evsel->attr.exclude_guest : 0; int eI = evsel ? evsel->attr.exclude_idle : 0; int precise = evsel ? evsel->attr.precise_ip : 0; + int precise_max = 0; int sample_read = 0; int pinned = evsel ? 
evsel->attr.pinned : 0; @@ -794,6 +1122,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, /* use of precise requires exclude_guest */ if (!exclude_GH) eG = 1; + } else if (*str == 'P') { + precise_max = 1; } else if (*str == 'S') { sample_read = 1; } else if (*str == 'D') { @@ -824,6 +1154,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->eG = eG; mod->eI = eI; mod->precise = precise; + mod->precise_max = precise_max; mod->exclude_GH = exclude_GH; mod->sample_read = sample_read; mod->pinned = pinned; @@ -840,7 +1171,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. */ - if (strlen(str) > (sizeof("ukhGHpppSDI") - 1)) + if (strlen(str) > (sizeof("ukhGHpppPSDI") - 1)) return -1; while (*p) { @@ -879,6 +1210,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->attr.exclude_idle = mod.eI; evsel->exclude_GH = mod.exclude_GH; evsel->sample_read = mod.sample_read; + evsel->precise_max = mod.precise_max; if (perf_evsel__is_group_leader(evsel)) evsel->attr.pinned = mod.pinned; @@ -1064,9 +1396,18 @@ int parse_events(struct perf_evlist *evlist, const char *str, ret = parse_events__scanner(str, &data, PE_START_EVENTS); perf_pmu__parse_cleanup(); if (!ret) { - int entries = data.idx - evlist->nr_entries; - perf_evlist__splice_list_tail(evlist, &data.list, entries); + struct perf_evsel *last; + + if (list_empty(&data.list)) { + WARN_ONCE(true, "WARNING: event parser found nothing"); + return -1; + } + + perf_evlist__splice_list_tail(evlist, &data.list); evlist->nr_groups += data.nr_groups; + last = perf_evlist__last(evlist); + last->cmdline_group_boundary = true; + return 0; } @@ -1105,7 +1446,7 @@ static void parse_events_print_error(struct parse_events_error *err, * Maximum error index indent, we will cut * the event string if it's bigger. 
*/ - int max_err_idx = 10; + int max_err_idx = 13; /* * Let's be specific with the message when @@ -1162,30 +1503,99 @@ int parse_events_option(const struct option *opt, const char *str, return ret; } -int parse_filter(const struct option *opt, const char *str, - int unset __maybe_unused) +static int +foreach_evsel_in_last_glob(struct perf_evlist *evlist, + int (*func)(struct perf_evsel *evsel, + const void *arg), + const void *arg) { - struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; struct perf_evsel *last = NULL; + int err; + /* + * Don't return when list_empty, give func a chance to report + * error when it found last == NULL. + * + * So no need to WARN here, let *func do this. + */ if (evlist->nr_entries > 0) last = perf_evlist__last(evlist); - if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) { + do { + err = (*func)(last, arg); + if (err) + return -1; + if (!last) + return 0; + + if (last->node.prev == &evlist->entries) + return 0; + last = list_entry(last->node.prev, struct perf_evsel, node); + } while (!last->cmdline_group_boundary); + + return 0; +} + +static int set_filter(struct perf_evsel *evsel, const void *arg) +{ + const char *str = arg; + + if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) { fprintf(stderr, "--filter option should follow a -e tracepoint option\n"); return -1; } - last->filter = strdup(str); - if (last->filter == NULL) { - fprintf(stderr, "not enough memory to hold filter string\n"); + if (perf_evsel__append_filter(evsel, "&&", str) < 0) { + fprintf(stderr, + "not enough memory to hold filter string\n"); return -1; } return 0; } +int parse_filter(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; + + return foreach_evsel_in_last_glob(evlist, set_filter, + (const void *)str); +} + +static int add_exclude_perf_filter(struct perf_evsel *evsel, + const void *arg __maybe_unused) +{ + char new_filter[64]; + + 
if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) { + fprintf(stderr, + "--exclude-perf option should follow a -e tracepoint option\n"); + return -1; + } + + snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid()); + + if (perf_evsel__append_filter(evsel, "&&", new_filter) < 0) { + fprintf(stderr, + "not enough memory to hold filter string\n"); + return -1; + } + + return 0; +} + +int exclude_perf(const struct option *opt, + const char *arg __maybe_unused, + int unset __maybe_unused) +{ + struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; + + return foreach_evsel_in_last_glob(evlist, add_exclude_perf_filter, + NULL); +} + static const char * const event_type_descriptors[] = { "Hardware event", "Software event", @@ -1276,7 +1686,7 @@ restart: printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[PERF_TYPE_TRACEPOINT]); } - if (evt_num) + if (evt_num && pager_in_use()) printf("\n"); out_free: @@ -1432,7 +1842,7 @@ restart: printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[PERF_TYPE_HW_CACHE]); } - if (evt_num) + if (evt_num && pager_in_use()) printf("\n"); out_free: @@ -1505,7 +1915,7 @@ restart: } printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]); } - if (evt_num) + if (evt_num && pager_in_use()) printf("\n"); out_free: @@ -1546,13 +1956,14 @@ void print_events(const char *event_glob, bool name_only) printf(" %-50s [%s]\n", "cpu/t1=v1[,t2=v2,t3 ...]/modifier", event_type_descriptors[PERF_TYPE_RAW]); - printf(" (see 'man perf-list' on how to encode it)\n"); - printf("\n"); + if (pager_in_use()) + printf(" (see 'man perf-list' on how to encode it)\n\n"); printf(" %-50s [%s]\n", "mem:<addr>[/len][:access]", event_type_descriptors[PERF_TYPE_BREAKPOINT]); - printf("\n"); + if (pager_in_use()) + printf("\n"); } print_tracepoint_events(NULL, NULL, name_only); @@ -1668,3 +2079,29 @@ void parse_events_evlist_error(struct parse_events_evlist *data, err->str = strdup(str); 
WARN_ONCE(!err->str, "WARNING: failed to allocate error string"); } + +/* + * Return string contains valid config terms of an event. + * @additional_terms: For terms such as PMU sysfs terms. + */ +char *parse_events_formats_error_string(char *additional_terms) +{ + char *str; + static const char *static_terms = "config,config1,config2,name," + "period,freq,branch_type,time," + "call-graph,stack-size\n"; + + /* valid terms */ + if (additional_terms) { + if (!asprintf(&str, "valid terms: %s,%s", + additional_terms, static_terms)) + goto fail; + } else { + if (!asprintf(&str, "valid terms: %s", static_terms)) + goto fail; + } + return str; + +fail: + return NULL; +} diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 131f29b2f132..f1a6db107241 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -34,6 +34,7 @@ extern int parse_events(struct perf_evlist *evlist, const char *str, struct parse_events_error *error); extern int parse_events_terms(struct list_head *terms, const char *str); extern int parse_filter(const struct option *opt, const char *str, int unset); +extern int exclude_perf(const struct option *opt, const char *arg, int unset); #define EVENTS_HELP_MAX (128*1024) @@ -61,7 +62,13 @@ enum { PARSE_EVENTS__TERM_TYPE_CONFIG2, PARSE_EVENTS__TERM_TYPE_NAME, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD, + PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE, + PARSE_EVENTS__TERM_TYPE_TIME, + PARSE_EVENTS__TERM_TYPE_CALLGRAPH, + PARSE_EVENTS__TERM_TYPE_STACKSIZE, + PARSE_EVENTS__TERM_TYPE_NOINHERIT, + PARSE_EVENTS__TERM_TYPE_INHERIT }; struct parse_events_term { @@ -113,7 +120,18 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_name(struct list_head *list, char *name); int parse_events_add_tracepoint(struct list_head *list, int *idx, - char *sys, char *event); + 
char *sys, char *event, + struct parse_events_error *error, + struct list_head *head_config); +int parse_events_load_bpf(struct parse_events_evlist *data, + struct list_head *list, + char *bpf_file_name, + bool source); +/* Provide this function for perf test */ +struct bpf_object; +int parse_events_load_bpf_obj(struct parse_events_evlist *data, + struct list_head *list, + struct bpf_object *obj); int parse_events_add_numeric(struct parse_events_evlist *data, struct list_head *list, u32 type, u64 config, @@ -150,5 +168,6 @@ int print_hwcache_events(const char *event_glob, bool name_only); extern int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); +char *parse_events_formats_error_string(char *additional_terms); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 13cef3c65565..58c5831ffd5c 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -115,6 +115,8 @@ do { \ group [^,{}/]*[{][^}]*[}][^,{}/]* event_pmu [^,{}/]+[/][^/]*[/][^,{}/]* event [^,{}/]+ +bpf_object .*\.(o|bpf) +bpf_source .*\.c num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ @@ -122,7 +124,7 @@ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?][a-zA-Z0-9_*?.]* name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]* /* If you add a modifier you need to update check_modifier() */ -modifier_event [ukhpGHSDI]+ +modifier_event [ukhpPGHSDI]+ modifier_bp [rwx]{1,3} %% @@ -159,6 +161,8 @@ modifier_bp [rwx]{1,3} } {event_pmu} | +{bpf_object} | +{bpf_source} | {event} { BEGIN(INITIAL); REWIND(1); @@ -174,7 +178,7 @@ modifier_bp [rwx]{1,3} <config>{ /* - * Please update formats_error_string any time + * Please update parse_events_formats_error_string any time * new static term is added. 
*/ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } @@ -182,7 +186,13 @@ config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } +freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); } branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } +time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } +call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } +stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } +inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } +no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } @@ -260,6 +270,8 @@ r{num_raw_hex} { return raw(yyscanner); } {num_hex} { return value(yyscanner, 16); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } +{bpf_object} { return str(yyscanner, PE_BPF_OBJECT); } +{bpf_source} { return str(yyscanner, PE_BPF_SOURCE); } {name} { return pmu_str_check(yyscanner); } "/" { BEGIN(config); return '/'; } - { return '-'; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 591905a02b92..ad379968d4c1 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -42,6 +42,7 @@ static inc_group_count(struct list_head *list, %token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM %token PE_EVENT_NAME %token PE_NAME +%token PE_BPF_OBJECT PE_BPF_SOURCE %token PE_MODIFIER_EVENT PE_MODIFIER_BP %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP @@ -53,6 +54,8 @@ static inc_group_count(struct list_head *list, %type <num> PE_RAW %type <num> PE_TERM 
%type <str> PE_NAME +%type <str> PE_BPF_OBJECT +%type <str> PE_BPF_SOURCE %type <str> PE_NAME_CACHE_TYPE %type <str> PE_NAME_CACHE_OP_RESULT %type <str> PE_MODIFIER_EVENT @@ -67,8 +70,10 @@ static inc_group_count(struct list_head *list, %type <head> event_legacy_cache %type <head> event_legacy_mem %type <head> event_legacy_tracepoint +%type <tracepoint_name> tracepoint_name %type <head> event_legacy_numeric %type <head> event_legacy_raw +%type <head> event_bpf_file %type <head> event_def %type <head> event_mod %type <head> event_name @@ -84,6 +89,10 @@ static inc_group_count(struct list_head *list, u64 num; struct list_head *head; struct parse_events_term *term; + struct tracepoint_name { + char *sys; + char *event; + } tracepoint_name; } %% @@ -198,7 +207,8 @@ event_def: event_pmu | event_legacy_mem | event_legacy_tracepoint sep_dc | event_legacy_numeric sep_dc | - event_legacy_raw sep_dc + event_legacy_raw sep_dc | + event_bpf_file event_pmu: PE_NAME '/' event_config '/' @@ -255,7 +265,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc list_add_tail(&term->list, head); ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(list, &data->idx, "cpu", head)); + ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); parse_events__free_terms(head); $$ = list; } @@ -368,36 +378,60 @@ PE_PREFIX_MEM PE_VALUE sep_dc } event_legacy_tracepoint: -PE_NAME '-' PE_NAME ':' PE_NAME +tracepoint_name { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; struct list_head *list; - char sys_name[128]; - snprintf(&sys_name, 128, "%s-%s", $1, $3); ALLOC_LIST(list); - ABORT_ON(parse_events_add_tracepoint(list, &data->idx, &sys_name, $5)); + if (error) + error->idx = @1.first_column; + + if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, + error, NULL)) + return -1; + $$ = list; } | -PE_NAME ':' PE_NAME +tracepoint_name '/' event_config '/' { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; 
struct list_head *list; ALLOC_LIST(list); - if (parse_events_add_tracepoint(list, &data->idx, $1, $3)) { - struct parse_events_error *error = data->error; + if (error) + error->idx = @1.first_column; - if (error) { - error->idx = @1.first_column; - error->str = strdup("unknown tracepoint"); - } + if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, + error, $3)) return -1; - } + $$ = list; } +tracepoint_name: +PE_NAME '-' PE_NAME ':' PE_NAME +{ + char sys_name[128]; + struct tracepoint_name tracepoint; + + snprintf(&sys_name, 128, "%s-%s", $1, $3); + tracepoint.sys = &sys_name; + tracepoint.event = $5; + + $$ = tracepoint; +} +| +PE_NAME ':' PE_NAME +{ + struct tracepoint_name tracepoint = {$1, $3}; + + $$ = tracepoint; +} + event_legacy_numeric: PE_VALUE ':' PE_VALUE { @@ -420,6 +454,28 @@ PE_RAW $$ = list; } +event_bpf_file: +PE_BPF_OBJECT +{ + struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; + struct list_head *list; + + ALLOC_LIST(list); + ABORT_ON(parse_events_load_bpf(data, list, $1, false)); + $$ = list; +} +| +PE_BPF_SOURCE +{ + struct parse_events_evlist *data = _data; + struct list_head *list; + + ALLOC_LIST(list); + ABORT_ON(parse_events_load_bpf(data, list, $1, true)); + $$ = list; +} + start_terms: event_config { struct parse_events_terms *data = _data; diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 01626be2a8eb..9fca09296eb3 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -2,10 +2,13 @@ #include "parse-options.h" #include "cache.h" #include "header.h" +#include <linux/string.h> #define OPT_SHORT 1 #define OPT_UNSET 2 +static struct strbuf error_buf = STRBUF_INIT; + static int opterror(const struct option *opt, const char *reason, int flags) { if (flags & OPT_SHORT) @@ -372,7 +375,8 @@ void parse_options_start(struct parse_opt_ctx_t *ctx, } static int usage_with_options_internal(const char * const *, - const struct 
option *, int); + const struct option *, int, + struct parse_opt_ctx_t *); int parse_options_step(struct parse_opt_ctx_t *ctx, const struct option *options, @@ -396,8 +400,9 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, if (arg[1] != '-') { ctx->opt = ++arg; - if (internal_help && *ctx->opt == 'h') - return usage_with_options_internal(usagestr, options, 0); + if (internal_help && *ctx->opt == 'h') { + return usage_with_options_internal(usagestr, options, 0, ctx); + } switch (parse_short_opt(ctx, options)) { case -1: return parse_options_usage(usagestr, options, arg, 1); @@ -412,7 +417,7 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, check_typos(arg, options); while (ctx->opt) { if (internal_help && *ctx->opt == 'h') - return usage_with_options_internal(usagestr, options, 0); + return usage_with_options_internal(usagestr, options, 0, ctx); arg = ctx->opt; switch (parse_short_opt(ctx, options)) { case -1: @@ -445,9 +450,9 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, arg += 2; if (internal_help && !strcmp(arg, "help-all")) - return usage_with_options_internal(usagestr, options, 1); + return usage_with_options_internal(usagestr, options, 1, ctx); if (internal_help && !strcmp(arg, "help")) - return usage_with_options_internal(usagestr, options, 0); + return usage_with_options_internal(usagestr, options, 0, ctx); if (!strcmp(arg, "list-opts")) return PARSE_OPT_LIST_OPTS; if (!strcmp(arg, "list-cmds")) @@ -496,7 +501,7 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o { struct parse_opt_ctx_t ctx; - perf_header__set_cmdline(argc, argv); + perf_env__set_cmdline(&perf_env, argc, argv); /* build usage string if it's not provided */ if (subcommands && !usagestr[0]) { @@ -537,9 +542,11 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o exit(130); default: /* PARSE_OPT_UNKNOWN */ if (ctx.argv[0][1] == '-') { - error("unknown option `%s'", ctx.argv[0] + 2); + strbuf_addf(&error_buf, 
"unknown option `%s'", + ctx.argv[0] + 2); } else { - error("unknown switch `%c'", *ctx.opt); + strbuf_addf(&error_buf, "unknown switch `%c'", + *ctx.opt); } usage_with_options(usagestr, options); } @@ -642,13 +649,93 @@ static void print_option_help(const struct option *opts, int full) fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help); } +static int option__cmp(const void *va, const void *vb) +{ + const struct option *a = va, *b = vb; + int sa = tolower(a->short_name), sb = tolower(b->short_name), ret; + + if (sa == 0) + sa = 'z' + 1; + if (sb == 0) + sb = 'z' + 1; + + ret = sa - sb; + + if (ret == 0) { + const char *la = a->long_name ?: "", + *lb = b->long_name ?: ""; + ret = strcmp(la, lb); + } + + return ret; +} + +static struct option *options__order(const struct option *opts) +{ + int nr_opts = 0; + const struct option *o = opts; + struct option *ordered; + + for (o = opts; o->type != OPTION_END; o++) + ++nr_opts; + + ordered = memdup(opts, sizeof(*o) * (nr_opts + 1)); + if (ordered == NULL) + goto out; + + qsort(ordered, nr_opts, sizeof(*o), option__cmp); +out: + return ordered; +} + +static bool option__in_argv(const struct option *opt, const struct parse_opt_ctx_t *ctx) +{ + int i; + + for (i = 1; i < ctx->argc; ++i) { + const char *arg = ctx->argv[i]; + + if (arg[0] != '-') { + if (arg[1] == '\0') { + if (arg[0] == opt->short_name) + return true; + continue; + } + + if (opt->long_name && strcmp(opt->long_name, arg) == 0) + return true; + + if (opt->help && strcasestr(opt->help, arg) != NULL) + return true; + + continue; + } + + if (arg[1] == opt->short_name || + (arg[1] == '-' && opt->long_name && strcmp(opt->long_name, arg + 2) == 0)) + return true; + } + + return false; +} + int usage_with_options_internal(const char * const *usagestr, - const struct option *opts, int full) + const struct option *opts, int full, + struct parse_opt_ctx_t *ctx) { + struct option *ordered; + if (!usagestr) return PARSE_OPT_HELP; - fprintf(stderr, "\n usage: 
%s\n", *usagestr++); + setup_pager(); + + if (strbuf_avail(&error_buf)) { + fprintf(stderr, " Error: %s\n", error_buf.buf); + strbuf_release(&error_buf); + } + + fprintf(stderr, "\n Usage: %s\n", *usagestr++); while (*usagestr && **usagestr) fprintf(stderr, " or: %s\n", *usagestr++); while (*usagestr) { @@ -661,11 +748,20 @@ int usage_with_options_internal(const char * const *usagestr, if (opts->type != OPTION_GROUP) fputc('\n', stderr); - for ( ; opts->type != OPTION_END; opts++) + ordered = options__order(opts); + if (ordered) + opts = ordered; + + for ( ; opts->type != OPTION_END; opts++) { + if (ctx && ctx->argc > 1 && !option__in_argv(opts, ctx)) + continue; print_option_help(opts, full); + } fputc('\n', stderr); + free(ordered); + return PARSE_OPT_HELP; } @@ -673,7 +769,22 @@ void usage_with_options(const char * const *usagestr, const struct option *opts) { exit_browser(false); - usage_with_options_internal(usagestr, opts, 0); + usage_with_options_internal(usagestr, opts, 0, NULL); + exit(129); +} + +void usage_with_options_msg(const char * const *usagestr, + const struct option *opts, const char *fmt, ...) 
+{ + va_list ap; + + exit_browser(false); + + va_start(ap, fmt); + strbuf_addv(&error_buf, fmt, ap); + va_end(ap); + + usage_with_options_internal(usagestr, opts, 0, NULL); exit(129); } @@ -684,7 +795,7 @@ int parse_options_usage(const char * const *usagestr, if (!usagestr) goto opt; - fprintf(stderr, "\n usage: %s\n", *usagestr++); + fprintf(stderr, "\n Usage: %s\n", *usagestr++); while (*usagestr && **usagestr) fprintf(stderr, " or: %s\n", *usagestr++); while (*usagestr) { @@ -698,24 +809,23 @@ int parse_options_usage(const char * const *usagestr, opt: for ( ; opts->type != OPTION_END; opts++) { if (short_opt) { - if (opts->short_name == *optstr) + if (opts->short_name == *optstr) { + print_option_help(opts, 0); break; + } continue; } if (opts->long_name == NULL) continue; - if (!prefixcmp(optstr, opts->long_name)) - break; - if (!prefixcmp(optstr, "no-") && - !prefixcmp(optstr + 3, opts->long_name)) - break; + if (!prefixcmp(opts->long_name, optstr)) + print_option_help(opts, 0); + if (!prefixcmp("no-", optstr) && + !prefixcmp(opts->long_name, optstr + 3)) + print_option_help(opts, 0); } - if (opts->type != OPTION_END) - print_option_help(opts, 0); - return PARSE_OPT_HELP; } diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index 367d8b816cc7..a8e407bc251e 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -111,6 +111,7 @@ struct option { #define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) } #define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h), .defval = (b) } #define OPT_BOOLEAN(s, l, v, h) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h) } +#define OPT_BOOLEAN_FLAG(s, l, v, h, f) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h), .flags = (f) } #define OPT_BOOLEAN_SET(s, l, v, os, h) \ { .type = 
OPTION_BOOLEAN, .short_name = (s), .long_name = (l), \ .value = check_vtype(v, bool *), .help = (h), \ @@ -160,6 +161,10 @@ extern int parse_options_subcommand(int argc, const char **argv, extern NORETURN void usage_with_options(const char * const *usagestr, const struct option *options); +extern NORETURN __attribute__((format(printf,3,4))) +void usage_with_options_msg(const char * const *usagestr, + const struct option *options, + const char *fmt, ...); /*----- incremantal advanced APIs -----*/ diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c new file mode 100644 index 000000000000..4f2c1c255d81 --- /dev/null +++ b/tools/perf/util/parse-regs-options.c @@ -0,0 +1,71 @@ +#include "perf.h" +#include "util/util.h" +#include "util/debug.h" +#include "util/parse-options.h" +#include "util/parse-regs-options.h" + +int +parse_regs(const struct option *opt, const char *str, int unset) +{ + uint64_t *mode = (uint64_t *)opt->value; + const struct sample_reg *r; + char *s, *os = NULL, *p; + int ret = -1; + + if (unset) + return 0; + + /* + * cannot set it twice + */ + if (*mode) + return -1; + + /* str may be NULL in case no arg is passed to -I */ + if (str) { + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + if (!strcmp(s, "?")) { + fprintf(stderr, "available registers: "); + for (r = sample_reg_masks; r->name; r++) { + fprintf(stderr, "%s ", r->name); + } + fputc('\n', stderr); + /* just printing available regs */ + return -1; + } + for (r = sample_reg_masks; r->name; r++) { + if (!strcasecmp(s, r->name)) + break; + } + if (!r->name) { + ui__warning("unknown register %s," + " check man page\n", s); + goto error; + } + + *mode |= r->mask; + + if (!p) + break; + + s = p + 1; + } + } + ret = 0; + + /* default to all possible regs */ + if (*mode == 0) + *mode = PERF_REGS_MASK; +error: + free(os); + return ret; +} diff --git 
a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h new file mode 100644 index 000000000000..7d762b188007 --- /dev/null +++ b/tools/perf/util/parse-regs-options.h @@ -0,0 +1,5 @@ +#ifndef _PERF_PARSE_REGS_OPTIONS_H +#define _PERF_PARSE_REGS_OPTIONS_H 1 +struct option; +int parse_regs(const struct option *opt, const char *str, int unset); +#endif /* _PERF_PARSE_REGS_OPTIONS_H */ diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 43168fb0d9a2..6b8eb13e14e4 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -2,6 +2,11 @@ #include "perf_regs.h" #include "event.h" +const struct sample_reg __weak sample_reg_masks[] = { + SMPL_REG_END +}; + +#ifdef HAVE_PERF_REGS_SUPPORT int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { int i, idx = 0; @@ -25,3 +30,4 @@ out: *valp = regs->cache_regs[id]; return 0; } +#endif diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 980dbf76bc98..679d6e493962 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -2,9 +2,19 @@ #define __PERF_REGS_H #include <linux/types.h> +#include <linux/compiler.h> struct regs_dump; +struct sample_reg { + const char *name; + uint64_t mask; +}; +#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) } +#define SMPL_REG_END { .name = NULL } + +extern const struct sample_reg sample_reg_masks[]; + #ifdef HAVE_PERF_REGS_SUPPORT #include <perf_regs.h> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7bcb8c315615..e4b173dec4b9 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -462,10 +462,6 @@ static struct perf_pmu *pmu_lookup(const char *name) LIST_HEAD(aliases); __u32 type; - /* No support for intel_bts or intel_pt so disallow them */ - if (!strcmp(name, "intel_bts") || !strcmp(name, "intel_pt")) - return NULL; - /* * The pmu data we store & need consists of the pmu * type value and format definitions. 
Load both right @@ -542,7 +538,7 @@ struct perf_pmu *perf_pmu__find(const char *name) } static struct perf_pmu_format * -pmu_find_format(struct list_head *formats, char *name) +pmu_find_format(struct list_head *formats, const char *name) { struct perf_pmu_format *format; @@ -553,6 +549,21 @@ pmu_find_format(struct list_head *formats, char *name) return NULL; } +__u64 perf_pmu__format_bits(struct list_head *formats, const char *name) +{ + struct perf_pmu_format *format = pmu_find_format(formats, name); + __u64 bits = 0; + int fbit; + + if (!format) + return 0; + + for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS) + bits |= 1ULL << fbit; + + return bits; +} + /* * Sets value based on the format definition (format parameter) * and unformated value (value parameter). @@ -574,6 +585,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, } } +static __u64 pmu_format_max_value(const unsigned long *format) +{ + int w; + + w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); + if (!w) + return 0; + if (w < 64) + return (1ULL << w) - 1; + return -1; +} + /* * Term is a string term, and might be a param-term. Try to look up it's value * in the remaining terms. @@ -603,36 +626,26 @@ static int pmu_resolve_param_term(struct parse_events_term *term, return -1; } -static char *formats_error_string(struct list_head *formats) +static char *pmu_formats_string(struct list_head *formats) { struct perf_pmu_format *format; - char *err, *str; - static const char *static_terms = "config,config1,config2,name,period,branch_type\n"; + char *str; + struct strbuf buf; unsigned i = 0; - if (!asprintf(&str, "valid terms:")) + if (!formats) return NULL; + strbuf_init(&buf, 0); /* sysfs exported terms */ - list_for_each_entry(format, formats, list) { - char c = i++ ? ',' : ' '; - - err = str; - if (!asprintf(&str, "%s%c%s", err, c, format->name)) - goto fail; - free(err); - } + list_for_each_entry(format, formats, list) + strbuf_addf(&buf, i++ ? 
",%s" : "%s", + format->name); - /* static terms */ - err = str; - if (!asprintf(&str, "%s,%s", err, static_terms)) - goto fail; + str = strbuf_detach(&buf, NULL); + strbuf_release(&buf); - free(err); return str; -fail: - free(err); - return NULL; } /* @@ -647,7 +660,7 @@ static int pmu_config_term(struct list_head *formats, { struct perf_pmu_format *format; __u64 *vp; - __u64 val; + __u64 val, max_val; /* * If this is a parameter we've already used for parameterized-eval, @@ -668,9 +681,12 @@ static int pmu_config_term(struct list_head *formats, if (verbose) printf("Invalid event/parameter '%s'\n", term->config); if (err) { + char *pmu_term = pmu_formats_string(formats); + err->idx = term->err_term; err->str = strdup("unknown term"); - err->help = formats_error_string(formats); + err->help = parse_events_formats_error_string(pmu_term); + free(pmu_term); } return -EINVAL; } @@ -713,6 +729,22 @@ static int pmu_config_term(struct list_head *formats, } else return -EINVAL; + max_val = pmu_format_max_value(format->bits); + if (val > max_val) { + if (err) { + err->idx = term->err_val; + if (asprintf(&err->str, + "value too big for format, maximum is %llu", + (unsigned long long)max_val) < 0) + err->str = strdup("value too big for format"); + return -EINVAL; + } + /* + * Assume we don't care if !err, in which case the value will be + * silently truncated. 
+ */ + } + pmu_format_value(format->bits, val, vp, zero); return 0; } @@ -976,7 +1008,8 @@ void print_pmu_events(const char *event_glob, bool name_only) goto out_enomem; j++; } - if (pmu->selectable) { + if (pmu->selectable && + (event_glob == NULL || strglobmatch(pmu->name, event_glob))) { char *s; if (asprintf(&s, "%s//", pmu->name) < 0) goto out_enomem; @@ -994,7 +1027,7 @@ void print_pmu_events(const char *event_glob, bool name_only) printf(" %-50s [Kernel PMU event]\n", aliases[j]); printed++; } - if (printed) + if (printed && pager_in_use()) printf("\n"); out_free: for (j = 0; j < len; j++) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 7b9c8cf8ae3e..5d7e84466bee 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -54,6 +54,7 @@ int perf_pmu__config_terms(struct list_head *formats, struct perf_event_attr *attr, struct list_head *head_terms, bool zero, struct parse_events_error *error); +__u64 perf_pmu__format_bits(struct list_head *formats, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, struct perf_pmu_info *info); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 381f23a443c7..b51a8bfb40f9 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -40,11 +40,11 @@ #include "color.h" #include "symbol.h" #include "thread.h" -#include <api/fs/debugfs.h> -#include <api/fs/tracefs.h> +#include <api/fs/fs.h> #include "trace-event.h" /* For __maybe_unused */ #include "probe-event.h" #include "probe-finder.h" +#include "probe-file.h" #include "session.h" #define MAX_CMDLEN 256 @@ -55,11 +55,7 @@ struct probe_conf probe_conf; #define semantic_error(msg ...) pr_err("Semantic error :" msg) -/* If there is no space to write, returns -E2BIG. */ -static int e_snprintf(char *str, size_t size, const char *format, ...) 
- __attribute__((format(printf, 3, 4))); - -static int e_snprintf(char *str, size_t size, const char *format, ...) +int e_snprintf(char *str, size_t size, const char *format, ...) { int ret; va_list ap; @@ -72,11 +68,10 @@ static int e_snprintf(char *str, size_t size, const char *format, ...) } static char *synthesize_perf_probe_point(struct perf_probe_point *pp); -static void clear_probe_trace_event(struct probe_trace_event *tev); static struct machine *host_machine; /* Initialize symbol maps and path of vmlinux/modules */ -static int init_symbol_maps(bool user_only) +int init_probe_symbol_maps(bool user_only) { int ret; @@ -106,7 +101,7 @@ out: return ret; } -static void exit_symbol_maps(void) +void exit_probe_symbol_maps(void) { if (host_machine) { machine__delete(host_machine); @@ -131,17 +126,19 @@ static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) { /* kmap->ref_reloc_sym should be set if host_machine is initialized */ struct kmap *kmap; + struct map *map = machine__kernel_map(host_machine); - if (map__load(host_machine->vmlinux_maps[MAP__FUNCTION], NULL) < 0) + if (map__load(map, NULL) < 0) return NULL; - kmap = map__kmap(host_machine->vmlinux_maps[MAP__FUNCTION]); + kmap = map__kmap(map); if (!kmap) return NULL; return kmap->ref_reloc_sym; } -static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc) +static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, + bool reloc, bool reladdr) { struct ref_reloc_sym *reloc_sym; struct symbol *sym; @@ -150,12 +147,14 @@ static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc) /* ref_reloc_sym is just a label. Need a special fix*/ reloc_sym = kernel_get_ref_reloc_sym(); if (reloc_sym && strcmp(name, reloc_sym->name) == 0) - return (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr; + *addr = (reloc) ? 
reloc_sym->addr : reloc_sym->unrelocated_addr; else { sym = __find_kernel_function_by_name(name, &map); - if (sym) - return map->unmap_ip(map, sym->start) - - ((reloc) ? 0 : map->reloc); + if (!sym) + return -ENOENT; + *addr = map->unmap_ip(map, sym->start) - + ((reloc) ? 0 : map->reloc) - + ((reladdr) ? map->start : 0); } return 0; } @@ -249,12 +248,14 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) static bool kprobe_blacklist__listed(unsigned long address); static bool kprobe_warn_out_range(const char *symbol, unsigned long address) { - u64 etext_addr; + u64 etext_addr = 0; + int ret; /* Get the address of _etext for checking non-probable text symbol */ - etext_addr = kernel_get_symbol_address_by_name("_etext", false); + ret = kernel_get_symbol_address_by_name("_etext", &etext_addr, + false, false); - if (etext_addr != 0 && etext_addr < address) + if (ret == 0 && etext_addr < address) pr_warning("%s is out of .text, skip it.\n", symbol); else if (kprobe_blacklist__listed(address)) pr_warning("%s is blacklisted function, skip it.\n", symbol); @@ -274,18 +275,19 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) int ret = 0; if (module) { - list_for_each_entry(dso, &host_machine->dsos.head, node) { - if (!dso->kernel) - continue; - if (strncmp(dso->short_name + 1, module, - dso->short_name_len - 2) == 0) - goto found; + char module_name[128]; + + snprintf(module_name, sizeof(module_name), "[%s]", module); + map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name); + if (map) { + dso = map->dso; + goto found; } pr_debug("Failed to find module %s.\n", module); return -ENOENT; } - map = host_machine->vmlinux_maps[MAP__FUNCTION]; + map = machine__kernel_map(host_machine); dso = map->dso; vmlinux_name = symbol_conf.vmlinux_name; @@ -439,19 +441,22 @@ static char *debuginfo_cache_path; static struct debuginfo *debuginfo_cache__open(const char *module, bool silent) { - if 
((debuginfo_cache_path && !strcmp(debuginfo_cache_path, module)) || - (!debuginfo_cache_path && !module && debuginfo_cache)) + const char *path = module; + + /* If the module is NULL, it should be the kernel. */ + if (!module) + path = "kernel"; + + if (debuginfo_cache_path && !strcmp(debuginfo_cache_path, path)) goto out; /* Copy module path */ free(debuginfo_cache_path); - if (module) { - debuginfo_cache_path = strdup(module); - if (!debuginfo_cache_path) { - debuginfo__delete(debuginfo_cache); - debuginfo_cache = NULL; - goto out; - } + debuginfo_cache_path = strdup(path); + if (!debuginfo_cache_path) { + debuginfo__delete(debuginfo_cache); + debuginfo_cache = NULL; + goto out; } debuginfo_cache = open_debuginfo(module, silent); @@ -519,9 +524,11 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, if (ret < 0) goto error; addr += stext; - } else { - addr = kernel_get_symbol_address_by_name(tp->symbol, false); - if (addr == 0) + } else if (tp->symbol) { + /* If the module is given, this returns relative address */ + ret = kernel_get_symbol_address_by_name(tp->symbol, &addr, + false, !!tp->module); + if (ret != 0) goto error; addr += tp->offset; } @@ -709,9 +716,10 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, } /* Error path : ntevs < 0 */ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); - if (ntevs == -EBADF) { - pr_warning("Warning: No dwarf info found in the vmlinux - " - "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); + if (ntevs < 0) { + if (ntevs == -EBADF) + pr_warning("Warning: No dwarf info found in the vmlinux - " + "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); if (!need_dwarf) { pr_debug("Trying to use symbols.\n"); return 0; @@ -863,11 +871,11 @@ int show_line_range(struct line_range *lr, const char *module, bool user) { int ret; - ret = init_symbol_maps(user); + ret = init_probe_symbol_maps(user); if (ret < 0) return ret; ret = __show_line_range(lr, module, user); 
- exit_symbol_maps(); + exit_probe_symbol_maps(); return ret; } @@ -945,7 +953,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, int i, ret = 0; struct debuginfo *dinfo; - ret = init_symbol_maps(pevs->uprobes); + ret = init_probe_symbol_maps(pevs->uprobes); if (ret < 0) return ret; @@ -962,7 +970,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, debuginfo__delete(dinfo); out: - exit_symbol_maps(); + exit_probe_symbol_maps(); return ret; } @@ -1197,15 +1205,37 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) *ptr++ = '\0'; } - tmp = strdup(arg); - if (tmp == NULL) - return -ENOMEM; + if (arg[0] == '\0') + tmp = NULL; + else { + tmp = strdup(arg); + if (tmp == NULL) + return -ENOMEM; + } if (file_spec) pp->file = tmp; - else + else { pp->function = tmp; + /* + * Keep pp->function even if this is absolute address, + * so it can mark whether abs_address is valid. + * Which make 'perf probe lib.bin 0x0' possible. + * + * Note that checking length of tmp is not needed + * because when we access tmp[1] we know tmp[0] is '0', + * so tmp[1] should always valid (but could be '\0'). 
+ */ + if (tmp && !strncmp(tmp, "0x", 2)) { + pp->abs_address = strtoul(pp->function, &tmp, 0); + if (*tmp != '\0') { + semantic_error("Invalid absolute address.\n"); + return -EINVAL; + } + } + } + /* Parse other options */ while (ptr) { arg = ptr; @@ -1467,8 +1497,7 @@ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev) } /* Parse probe_events event into struct probe_point */ -static int parse_probe_trace_command(const char *cmd, - struct probe_trace_event *tev) +int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev) { struct probe_trace_point *tp = &tev->point; char pr; @@ -1523,9 +1552,31 @@ static int parse_probe_trace_command(const char *cmd, } else p = argv[1]; fmt1_str = strtok_r(p, "+", &fmt); - if (fmt1_str[0] == '0') /* only the address started with 0x */ - tp->address = strtoul(fmt1_str, NULL, 0); - else { + /* only the address started with 0x */ + if (fmt1_str[0] == '0') { + /* + * Fix a special case: + * if address == 0, kernel reports something like: + * p:probe_libc/abs_0 /lib/libc-2.18.so:0x (null) arg1=%ax + * Newer kernel may fix that, but we want to + * support old kernel also. + */ + if (strcmp(fmt1_str, "0x") == 0) { + if (!argv[2] || strcmp(argv[2], "(null)")) { + ret = -EINVAL; + goto out; + } + tp->address = 0; + + free(argv[2]); + for (i = 2; argv[i + 1] != NULL; i++) + argv[i] = argv[i + 1]; + + argv[i] = NULL; + argc -= 1; + } else + tp->address = strtoul(fmt1_str, NULL, 0); + } else { /* Only the symbol-based probe has offset */ tp->symbol = strdup(fmt1_str); if (tp->symbol == NULL) { @@ -1782,14 +1833,29 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev) if (len <= 0) goto error; - /* Uprobes must have tp->address and tp->module */ - if (tev->uprobes && (!tp->address || !tp->module)) + /* Uprobes must have tp->module */ + if (tev->uprobes && !tp->module) goto error; + /* + * If tp->address == 0, then this point must be a + * absolute address uprobe. 
+ * try_to_find_absolute_address() should have made + * tp->symbol to "0x0". + */ + if (tev->uprobes && !tp->address) { + if (!tp->symbol || strcmp(tp->symbol, "0x0")) + goto error; + } /* Use the tp->address for uprobes */ if (tev->uprobes) ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx", tp->module, tp->address); + else if (!strncmp(tp->symbol, "0x", 2)) + /* Absolute address. See try_to_find_absolute_address() */ + ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx", + tp->module ?: "", tp->module ? ":" : "", + tp->address); else ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu", tp->module ?: "", tp->module ? ":" : "", @@ -1819,17 +1885,21 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, { struct symbol *sym = NULL; struct map *map; - u64 addr; + u64 addr = tp->address; int ret = -ENOENT; if (!is_kprobe) { map = dso__new_map(tp->module); if (!map) goto out; - addr = tp->address; sym = map__find_symbol(map, addr, NULL); } else { - addr = kernel_get_symbol_address_by_name(tp->symbol, true); + if (tp->symbol && !addr) { + ret = kernel_get_symbol_address_by_name(tp->symbol, + &addr, true, false); + if (ret < 0) + goto out; + } if (addr) { addr += tp->offset; sym = __find_kernel_function(addr, &map); @@ -1852,8 +1922,8 @@ out: } static int convert_to_perf_probe_point(struct probe_trace_point *tp, - struct perf_probe_point *pp, - bool is_kprobe) + struct perf_probe_point *pp, + bool is_kprobe) { char buf[128]; int ret; @@ -1870,7 +1940,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp, if (tp->symbol) { pp->function = strdup(tp->symbol); pp->offset = tp->offset; - } else if (!tp->module && !is_kprobe) { + } else { ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address); if (ret < 0) return ret; @@ -1951,7 +2021,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev) memset(pev, 0, sizeof(*pev)); } -static void clear_probe_trace_event(struct probe_trace_event *tev) +void 
clear_probe_trace_event(struct probe_trace_event *tev) { struct probe_trace_arg_ref *ref, *next; int i; @@ -1976,119 +2046,6 @@ static void clear_probe_trace_event(struct probe_trace_event *tev) memset(tev, 0, sizeof(*tev)); } -static void print_open_warning(int err, bool is_kprobe) -{ - char sbuf[STRERR_BUFSIZE]; - - if (err == -ENOENT) { - const char *config; - - if (!is_kprobe) - config = "CONFIG_UPROBE_EVENTS"; - else - config = "CONFIG_KPROBE_EVENTS"; - - pr_warning("%cprobe_events file does not exist" - " - please rebuild kernel with %s.\n", - is_kprobe ? 'k' : 'u', config); - } else if (err == -ENOTSUP) - pr_warning("Tracefs or debugfs is not mounted.\n"); - else - pr_warning("Failed to open %cprobe_events: %s\n", - is_kprobe ? 'k' : 'u', - strerror_r(-err, sbuf, sizeof(sbuf))); -} - -static void print_both_open_warning(int kerr, int uerr) -{ - /* Both kprobes and uprobes are disabled, warn it. */ - if (kerr == -ENOTSUP && uerr == -ENOTSUP) - pr_warning("Tracefs or debugfs is not mounted.\n"); - else if (kerr == -ENOENT && uerr == -ENOENT) - pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS " - "or/and CONFIG_UPROBE_EVENTS.\n"); - else { - char sbuf[STRERR_BUFSIZE]; - pr_warning("Failed to open kprobe events: %s.\n", - strerror_r(-kerr, sbuf, sizeof(sbuf))); - pr_warning("Failed to open uprobe events: %s.\n", - strerror_r(-uerr, sbuf, sizeof(sbuf))); - } -} - -static int open_probe_events(const char *trace_file, bool readwrite) -{ - char buf[PATH_MAX]; - const char *__debugfs; - const char *tracing_dir = ""; - int ret; - - __debugfs = tracefs_find_mountpoint(); - if (__debugfs == NULL) { - tracing_dir = "tracing/"; - - __debugfs = debugfs_find_mountpoint(); - if (__debugfs == NULL) - return -ENOTSUP; - } - - ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", - __debugfs, tracing_dir, trace_file); - if (ret >= 0) { - pr_debug("Opening %s write=%d\n", buf, readwrite); - if (readwrite && !probe_event_dry_run) - ret = open(buf, O_RDWR | O_APPEND, 0); - else - 
ret = open(buf, O_RDONLY, 0); - - if (ret < 0) - ret = -errno; - } - return ret; -} - -static int open_kprobe_events(bool readwrite) -{ - return open_probe_events("kprobe_events", readwrite); -} - -static int open_uprobe_events(bool readwrite) -{ - return open_probe_events("uprobe_events", readwrite); -} - -/* Get raw string list of current kprobe_events or uprobe_events */ -static struct strlist *get_probe_trace_command_rawlist(int fd) -{ - int ret, idx; - FILE *fp; - char buf[MAX_CMDLEN]; - char *p; - struct strlist *sl; - - sl = strlist__new(true, NULL); - - fp = fdopen(dup(fd), "r"); - while (!feof(fp)) { - p = fgets(buf, MAX_CMDLEN, fp); - if (!p) - break; - - idx = strlen(p) - 1; - if (p[idx] == '\n') - p[idx] = '\0'; - ret = strlist__add(sl, buf); - if (ret < 0) { - pr_debug("strlist__add failed (%d)\n", ret); - strlist__delete(sl); - return NULL; - } - } - fclose(fp); - - return sl; -} - struct kprobe_blacklist_node { struct list_head list; unsigned long start; @@ -2112,7 +2069,7 @@ static void kprobe_blacklist__delete(struct list_head *blacklist) static int kprobe_blacklist__load(struct list_head *blacklist) { struct kprobe_blacklist_node *node; - const char *__debugfs = debugfs_find_mountpoint(); + const char *__debugfs = debugfs__mountpoint(); char buf[PATH_MAX], *p; FILE *fp; int ret; @@ -2238,9 +2195,9 @@ out: } /* Show an event */ -static int show_perf_probe_event(const char *group, const char *event, - struct perf_probe_event *pev, - const char *module, bool use_stdout) +int show_perf_probe_event(const char *group, const char *event, + struct perf_probe_event *pev, + const char *module, bool use_stdout) { struct strbuf buf = STRBUF_INIT; int ret; @@ -2284,7 +2241,7 @@ static int __show_perf_probe_events(int fd, bool is_kprobe, memset(&tev, 0, sizeof(tev)); memset(&pev, 0, sizeof(pev)); - rawlist = get_probe_trace_command_rawlist(fd); + rawlist = probe_file__get_rawlist(fd); if (!rawlist) return -ENOMEM; @@ -2321,93 +2278,24 @@ int 
show_perf_probe_events(struct strfilter *filter) setup_pager(); - ret = init_symbol_maps(false); + ret = init_probe_symbol_maps(false); if (ret < 0) return ret; - kp_fd = open_kprobe_events(false); - if (kp_fd >= 0) { - ret = __show_perf_probe_events(kp_fd, true, filter); - close(kp_fd); - if (ret < 0) - goto out; - } - - up_fd = open_uprobe_events(false); - if (kp_fd < 0 && up_fd < 0) { - print_both_open_warning(kp_fd, up_fd); - ret = kp_fd; - goto out; - } + ret = probe_file__open_both(&kp_fd, &up_fd, 0); + if (ret < 0) + return ret; - if (up_fd >= 0) { + if (kp_fd >= 0) + ret = __show_perf_probe_events(kp_fd, true, filter); + if (up_fd >= 0 && ret >= 0) ret = __show_perf_probe_events(up_fd, false, filter); + if (kp_fd > 0) + close(kp_fd); + if (up_fd > 0) close(up_fd); - } -out: - exit_symbol_maps(); - return ret; -} + exit_probe_symbol_maps(); -/* Get current perf-probe event names */ -static struct strlist *get_probe_trace_event_names(int fd, bool include_group) -{ - char buf[128]; - struct strlist *sl, *rawlist; - struct str_node *ent; - struct probe_trace_event tev; - int ret = 0; - - memset(&tev, 0, sizeof(tev)); - rawlist = get_probe_trace_command_rawlist(fd); - if (!rawlist) - return NULL; - sl = strlist__new(true, NULL); - strlist__for_each(ent, rawlist) { - ret = parse_probe_trace_command(ent->s, &tev); - if (ret < 0) - break; - if (include_group) { - ret = e_snprintf(buf, 128, "%s:%s", tev.group, - tev.event); - if (ret >= 0) - ret = strlist__add(sl, buf); - } else - ret = strlist__add(sl, tev.event); - clear_probe_trace_event(&tev); - if (ret < 0) - break; - } - strlist__delete(rawlist); - - if (ret < 0) { - strlist__delete(sl); - return NULL; - } - return sl; -} - -static int write_probe_trace_event(int fd, struct probe_trace_event *tev) -{ - int ret = 0; - char *buf = synthesize_probe_trace_command(tev); - char sbuf[STRERR_BUFSIZE]; - - if (!buf) { - pr_debug("Failed to synthesize probe trace event.\n"); - return -EINVAL; - } - - pr_debug("Writing 
event: %s\n", buf); - if (!probe_event_dry_run) { - ret = write(fd, buf, strlen(buf)); - if (ret <= 0) { - ret = -errno; - pr_warning("Failed to write event: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); - } - } - free(buf); return ret; } @@ -2415,36 +2303,41 @@ static int get_new_event_name(char *buf, size_t len, const char *base, struct strlist *namelist, bool allow_suffix) { int i, ret; - char *p; + char *p, *nbase; if (*base == '.') base++; + nbase = strdup(base); + if (!nbase) + return -ENOMEM; - /* Try no suffix */ - ret = e_snprintf(buf, len, "%s", base); + /* Cut off the dot suffixes (e.g. .const, .isra)*/ + p = strchr(nbase, '.'); + if (p && p != nbase) + *p = '\0'; + + /* Try no suffix number */ + ret = e_snprintf(buf, len, "%s", nbase); if (ret < 0) { pr_debug("snprintf() failed: %d\n", ret); - return ret; + goto out; } - /* Cut off the postfixes (e.g. .const, .isra)*/ - p = strchr(buf, '.'); - if (p && p != buf) - *p = '\0'; if (!strlist__has_entry(namelist, buf)) - return 0; + goto out; if (!allow_suffix) { pr_warning("Error: event \"%s\" already exists. 
" - "(Use -f to force duplicates.)\n", base); - return -EEXIST; + "(Use -f to force duplicates.)\n", buf); + ret = -EEXIST; + goto out; } /* Try to add suffix */ for (i = 1; i < MAX_EVENT_INDEX; i++) { - ret = e_snprintf(buf, len, "%s_%d", base, i); + ret = e_snprintf(buf, len, "%s_%d", nbase, i); if (ret < 0) { pr_debug("snprintf() failed: %d\n", ret); - return ret; + goto out; } if (!strlist__has_entry(namelist, buf)) break; @@ -2454,6 +2347,8 @@ static int get_new_event_name(char *buf, size_t len, const char *base, ret = -ERANGE; } +out: + free(nbase); return ret; } @@ -2478,81 +2373,84 @@ out: free(buf); } +/* Set new name from original perf_probe_event and namelist */ +static int probe_trace_event__set_name(struct probe_trace_event *tev, + struct perf_probe_event *pev, + struct strlist *namelist, + bool allow_suffix) +{ + const char *event, *group; + char buf[64]; + int ret; + + if (pev->event) + event = pev->event; + else + if (pev->point.function && + (strncmp(pev->point.function, "0x", 2) != 0) && + !strisglob(pev->point.function)) + event = pev->point.function; + else + event = tev->point.realname; + if (pev->group) + group = pev->group; + else + group = PERFPROBE_GROUP; + + /* Get an unused new event name */ + ret = get_new_event_name(buf, 64, event, + namelist, allow_suffix); + if (ret < 0) + return ret; + + event = buf; + + tev->event = strdup(event); + tev->group = strdup(group); + if (tev->event == NULL || tev->group == NULL) + return -ENOMEM; + + /* Add added event name to namelist */ + strlist__add(namelist, event); + return 0; +} + static int __add_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event *tevs, int ntevs, bool allow_suffix) { int i, fd, ret; struct probe_trace_event *tev = NULL; - char buf[64]; - const char *event = NULL, *group = NULL; struct strlist *namelist; - bool safename; - if (pev->uprobes) - fd = open_uprobe_events(true); - else - fd = open_kprobe_events(true); - - if (fd < 0) { - print_open_warning(fd, 
!pev->uprobes); + fd = probe_file__open(PF_FL_RW | (pev->uprobes ? PF_FL_UPROBE : 0)); + if (fd < 0) return fd; - } /* Get current event names */ - namelist = get_probe_trace_event_names(fd, false); + namelist = probe_file__get_namelist(fd); if (!namelist) { pr_debug("Failed to get current event list.\n"); ret = -ENOMEM; goto close_out; } - safename = (pev->point.function && !strisglob(pev->point.function)); ret = 0; - pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); for (i = 0; i < ntevs; i++) { tev = &tevs[i]; /* Skip if the symbol is out of .text or blacklisted */ if (!tev->point.symbol) continue; - if (pev->event) - event = pev->event; - else - if (safename) - event = pev->point.function; - else - event = tev->point.realname; - if (pev->group) - group = pev->group; - else - group = PERFPROBE_GROUP; - - /* Get an unused new event name */ - ret = get_new_event_name(buf, 64, event, - namelist, allow_suffix); + /* Set new name for tev (and update namelist) */ + ret = probe_trace_event__set_name(tev, pev, namelist, + allow_suffix); if (ret < 0) break; - event = buf; - tev->event = strdup(event); - tev->group = strdup(group); - if (tev->event == NULL || tev->group == NULL) { - ret = -ENOMEM; - break; - } - ret = write_probe_trace_event(fd, tev); + ret = probe_file__add_event(fd, tev); if (ret < 0) break; - /* Add added event name to namelist */ - strlist__add(namelist, event); - - /* We use tev's name for showing new events */ - show_perf_probe_event(tev->group, tev->event, pev, - tev->point.module, false); - /* Save the last valid name */ - event = tev->event; - group = tev->group; /* * Probes after the first probe which comes from same @@ -2565,13 +2463,6 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, if (ret == -EINVAL && pev->uprobes) warn_uprobe_event_compat(tev); - /* Note that it is possible to skip all events because of blacklist */ - if (ret >= 0 && event) { - /* Show how to use the event. 
*/ - pr_info("\nYou can now use it in all perf tools, such as:\n\n"); - pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event); - } - strlist__delete(namelist); close_out: close(fd); @@ -2652,7 +2543,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, goto out; } - if (!pev->uprobes && !pp->retprobe) { + /* Note that the symbols in the kmodule are not relocated */ + if (!pev->uprobes && !pp->retprobe && !pev->target) { reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); @@ -2689,8 +2581,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } /* Add one probe point */ tp->address = map->unmap_ip(map, sym->start) + pp->offset; - /* If we found a wrong one, mark it by NULL symbol */ - if (!pev->uprobes && + + /* Check the kprobe (not in module) is within .text */ + if (!pev->uprobes && !pev->target && kprobe_warn_out_range(sym->name, tp->address)) { tp->symbol = NULL; /* Skip it */ skipped++; @@ -2748,6 +2641,98 @@ err_out: goto out; } +static int try_to_find_absolute_address(struct perf_probe_event *pev, + struct probe_trace_event **tevs) +{ + struct perf_probe_point *pp = &pev->point; + struct probe_trace_event *tev; + struct probe_trace_point *tp; + int i, err; + + if (!(pev->point.function && !strncmp(pev->point.function, "0x", 2))) + return -EINVAL; + if (perf_probe_event_need_dwarf(pev)) + return -EINVAL; + + /* + * This is 'perf probe /lib/libc.so 0xabcd'. Try to probe at + * absolute address. + * + * Only one tev can be generated by this. + */ + *tevs = zalloc(sizeof(*tev)); + if (!*tevs) + return -ENOMEM; + + tev = *tevs; + tp = &tev->point; + + /* + * Don't use tp->offset, use address directly, because + * in synthesize_probe_trace_command() address cannot be + * zero. + */ + tp->address = pev->point.abs_address; + tp->retprobe = pp->retprobe; + tev->uprobes = pev->uprobes; + + err = -ENOMEM; + /* + * Give it a '0x' leading symbol name. 
+ * In __add_probe_trace_events, a NULL symbol is interpreted as + * invalud. + */ + if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0) + goto errout; + + /* For kprobe, check range */ + if ((!tev->uprobes) && + (kprobe_warn_out_range(tev->point.symbol, + tev->point.address))) { + err = -EACCES; + goto errout; + } + + if (asprintf(&tp->realname, "abs_%lx", tp->address) < 0) + goto errout; + + if (pev->target) { + tp->module = strdup(pev->target); + if (!tp->module) + goto errout; + } + + if (tev->group) { + tev->group = strdup(pev->group); + if (!tev->group) + goto errout; + } + + if (pev->event) { + tev->event = strdup(pev->event); + if (!tev->event) + goto errout; + } + + tev->nargs = pev->nargs; + tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); + if (!tev->args) { + err = -ENOMEM; + goto errout; + } + for (i = 0; i < tev->nargs; i++) + copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]); + + return 1; + +errout: + if (*tevs) { + clear_probe_trace_events(*tevs, 1); + *tevs = NULL; + } + return err; +} + bool __weak arch__prefers_symtab(void) { return false; } static int convert_to_probe_trace_events(struct perf_probe_event *pev, @@ -2764,6 +2749,10 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, } } + ret = try_to_find_absolute_address(pev, tevs); + if (ret > 0) + return ret; + if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) { ret = find_probe_trace_events_from_map(pev, tevs); if (ret > 0) @@ -2778,174 +2767,104 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, return find_probe_trace_events_from_map(pev, tevs); } -struct __event_package { - struct perf_probe_event *pev; - struct probe_trace_event *tevs; - int ntevs; -}; - -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs) +int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs) { - int i, j, ret; - struct __event_package *pkgs; - - ret = 0; - pkgs = zalloc(sizeof(struct __event_package) * 
npevs); - - if (pkgs == NULL) - return -ENOMEM; - - ret = init_symbol_maps(pevs->uprobes); - if (ret < 0) { - free(pkgs); - return ret; - } + int i, ret; /* Loop 1: convert all events */ for (i = 0; i < npevs; i++) { - pkgs[i].pev = &pevs[i]; /* Init kprobe blacklist if needed */ - if (!pkgs[i].pev->uprobes) + if (!pevs[i].uprobes) kprobe_blacklist__init(); /* Convert with or without debuginfo */ - ret = convert_to_probe_trace_events(pkgs[i].pev, - &pkgs[i].tevs); + ret = convert_to_probe_trace_events(&pevs[i], &pevs[i].tevs); if (ret < 0) - goto end; - pkgs[i].ntevs = ret; + return ret; + pevs[i].ntevs = ret; } /* This just release blacklist only if allocated */ kprobe_blacklist__release(); + return 0; +} + +int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs) +{ + int i, ret = 0; + /* Loop 2: add all events */ for (i = 0; i < npevs; i++) { - ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs, - pkgs[i].ntevs, + ret = __add_probe_trace_events(&pevs[i], pevs[i].tevs, + pevs[i].ntevs, probe_conf.force_add); if (ret < 0) break; } -end: - /* Loop 3: cleanup and free trace events */ - for (i = 0; i < npevs; i++) { - for (j = 0; j < pkgs[i].ntevs; j++) - clear_probe_trace_event(&pkgs[i].tevs[j]); - zfree(&pkgs[i].tevs); - } - free(pkgs); - exit_symbol_maps(); - return ret; } -static int __del_trace_probe_event(int fd, struct str_node *ent) +void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs) { - char *p; - char buf[128]; - int ret; + int i, j; - /* Convert from perf-probe event to trace-probe event */ - ret = e_snprintf(buf, 128, "-:%s", ent->s); - if (ret < 0) - goto error; - - p = strchr(buf + 2, ':'); - if (!p) { - pr_debug("Internal error: %s should have ':' but not.\n", - ent->s); - ret = -ENOTSUP; - goto error; - } - *p = '/'; - - pr_debug("Writing event: %s\n", buf); - ret = write(fd, buf, strlen(buf)); - if (ret < 0) { - ret = -errno; - goto error; + /* Loop 3: cleanup and free trace events */ + for (i = 0; i < npevs; 
i++) { + for (j = 0; j < pevs[i].ntevs; j++) + clear_probe_trace_event(&pevs[i].tevs[j]); + zfree(&pevs[i].tevs); + pevs[i].ntevs = 0; + clear_perf_probe_event(&pevs[i]); } - - pr_info("Removed event: %s\n", ent->s); - return 0; -error: - pr_warning("Failed to delete event: %s\n", - strerror_r(-ret, buf, sizeof(buf))); - return ret; } -static int del_trace_probe_events(int fd, struct strfilter *filter, - struct strlist *namelist) +int add_perf_probe_events(struct perf_probe_event *pevs, int npevs) { - struct str_node *ent; - const char *p; - int ret = -ENOENT; + int ret; - if (!namelist) - return -ENOENT; + ret = init_probe_symbol_maps(pevs->uprobes); + if (ret < 0) + return ret; - strlist__for_each(ent, namelist) { - p = strchr(ent->s, ':'); - if ((p && strfilter__compare(filter, p + 1)) || - strfilter__compare(filter, ent->s)) { - ret = __del_trace_probe_event(fd, ent); - if (ret < 0) - break; - } - } + ret = convert_perf_probe_events(pevs, npevs); + if (ret == 0) + ret = apply_perf_probe_events(pevs, npevs); + cleanup_perf_probe_events(pevs, npevs); + + exit_probe_symbol_maps(); return ret; } int del_perf_probe_events(struct strfilter *filter) { int ret, ret2, ufd = -1, kfd = -1; - struct strlist *namelist = NULL, *unamelist = NULL; char *str = strfilter__string(filter); if (!str) return -EINVAL; - pr_debug("Delete filter: \'%s\'\n", str); - /* Get current event names */ - kfd = open_kprobe_events(true); - if (kfd >= 0) - namelist = get_probe_trace_event_names(kfd, true); - - ufd = open_uprobe_events(true); - if (ufd >= 0) - unamelist = get_probe_trace_event_names(ufd, true); - - if (kfd < 0 && ufd < 0) { - print_both_open_warning(kfd, ufd); - ret = kfd; - goto error; - } + ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW); + if (ret < 0) + goto out; - ret = del_trace_probe_events(kfd, filter, namelist); + ret = probe_file__del_events(kfd, filter); if (ret < 0 && ret != -ENOENT) goto error; - ret2 = del_trace_probe_events(ufd, filter, unamelist); + ret2 = 
probe_file__del_events(ufd, filter); if (ret2 < 0 && ret2 != -ENOENT) { ret = ret2; goto error; } - if (ret == -ENOENT && ret2 == -ENOENT) - pr_debug("\"%s\" does not hit any event.\n", str); - /* Note that this is silently ignored */ ret = 0; error: - if (kfd >= 0) { - strlist__delete(namelist); + if (kfd >= 0) close(kfd); - } - - if (ufd >= 0) { - strlist__delete(unamelist); + if (ufd >= 0) close(ufd); - } +out: free(str); return ret; @@ -2972,7 +2891,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, struct map *map; int ret; - ret = init_symbol_maps(user); + ret = init_probe_symbol_maps(user); if (ret < 0) return ret; @@ -3002,8 +2921,27 @@ end: if (user) { map__put(map); } - exit_symbol_maps(); + exit_probe_symbol_maps(); return ret; } +int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, + struct perf_probe_arg *pvar) +{ + tvar->value = strdup(pvar->var); + if (tvar->value == NULL) + return -ENOMEM; + if (pvar->type) { + tvar->type = strdup(pvar->type); + if (tvar->type == NULL) + return -ENOMEM; + } + if (pvar->name) { + tvar->name = strdup(pvar->name); + if (tvar->name == NULL) + return -ENOMEM; + } else + tvar->name = NULL; + return 0; +} diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 31db6ee7db54..ba926c30f8cd 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -59,6 +59,7 @@ struct perf_probe_point { bool retprobe; /* Return probe flag */ char *lazy_line; /* Lazy matching pattern */ unsigned long offset; /* Offset from function entry */ + unsigned long abs_address; /* Absolute address of the point */ }; /* Perf probe probing argument field chain */ @@ -86,6 +87,8 @@ struct perf_probe_event { bool uprobes; /* Uprobe event flag */ char *target; /* Target binary */ struct perf_probe_arg *args; /* Arguments */ + struct probe_trace_event *tevs; + int ntevs; }; /* Line range */ @@ -106,9 +109,15 @@ struct variable_list { struct strlist *vars; /* Available variables 
*/ }; +struct map; +int init_probe_symbol_maps(bool user_only); +void exit_probe_symbol_maps(void); + /* Command string to events */ extern int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev); +extern int parse_probe_trace_command(const char *cmd, + struct probe_trace_event *tev); /* Events to command string */ extern char *synthesize_perf_probe_command(struct perf_probe_event *pev); @@ -121,6 +130,7 @@ extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); /* Release event contents */ extern void clear_perf_probe_event(struct perf_probe_event *pev); +extern void clear_probe_trace_event(struct probe_trace_event *tev); /* Command string to line-range */ extern int parse_line_range_desc(const char *cmd, struct line_range *lr); @@ -132,7 +142,14 @@ extern void line_range__clear(struct line_range *lr); extern int line_range__init(struct line_range *lr); extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); +extern int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); +extern int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); +extern void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); extern int del_perf_probe_events(struct strfilter *filter); + +extern int show_perf_probe_event(const char *group, const char *event, + struct perf_probe_event *pev, + const char *module, bool use_stdout); extern int show_perf_probe_events(struct strfilter *filter); extern int show_line_range(struct line_range *lr, const char *module, bool user); @@ -144,7 +161,14 @@ bool arch__prefers_symtab(void); void arch__fix_tev_from_maps(struct perf_probe_event *pev, struct probe_trace_event *tev, struct map *map); +/* If there is no space to write, returns -E2BIG. */ +int e_snprintf(char *str, size_t size, const char *format, ...) 
+ __attribute__((format(printf, 3, 4))); + /* Maximum index number of event-name postfix */ #define MAX_EVENT_INDEX 1024 +int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, + struct perf_probe_arg *pvar); + #endif /*_PROBE_EVENT_H */ diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c new file mode 100644 index 000000000000..89dbeb92c68e --- /dev/null +++ b/tools/perf/util/probe-file.c @@ -0,0 +1,321 @@ +/* + * probe-file.c : operate ftrace k/uprobe events files + * + * Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include "util.h" +#include "event.h" +#include "strlist.h" +#include "debug.h" +#include "cache.h" +#include "color.h" +#include "symbol.h" +#include "thread.h" +#include <api/fs/tracing_path.h> +#include "probe-event.h" +#include "probe-file.h" +#include "session.h" + +#define MAX_CMDLEN 256 + +static void print_open_warning(int err, bool uprobe) +{ + char sbuf[STRERR_BUFSIZE]; + + if (err == -ENOENT) { + const char *config; + + if (uprobe) + config = "CONFIG_UPROBE_EVENTS"; + else + config = "CONFIG_KPROBE_EVENTS"; + + pr_warning("%cprobe_events file does not exist" + " - please rebuild kernel with %s.\n", + uprobe ? 'u' : 'k', config); + } else if (err == -ENOTSUP) + pr_warning("Tracefs or debugfs is not mounted.\n"); + else + pr_warning("Failed to open %cprobe_events: %s\n", + uprobe ? 
'u' : 'k', + strerror_r(-err, sbuf, sizeof(sbuf))); +} + +static void print_both_open_warning(int kerr, int uerr) +{ + /* Both kprobes and uprobes are disabled, warn it. */ + if (kerr == -ENOTSUP && uerr == -ENOTSUP) + pr_warning("Tracefs or debugfs is not mounted.\n"); + else if (kerr == -ENOENT && uerr == -ENOENT) + pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS " + "or/and CONFIG_UPROBE_EVENTS.\n"); + else { + char sbuf[STRERR_BUFSIZE]; + pr_warning("Failed to open kprobe events: %s.\n", + strerror_r(-kerr, sbuf, sizeof(sbuf))); + pr_warning("Failed to open uprobe events: %s.\n", + strerror_r(-uerr, sbuf, sizeof(sbuf))); + } +} + +static int open_probe_events(const char *trace_file, bool readwrite) +{ + char buf[PATH_MAX]; + const char *tracing_dir = ""; + int ret; + + ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", + tracing_path, tracing_dir, trace_file); + if (ret >= 0) { + pr_debug("Opening %s write=%d\n", buf, readwrite); + if (readwrite && !probe_event_dry_run) + ret = open(buf, O_RDWR | O_APPEND, 0); + else + ret = open(buf, O_RDONLY, 0); + + if (ret < 0) + ret = -errno; + } + return ret; +} + +static int open_kprobe_events(bool readwrite) +{ + return open_probe_events("kprobe_events", readwrite); +} + +static int open_uprobe_events(bool readwrite) +{ + return open_probe_events("uprobe_events", readwrite); +} + +int probe_file__open(int flag) +{ + int fd; + + if (flag & PF_FL_UPROBE) + fd = open_uprobe_events(flag & PF_FL_RW); + else + fd = open_kprobe_events(flag & PF_FL_RW); + if (fd < 0) + print_open_warning(fd, flag & PF_FL_UPROBE); + + return fd; +} + +int probe_file__open_both(int *kfd, int *ufd, int flag) +{ + if (!kfd || !ufd) + return -EINVAL; + + *kfd = open_kprobe_events(flag & PF_FL_RW); + *ufd = open_uprobe_events(flag & PF_FL_RW); + if (*kfd < 0 && *ufd < 0) { + print_both_open_warning(*kfd, *ufd); + return *kfd; + } + + return 0; +} + +/* Get raw string list of current kprobe_events or uprobe_events */ +struct strlist 
*probe_file__get_rawlist(int fd) +{ + int ret, idx; + FILE *fp; + char buf[MAX_CMDLEN]; + char *p; + struct strlist *sl; + + sl = strlist__new(NULL, NULL); + + fp = fdopen(dup(fd), "r"); + while (!feof(fp)) { + p = fgets(buf, MAX_CMDLEN, fp); + if (!p) + break; + + idx = strlen(p) - 1; + if (p[idx] == '\n') + p[idx] = '\0'; + ret = strlist__add(sl, buf); + if (ret < 0) { + pr_debug("strlist__add failed (%d)\n", ret); + strlist__delete(sl); + return NULL; + } + } + fclose(fp); + + return sl; +} + +static struct strlist *__probe_file__get_namelist(int fd, bool include_group) +{ + char buf[128]; + struct strlist *sl, *rawlist; + struct str_node *ent; + struct probe_trace_event tev; + int ret = 0; + + memset(&tev, 0, sizeof(tev)); + rawlist = probe_file__get_rawlist(fd); + if (!rawlist) + return NULL; + sl = strlist__new(NULL, NULL); + strlist__for_each(ent, rawlist) { + ret = parse_probe_trace_command(ent->s, &tev); + if (ret < 0) + break; + if (include_group) { + ret = e_snprintf(buf, 128, "%s:%s", tev.group, + tev.event); + if (ret >= 0) + ret = strlist__add(sl, buf); + } else + ret = strlist__add(sl, tev.event); + clear_probe_trace_event(&tev); + if (ret < 0) + break; + } + strlist__delete(rawlist); + + if (ret < 0) { + strlist__delete(sl); + return NULL; + } + return sl; +} + +/* Get current perf-probe event names */ +struct strlist *probe_file__get_namelist(int fd) +{ + return __probe_file__get_namelist(fd, false); +} + +int probe_file__add_event(int fd, struct probe_trace_event *tev) +{ + int ret = 0; + char *buf = synthesize_probe_trace_command(tev); + char sbuf[STRERR_BUFSIZE]; + + if (!buf) { + pr_debug("Failed to synthesize probe trace event.\n"); + return -EINVAL; + } + + pr_debug("Writing event: %s\n", buf); + if (!probe_event_dry_run) { + ret = write(fd, buf, strlen(buf)); + if (ret <= 0) { + ret = -errno; + pr_warning("Failed to write event: %s\n", + strerror_r(errno, sbuf, sizeof(sbuf))); + } + } + free(buf); + + return ret; +} + +static int 
__del_trace_probe_event(int fd, struct str_node *ent) +{ + char *p; + char buf[128]; + int ret; + + /* Convert from perf-probe event to trace-probe event */ + ret = e_snprintf(buf, 128, "-:%s", ent->s); + if (ret < 0) + goto error; + + p = strchr(buf + 2, ':'); + if (!p) { + pr_debug("Internal error: %s should have ':' but not.\n", + ent->s); + ret = -ENOTSUP; + goto error; + } + *p = '/'; + + pr_debug("Writing event: %s\n", buf); + ret = write(fd, buf, strlen(buf)); + if (ret < 0) { + ret = -errno; + goto error; + } + + return 0; +error: + pr_warning("Failed to delete event: %s\n", + strerror_r(-ret, buf, sizeof(buf))); + return ret; +} + +int probe_file__get_events(int fd, struct strfilter *filter, + struct strlist *plist) +{ + struct strlist *namelist; + struct str_node *ent; + const char *p; + int ret = -ENOENT; + + namelist = __probe_file__get_namelist(fd, true); + if (!namelist) + return -ENOENT; + + strlist__for_each(ent, namelist) { + p = strchr(ent->s, ':'); + if ((p && strfilter__compare(filter, p + 1)) || + strfilter__compare(filter, ent->s)) { + strlist__add(plist, ent->s); + ret = 0; + } + } + strlist__delete(namelist); + + return ret; +} + +int probe_file__del_strlist(int fd, struct strlist *namelist) +{ + int ret = 0; + struct str_node *ent; + + strlist__for_each(ent, namelist) { + ret = __del_trace_probe_event(fd, ent); + if (ret < 0) + break; + } + return ret; +} + +int probe_file__del_events(int fd, struct strfilter *filter) +{ + struct strlist *namelist; + int ret; + + namelist = strlist__new(NULL, NULL); + if (!namelist) + return -ENOMEM; + + ret = probe_file__get_events(fd, filter, namelist); + if (ret < 0) + return ret; + + ret = probe_file__del_strlist(fd, namelist); + strlist__delete(namelist); + + return ret; +} diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h new file mode 100644 index 000000000000..18ac9cf51c34 --- /dev/null +++ b/tools/perf/util/probe-file.h @@ -0,0 +1,22 @@ +#ifndef __PROBE_FILE_H +#define 
__PROBE_FILE_H + +#include "strlist.h" +#include "strfilter.h" +#include "probe-event.h" + +#define PF_FL_UPROBE 1 +#define PF_FL_RW 2 + +int probe_file__open(int flag); +int probe_file__open_both(int *kfd, int *ufd, int flag); +struct strlist *probe_file__get_namelist(int fd); +struct strlist *probe_file__get_rawlist(int fd); +int probe_file__add_event(int fd, struct probe_trace_event *tev); +int probe_file__del_events(int fd, struct strfilter *filter); +int probe_file__get_events(int fd, struct strfilter *filter, + struct strlist *plist); +int probe_file__del_strlist(int fd, struct strlist *namelist); + + +#endif diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 2da65a710893..bd8f03de5e40 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -70,6 +70,7 @@ static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, if (!dbg->dwfl) goto error; + dwfl_report_begin(dbg->dwfl); dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); if (!dbg->mod) goto error; @@ -78,6 +79,8 @@ static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, if (!dbg->dbg) goto error; + dwfl_report_end(dbg->dwfl, NULL, NULL); + return 0; error: if (dbg->dwfl) @@ -553,24 +556,9 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) char buf[32], *ptr; int ret = 0; - if (!is_c_varname(pf->pvar->var)) { - /* Copy raw parameters */ - pf->tvar->value = strdup(pf->pvar->var); - if (pf->tvar->value == NULL) - return -ENOMEM; - if (pf->pvar->type) { - pf->tvar->type = strdup(pf->pvar->type); - if (pf->tvar->type == NULL) - return -ENOMEM; - } - if (pf->pvar->name) { - pf->tvar->name = strdup(pf->pvar->name); - if (pf->tvar->name == NULL) - return -ENOMEM; - } else - pf->tvar->name = NULL; - return 0; - } + /* Copy raw parameters */ + if (!is_c_varname(pf->pvar->var)) + return copy_to_probe_trace_arg(pf->tvar, pf->pvar); if (pf->pvar->name) pf->tvar->name = strdup(pf->pvar->name); @@ -606,6 +594,7 @@ static int 
find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) /* Convert subprogram DIE to trace point */ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, Dwarf_Addr paddr, bool retprobe, + const char *function, struct probe_trace_point *tp) { Dwarf_Addr eaddr, highaddr; @@ -649,8 +638,10 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, /* Return probe must be on the head of a subprogram */ if (retprobe) { if (eaddr != paddr) { - pr_warning("Return probe must be on the head of" - " a real function.\n"); + pr_warning("Failed to find \"%s%%return\",\n" + " because %s is an inlined function and" + " has no return point.\n", function, + function); return -EINVAL; } tp->retprobe = true; @@ -1190,6 +1181,7 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) { struct trace_event_finder *tf = container_of(pf, struct trace_event_finder, pf); + struct perf_probe_point *pp = &pf->pev->point; struct probe_trace_event *tev; struct perf_probe_arg *args; int ret, i; @@ -1204,7 +1196,7 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) /* Trace point should be converted from subprogram DIE */ ret = convert_to_trace_point(&pf->sp_die, tf->mod, pf->addr, - pf->pev->point.retprobe, &tev->point); + pp->retprobe, pp->function, &tev->point); if (ret < 0) return ret; @@ -1334,6 +1326,7 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) { struct available_var_finder *af = container_of(pf, struct available_var_finder, pf); + struct perf_probe_point *pp = &pf->pev->point; struct variable_list *vl; Dwarf_Die die_mem; int ret; @@ -1347,7 +1340,7 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) /* Trace point should be converted from subprogram DIE */ ret = convert_to_trace_point(&pf->sp_die, af->mod, pf->addr, - pf->pev->point.retprobe, &vl->point); + pp->retprobe, pp->function, &vl->point); if (ret < 0) return ret; @@ -1355,7 +1348,7 @@ 
static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) vl->point.offset); /* Find local variables */ - vl->vars = strlist__new(true, NULL); + vl->vars = strlist__new(NULL, NULL); if (vl->vars == NULL) return -ENOMEM; af->child = true; @@ -1414,6 +1407,41 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg, return (ret < 0) ? ret : af.nvls; } +/* For the kernel module, we need a special code to get a DIE */ +static int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs) +{ + int n, i; + Elf32_Word shndx; + Elf_Scn *scn; + Elf *elf; + GElf_Shdr mem, *shdr; + const char *p; + + elf = dwfl_module_getelf(dbg->mod, &dbg->bias); + if (!elf) + return -EINVAL; + + /* Get the number of relocations */ + n = dwfl_module_relocations(dbg->mod); + if (n < 0) + return -ENOENT; + /* Search the relocation related .text section */ + for (i = 0; i < n; i++) { + p = dwfl_module_relocation_info(dbg->mod, i, &shndx); + if (strcmp(p, ".text") == 0) { + /* OK, get the section header */ + scn = elf_getscn(elf, shndx); + if (!scn) + return -ENOENT; + shdr = gelf_getshdr(scn, &mem); + if (!shdr) + return -ENOENT; + *offs = shdr->sh_addr; + } + } + return 0; +} + /* Reverse search */ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, struct perf_probe_point *ppt) @@ -1422,9 +1450,16 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, Dwarf_Addr _addr = 0, baseaddr = 0; const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp; int baseline = 0, lineno = 0, ret = 0; + bool reloc = false; +retry: /* Find cu die */ if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { + if (!reloc && debuginfo__get_text_offset(dbg, &baseaddr) == 0) { + addr += baseaddr; + reloc = true; + goto retry; + } pr_warning("Failed to find debug information for address %lx\n", addr); ret = -EINVAL; diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 0766d98c5da5..51be28b1bca2 
100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -16,7 +16,7 @@ util/util.c util/xyarray.c util/cgroup.c util/rblist.c -util/stat.c +util/counts.c util/strlist.c util/trace-event.c ../lib/rbtree.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 626422eda727..98f127abfa42 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -67,6 +67,7 @@ static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object."); static PyMemberDef pyrf_mmap_event__members[] = { sample_members member_def(perf_event_header, type, T_UINT, "event type"), + member_def(perf_event_header, misc, T_UINT, "event misc"), member_def(mmap_event, pid, T_UINT, "event pid"), member_def(mmap_event, tid, T_UINT, "event tid"), member_def(mmap_event, start, T_ULONGLONG, "start of the map"), @@ -297,6 +298,43 @@ static PyTypeObject pyrf_sample_event__type = { .tp_repr = (reprfunc)pyrf_sample_event__repr, }; +static char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object."); + +static PyMemberDef pyrf_context_switch_event__members[] = { + sample_members + member_def(perf_event_header, type, T_UINT, "event type"), + member_def(context_switch_event, next_prev_pid, T_UINT, "next/prev pid"), + member_def(context_switch_event, next_prev_tid, T_UINT, "next/prev tid"), + { .name = NULL, }, +}; + +static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent) +{ + PyObject *ret; + char *s; + + if (asprintf(&s, "{ type: context_switch, next_prev_pid: %u, next_prev_tid: %u, switch_out: %u }", + pevent->event.context_switch.next_prev_pid, + pevent->event.context_switch.next_prev_tid, + !!(pevent->event.header.misc & PERF_RECORD_MISC_SWITCH_OUT)) < 0) { + ret = PyErr_NoMemory(); + } else { + ret = PyString_FromString(s); + free(s); + } + return ret; +} + +static PyTypeObject pyrf_context_switch_event__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.context_switch_event", + 
.tp_basicsize = sizeof(struct pyrf_event), + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_context_switch_event__doc, + .tp_members = pyrf_context_switch_event__members, + .tp_repr = (reprfunc)pyrf_context_switch_event__repr, +}; + static int pyrf_event__setup_types(void) { int err; @@ -306,6 +344,7 @@ static int pyrf_event__setup_types(void) pyrf_lost_event__type.tp_new = pyrf_read_event__type.tp_new = pyrf_sample_event__type.tp_new = + pyrf_context_switch_event__type.tp_new = pyrf_throttle_event__type.tp_new = PyType_GenericNew; err = PyType_Ready(&pyrf_mmap_event__type); if (err < 0) @@ -328,6 +367,9 @@ static int pyrf_event__setup_types(void) err = PyType_Ready(&pyrf_sample_event__type); if (err < 0) goto out; + err = PyType_Ready(&pyrf_context_switch_event__type); + if (err < 0) + goto out; out: return err; } @@ -342,6 +384,8 @@ static PyTypeObject *pyrf_event__type[] = { [PERF_RECORD_FORK] = &pyrf_task_event__type, [PERF_RECORD_READ] = &pyrf_read_event__type, [PERF_RECORD_SAMPLE] = &pyrf_sample_event__type, + [PERF_RECORD_SWITCH] = &pyrf_context_switch_event__type, + [PERF_RECORD_SWITCH_CPU_WIDE] = &pyrf_context_switch_event__type, }; static PyObject *pyrf_event__new(union perf_event *event) @@ -349,8 +393,10 @@ static PyObject *pyrf_event__new(union perf_event *event) struct pyrf_event *pevent; PyTypeObject *ptype; - if (event->header.type < PERF_RECORD_MMAP || - event->header.type > PERF_RECORD_SAMPLE) + if ((event->header.type < PERF_RECORD_MMAP || + event->header.type > PERF_RECORD_SAMPLE) && + !(event->header.type == PERF_RECORD_SWITCH || + event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)) return NULL; ptype = pyrf_event__type[event->header.type]; @@ -528,6 +574,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel, "exclude_hv", "exclude_idle", "mmap", + "context_switch", "comm", "freq", "inherit_stat", @@ -553,6 +600,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel, exclude_hv = 0, exclude_idle = 0, mmap = 0, + 
context_switch = 0, comm = 0, freq = 1, inherit_stat = 0, @@ -565,13 +613,13 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel, int idx = 0; if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "|iKiKKiiiiiiiiiiiiiiiiiiiiiKK", kwlist, + "|iKiKKiiiiiiiiiiiiiiiiiiiiiiKK", kwlist, &attr.type, &attr.config, &attr.sample_freq, &sample_period, &attr.sample_type, &attr.read_format, &disabled, &inherit, &pinned, &exclusive, &exclude_user, &exclude_kernel, &exclude_hv, &exclude_idle, - &mmap, &comm, &freq, &inherit_stat, + &mmap, &context_switch, &comm, &freq, &inherit_stat, &enable_on_exec, &task, &watermark, &precise_ip, &mmap_data, &sample_id_all, &attr.wakeup_events, &attr.bp_type, @@ -595,6 +643,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel, attr.exclude_hv = exclude_hv; attr.exclude_idle = exclude_idle; attr.mmap = mmap; + attr.context_switch = context_switch; attr.comm = comm; attr.freq = freq; attr.inherit_stat = inherit_stat; @@ -941,76 +990,86 @@ static int pyrf_evlist__setup_types(void) return PyType_Ready(&pyrf_evlist__type); } +#define PERF_CONST(name) { #name, PERF_##name } + static struct { const char *name; int value; } perf__constants[] = { - { "TYPE_HARDWARE", PERF_TYPE_HARDWARE }, - { "TYPE_SOFTWARE", PERF_TYPE_SOFTWARE }, - { "TYPE_TRACEPOINT", PERF_TYPE_TRACEPOINT }, - { "TYPE_HW_CACHE", PERF_TYPE_HW_CACHE }, - { "TYPE_RAW", PERF_TYPE_RAW }, - { "TYPE_BREAKPOINT", PERF_TYPE_BREAKPOINT }, - - { "COUNT_HW_CPU_CYCLES", PERF_COUNT_HW_CPU_CYCLES }, - { "COUNT_HW_INSTRUCTIONS", PERF_COUNT_HW_INSTRUCTIONS }, - { "COUNT_HW_CACHE_REFERENCES", PERF_COUNT_HW_CACHE_REFERENCES }, - { "COUNT_HW_CACHE_MISSES", PERF_COUNT_HW_CACHE_MISSES }, - { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, - { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES }, - { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES }, - { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D }, - { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I }, - { 
"COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL }, - { "COUNT_HW_CACHE_DTLB", PERF_COUNT_HW_CACHE_DTLB }, - { "COUNT_HW_CACHE_ITLB", PERF_COUNT_HW_CACHE_ITLB }, - { "COUNT_HW_CACHE_BPU", PERF_COUNT_HW_CACHE_BPU }, - { "COUNT_HW_CACHE_OP_READ", PERF_COUNT_HW_CACHE_OP_READ }, - { "COUNT_HW_CACHE_OP_WRITE", PERF_COUNT_HW_CACHE_OP_WRITE }, - { "COUNT_HW_CACHE_OP_PREFETCH", PERF_COUNT_HW_CACHE_OP_PREFETCH }, - { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS }, - { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS }, - - { "COUNT_HW_STALLED_CYCLES_FRONTEND", PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, - { "COUNT_HW_STALLED_CYCLES_BACKEND", PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, - - { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK }, - { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK }, - { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS }, - { "COUNT_SW_CONTEXT_SWITCHES", PERF_COUNT_SW_CONTEXT_SWITCHES }, - { "COUNT_SW_CPU_MIGRATIONS", PERF_COUNT_SW_CPU_MIGRATIONS }, - { "COUNT_SW_PAGE_FAULTS_MIN", PERF_COUNT_SW_PAGE_FAULTS_MIN }, - { "COUNT_SW_PAGE_FAULTS_MAJ", PERF_COUNT_SW_PAGE_FAULTS_MAJ }, - { "COUNT_SW_ALIGNMENT_FAULTS", PERF_COUNT_SW_ALIGNMENT_FAULTS }, - { "COUNT_SW_EMULATION_FAULTS", PERF_COUNT_SW_EMULATION_FAULTS }, - { "COUNT_SW_DUMMY", PERF_COUNT_SW_DUMMY }, - - { "SAMPLE_IP", PERF_SAMPLE_IP }, - { "SAMPLE_TID", PERF_SAMPLE_TID }, - { "SAMPLE_TIME", PERF_SAMPLE_TIME }, - { "SAMPLE_ADDR", PERF_SAMPLE_ADDR }, - { "SAMPLE_READ", PERF_SAMPLE_READ }, - { "SAMPLE_CALLCHAIN", PERF_SAMPLE_CALLCHAIN }, - { "SAMPLE_ID", PERF_SAMPLE_ID }, - { "SAMPLE_CPU", PERF_SAMPLE_CPU }, - { "SAMPLE_PERIOD", PERF_SAMPLE_PERIOD }, - { "SAMPLE_STREAM_ID", PERF_SAMPLE_STREAM_ID }, - { "SAMPLE_RAW", PERF_SAMPLE_RAW }, - - { "FORMAT_TOTAL_TIME_ENABLED", PERF_FORMAT_TOTAL_TIME_ENABLED }, - { "FORMAT_TOTAL_TIME_RUNNING", PERF_FORMAT_TOTAL_TIME_RUNNING }, - { "FORMAT_ID", PERF_FORMAT_ID }, - { "FORMAT_GROUP", PERF_FORMAT_GROUP }, - - { "RECORD_MMAP", 
PERF_RECORD_MMAP }, - { "RECORD_LOST", PERF_RECORD_LOST }, - { "RECORD_COMM", PERF_RECORD_COMM }, - { "RECORD_EXIT", PERF_RECORD_EXIT }, - { "RECORD_THROTTLE", PERF_RECORD_THROTTLE }, - { "RECORD_UNTHROTTLE", PERF_RECORD_UNTHROTTLE }, - { "RECORD_FORK", PERF_RECORD_FORK }, - { "RECORD_READ", PERF_RECORD_READ }, - { "RECORD_SAMPLE", PERF_RECORD_SAMPLE }, + PERF_CONST(TYPE_HARDWARE), + PERF_CONST(TYPE_SOFTWARE), + PERF_CONST(TYPE_TRACEPOINT), + PERF_CONST(TYPE_HW_CACHE), + PERF_CONST(TYPE_RAW), + PERF_CONST(TYPE_BREAKPOINT), + + PERF_CONST(COUNT_HW_CPU_CYCLES), + PERF_CONST(COUNT_HW_INSTRUCTIONS), + PERF_CONST(COUNT_HW_CACHE_REFERENCES), + PERF_CONST(COUNT_HW_CACHE_MISSES), + PERF_CONST(COUNT_HW_BRANCH_INSTRUCTIONS), + PERF_CONST(COUNT_HW_BRANCH_MISSES), + PERF_CONST(COUNT_HW_BUS_CYCLES), + PERF_CONST(COUNT_HW_CACHE_L1D), + PERF_CONST(COUNT_HW_CACHE_L1I), + PERF_CONST(COUNT_HW_CACHE_LL), + PERF_CONST(COUNT_HW_CACHE_DTLB), + PERF_CONST(COUNT_HW_CACHE_ITLB), + PERF_CONST(COUNT_HW_CACHE_BPU), + PERF_CONST(COUNT_HW_CACHE_OP_READ), + PERF_CONST(COUNT_HW_CACHE_OP_WRITE), + PERF_CONST(COUNT_HW_CACHE_OP_PREFETCH), + PERF_CONST(COUNT_HW_CACHE_RESULT_ACCESS), + PERF_CONST(COUNT_HW_CACHE_RESULT_MISS), + + PERF_CONST(COUNT_HW_STALLED_CYCLES_FRONTEND), + PERF_CONST(COUNT_HW_STALLED_CYCLES_BACKEND), + + PERF_CONST(COUNT_SW_CPU_CLOCK), + PERF_CONST(COUNT_SW_TASK_CLOCK), + PERF_CONST(COUNT_SW_PAGE_FAULTS), + PERF_CONST(COUNT_SW_CONTEXT_SWITCHES), + PERF_CONST(COUNT_SW_CPU_MIGRATIONS), + PERF_CONST(COUNT_SW_PAGE_FAULTS_MIN), + PERF_CONST(COUNT_SW_PAGE_FAULTS_MAJ), + PERF_CONST(COUNT_SW_ALIGNMENT_FAULTS), + PERF_CONST(COUNT_SW_EMULATION_FAULTS), + PERF_CONST(COUNT_SW_DUMMY), + + PERF_CONST(SAMPLE_IP), + PERF_CONST(SAMPLE_TID), + PERF_CONST(SAMPLE_TIME), + PERF_CONST(SAMPLE_ADDR), + PERF_CONST(SAMPLE_READ), + PERF_CONST(SAMPLE_CALLCHAIN), + PERF_CONST(SAMPLE_ID), + PERF_CONST(SAMPLE_CPU), + PERF_CONST(SAMPLE_PERIOD), + PERF_CONST(SAMPLE_STREAM_ID), + PERF_CONST(SAMPLE_RAW), + + 
PERF_CONST(FORMAT_TOTAL_TIME_ENABLED), + PERF_CONST(FORMAT_TOTAL_TIME_RUNNING), + PERF_CONST(FORMAT_ID), + PERF_CONST(FORMAT_GROUP), + + PERF_CONST(RECORD_MMAP), + PERF_CONST(RECORD_LOST), + PERF_CONST(RECORD_COMM), + PERF_CONST(RECORD_EXIT), + PERF_CONST(RECORD_THROTTLE), + PERF_CONST(RECORD_UNTHROTTLE), + PERF_CONST(RECORD_FORK), + PERF_CONST(RECORD_READ), + PERF_CONST(RECORD_SAMPLE), + PERF_CONST(RECORD_MMAP2), + PERF_CONST(RECORD_AUX), + PERF_CONST(RECORD_ITRACE_START), + PERF_CONST(RECORD_LOST_SAMPLES), + PERF_CONST(RECORD_SWITCH), + PERF_CONST(RECORD_SWITCH_CPU_WIDE), + + PERF_CONST(RECORD_MISC_SWITCH_OUT), { .name = NULL, }, }; diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 1f7becbe5e18..0467367dc315 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -85,6 +85,11 @@ static void perf_probe_comm_exec(struct perf_evsel *evsel) evsel->attr.comm_exec = 1; } +static void perf_probe_context_switch(struct perf_evsel *evsel) +{ + evsel->attr.context_switch = 1; +} + bool perf_can_sample_identifier(void) { return perf_probe_api(perf_probe_sample_identifier); @@ -95,6 +100,35 @@ static bool perf_can_comm_exec(void) return perf_probe_api(perf_probe_comm_exec); } +bool perf_can_record_switch_events(void) +{ + return perf_probe_api(perf_probe_context_switch); +} + +bool perf_can_record_cpu_wide(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .exclude_kernel = 1, + }; + struct cpu_map *cpus; + int cpu, fd; + + cpus = cpu_map__new(NULL); + if (!cpus) + return false; + cpu = cpus->map[0]; + cpu_map__put(cpus); + + fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); + if (fd < 0) + return false; + close(fd); + + return true; +} + void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) { struct perf_evsel *evsel; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 
1bd593bbf7a5..544509c159ce 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -221,6 +221,7 @@ static void define_event_symbols(struct event_format *event, break; case PRINT_BSTRING: case PRINT_DYNAMIC_ARRAY: + case PRINT_DYNAMIC_ARRAY_LEN: case PRINT_STRING: case PRINT_BITMASK: break; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index ace2484985cb..a8e825fca42a 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -251,6 +251,7 @@ static void define_event_symbols(struct event_format *event, /* gcc warns for these? */ case PRINT_BSTRING: case PRINT_DYNAMIC_ARRAY: + case PRINT_DYNAMIC_ARRAY_LEN: case PRINT_FUNC: case PRINT_BITMASK: /* we should warn... */ @@ -318,7 +319,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample, if (thread__resolve_callchain(al->thread, evsel, sample, NULL, NULL, - PERF_MAX_STACK_DEPTH) != 0) { + scripting_max_stack) != 0) { pr_err("Failed to resolve callchain. 
Skipping\n"); goto exit; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ed9dc2555ec7..428149bc64d2 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -138,6 +138,8 @@ struct perf_session *perf_session__new(struct perf_data_file *file, perf_session__set_id_hdr_size(session); perf_session__set_comm_exec(session); } + } else { + session->machines.host.env = &perf_env; } if (!file || perf_data_file__is_write(file)) { @@ -170,29 +172,13 @@ static void perf_session__delete_threads(struct perf_session *session) machine__delete_threads(&session->machines.host); } -static void perf_session_env__delete(struct perf_session_env *env) -{ - zfree(&env->hostname); - zfree(&env->os_release); - zfree(&env->version); - zfree(&env->arch); - zfree(&env->cpu_desc); - zfree(&env->cpuid); - - zfree(&env->cmdline); - zfree(&env->sibling_cores); - zfree(&env->sibling_threads); - zfree(&env->numa_nodes); - zfree(&env->pmu_mappings); -} - void perf_session__delete(struct perf_session *session) { auxtrace__free(session); auxtrace_index__free(&session->auxtrace_index); perf_session__destroy_kernel_maps(session); perf_session__delete_threads(session); - perf_session_env__delete(&session->header.env); + perf_env__exit(&session->header.env); machines__exit(&session->machines); if (session->file) perf_data_file__close(session->file); @@ -332,6 +318,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->aux = perf_event__process_aux; if (tool->itrace_start == NULL) tool->itrace_start = perf_event__process_itrace_start; + if (tool->context_switch == NULL) + tool->context_switch = perf_event__process_switch; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -470,6 +458,19 @@ static void perf_event__itrace_start_swap(union perf_event *event, swap_sample_id_all(event, &event->itrace_start + 1); } +static void perf_event__switch_swap(union perf_event *event, bool sample_id_all) +{ + if 
(event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) { + event->context_switch.next_prev_pid = + bswap_32(event->context_switch.next_prev_pid); + event->context_switch.next_prev_tid = + bswap_32(event->context_switch.next_prev_tid); + } + + if (sample_id_all) + swap_sample_id_all(event, &event->context_switch + 1); +} + static void perf_event__throttle_swap(union perf_event *event, bool sample_id_all) { @@ -632,6 +633,8 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_AUX] = perf_event__aux_swap, [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap, [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap, + [PERF_RECORD_SWITCH] = perf_event__switch_swap, + [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -766,10 +769,18 @@ static void branch_stack__printf(struct perf_sample *sample) printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); - for (i = 0; i < sample->branch_stack->nr; i++) - printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", - i, sample->branch_stack->entries[i].from, - sample->branch_stack->entries[i].to); + for (i = 0; i < sample->branch_stack->nr; i++) { + struct branch_entry *e = &sample->branch_stack->entries[i]; + + printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", + i, e->from, e->to, + e->flags.cycles, + e->flags.mispred ? "M" : " ", + e->flags.predicted ? "P" : " ", + e->flags.abort ? "A" : " ", + e->flags.in_tx ? 
"T" : " ", + (unsigned)e->flags.reserved); + } } static void regs_dump__printf(u64 mask, u64 *regs) @@ -1053,11 +1064,11 @@ static int machines__deliver_event(struct machines *machines, switch (event->header.type) { case PERF_RECORD_SAMPLE: - dump_sample(evsel, event, sample); if (evsel == NULL) { ++evlist->stats.nr_unknown_id; return 0; } + dump_sample(evsel, event, sample); if (machine == NULL) { ++evlist->stats.nr_unprocessable_samples; return 0; @@ -1090,9 +1101,15 @@ static int machines__deliver_event(struct machines *machines, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); case PERF_RECORD_AUX: + if (tool->aux == perf_event__process_aux && + (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)) + evlist->stats.total_aux_lost += 1; return tool->aux(tool, event, sample, machine); case PERF_RECORD_ITRACE_START: return tool->itrace_start(tool, event, sample, machine); + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + return tool->context_switch(tool, event, sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; @@ -1294,7 +1311,7 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid) return machine__findnew_thread(&session->machines.host, -1, pid); } -static struct thread *perf_session__register_idle_thread(struct perf_session *session) +struct thread *perf_session__register_idle_thread(struct perf_session *session) { struct thread *thread; @@ -1332,6 +1349,13 @@ static void perf_session__warn_about_errors(const struct perf_session *session) } } + if (session->tool->aux == perf_event__process_aux && + stats->total_aux_lost != 0) { + ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n", + stats->total_aux_lost, + stats->nr_events[PERF_RECORD_AUX]); + } + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " @@ -1551,7 +1575,10 @@ static int __perf_session__process_events(struct perf_session 
*session, file_offset = page_offset; head = data_offset - page_offset; - if (data_size && (data_offset + data_size < file_size)) + if (data_size == 0) + goto out; + + if (data_offset + data_size < file_size) file_size = data_offset + data_size; ui_progress__init(&prog, file_size, "Processing events..."); @@ -1773,7 +1800,7 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample, if (thread__resolve_callchain(al->thread, evsel, sample, NULL, NULL, - PERF_MAX_STACK_DEPTH) != 0) { + stack_depth) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); return; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index b44afc75d1cc..3e900c0efc73 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -89,6 +89,8 @@ struct machine *perf_session__findnew_machine(struct perf_session *session, pid_ } struct thread *perf_session__findnew(struct perf_session *session, pid_t pid); +struct thread *perf_session__register_idle_thread(struct perf_session *session); + size_t perf_session__fprintf(struct perf_session *session, FILE *fp); size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 4c65a143a34c..2d8ccd4d9e1b 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -9,7 +9,7 @@ regex_t parent_regex; const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char default_sort_order[] = "comm,dso,symbol"; -const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to"; +const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; @@ -21,6 +21,7 @@ int sort__need_collapse = 0; int 
sort__has_parent = 0; int sort__has_sym = 0; int sort__has_dso = 0; +int sort__has_socket = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; @@ -319,6 +320,59 @@ struct sort_entry sort_srcline = { .se_width_idx = HISTC_SRCLINE, }; +/* --sort srcfile */ + +static char no_srcfile[1]; + +static char *get_srcfile(struct hist_entry *e) +{ + char *sf, *p; + struct map *map = e->ms.map; + + sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), + e->ms.sym, false, true); + if (!strcmp(sf, SRCLINE_UNKNOWN)) + return no_srcfile; + p = strchr(sf, ':'); + if (p && *sf) { + *p = 0; + return sf; + } + free(sf); + return no_srcfile; +} + +static int64_t +sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right) +{ + if (!left->srcfile) { + if (!left->ms.map) + left->srcfile = no_srcfile; + else + left->srcfile = get_srcfile(left); + } + if (!right->srcfile) { + if (!right->ms.map) + right->srcfile = no_srcfile; + else + right->srcfile = get_srcfile(right); + } + return strcmp(right->srcfile, left->srcfile); +} + +static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile); +} + +struct sort_entry sort_srcfile = { + .se_header = "Source File", + .se_cmp = sort__srcfile_cmp, + .se_snprintf = hist_entry__srcfile_snprintf, + .se_width_idx = HISTC_SRCFILE, +}; + /* --sort parent */ static int64_t @@ -368,6 +422,27 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +/* --sort socket */ + +static int64_t +sort__socket_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->socket - left->socket; +} + +static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket); +} + +struct sort_entry sort_socket = { + .se_header = "Socket", + .se_cmp = sort__socket_cmp, + .se_snprintf = 
hist_entry__socket_snprintf, + .se_width_idx = HISTC_SOCKET, +}; + /* sort keys for branch stacks */ static int64_t @@ -526,6 +601,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, out); } +static int64_t +sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->branch_info->flags.cycles - + right->branch_info->flags.cycles; +} + +static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + if (he->branch_info->flags.cycles == 0) + return repsep_snprintf(bf, size, "%-*s", width, "-"); + return repsep_snprintf(bf, size, "%-*hd", width, + he->branch_info->flags.cycles); +} + +struct sort_entry sort_cycles = { + .se_header = "Basic Block Cycles", + .se_cmp = sort__cycles_cmp, + .se_snprintf = hist_entry__cycles_snprintf, + .se_width_idx = HISTC_CYCLES, +}; + /* --sort daddr_sym */ static int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) @@ -557,6 +655,35 @@ static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf, } static int64_t +sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right) +{ + uint64_t l = 0, r = 0; + + if (left->mem_info) + l = left->mem_info->iaddr.addr; + if (right->mem_info) + r = right->mem_info->iaddr.addr; + + return (int64_t)(r - l); +} + +static int hist_entry__iaddr_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + uint64_t addr = 0; + struct map *map = NULL; + struct symbol *sym = NULL; + + if (he->mem_info) { + addr = he->mem_info->iaddr.addr; + map = he->mem_info->iaddr.map; + sym = he->mem_info->iaddr.sym; + } + return _hist_entry__sym_snprintf(map, sym, addr, he->level, bf, size, + width); +} + +static int64_t sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right) { struct map *map_l = NULL; @@ -979,6 +1106,13 @@ struct sort_entry sort_mem_daddr_sym = { .se_width_idx = 
HISTC_MEM_DADDR_SYMBOL, }; +struct sort_entry sort_mem_iaddr_sym = { + .se_header = "Code Symbol", + .se_cmp = sort__iaddr_cmp, + .se_snprintf = hist_entry__iaddr_snprintf, + .se_width_idx = HISTC_MEM_IADDR_SYMBOL, +}; + struct sort_entry sort_mem_daddr_dso = { .se_header = "Data Object", .se_cmp = sort__dso_daddr_cmp, @@ -1172,7 +1306,9 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_SYM, "symbol", sort_sym), DIM(SORT_PARENT, "parent", sort_parent), DIM(SORT_CPU, "cpu", sort_cpu), + DIM(SORT_SOCKET, "socket", sort_socket), DIM(SORT_SRCLINE, "srcline", sort_srcline), + DIM(SORT_SRCFILE, "srcfile", sort_srcfile), DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), DIM(SORT_TRANSACTION, "transaction", sort_transaction), @@ -1190,6 +1326,7 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), DIM(SORT_IN_TX, "in_tx", sort_in_tx), DIM(SORT_ABORT, "abort", sort_abort), + DIM(SORT_CYCLES, "cycles", sort_cycles), }; #undef DIM @@ -1198,6 +1335,7 @@ static struct sort_dimension bstack_sort_dimensions[] = { static struct sort_dimension memory_sort_dimensions[] = { DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym), + DIM(SORT_MEM_IADDR_SYMBOL, "symbol_iaddr", sort_mem_iaddr_sym), DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso), DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked), DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb), @@ -1439,6 +1577,12 @@ static int __hpp_dimension__add_output(struct hpp_dimension *hd) return 0; } +int hpp_dimension__add_output(unsigned col) +{ + BUG_ON(col >= PERF_HPP__MAX_INDEX); + return __hpp_dimension__add_output(&hpp_sort_dimensions[col]); +} + int sort_dimension__add(const char *tok) { unsigned int i; @@ -1472,6 +1616,8 @@ int sort_dimension__add(const char *tok) } else if (sd->entry == &sort_dso) { sort__has_dso = 1; + } else if (sd->entry == &sort_socket) { + sort__has_socket = 1; } 
return __sort_dimension__add(sd); @@ -1777,8 +1923,6 @@ static int __setup_output_field(void) if (field_order == NULL) return 0; - reset_dimensions(); - strp = str = strdup(field_order); if (str == NULL) { error("Not enough memory to setup output fields"); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e97cd476d336..31228851e397 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -34,6 +34,7 @@ extern int have_ignore_callees; extern int sort__need_collapse; extern int sort__has_parent; extern int sort__has_sym; +extern int sort__has_socket; extern enum sort_mode sort__mode; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; @@ -90,6 +91,7 @@ struct hist_entry { struct comm *comm; u64 ip; u64 transaction; + s32 socket; s32 cpu; u8 cpumode; @@ -114,6 +116,7 @@ struct hist_entry { }; }; char *srcline; + char *srcfile; struct symbol *parent; struct rb_root sorted_chain; struct branch_info *branch_info; @@ -171,7 +174,9 @@ enum sort_type { SORT_SYM, SORT_PARENT, SORT_CPU, + SORT_SOCKET, SORT_SRCLINE, + SORT_SRCFILE, SORT_LOCAL_WEIGHT, SORT_GLOBAL_WEIGHT, SORT_TRANSACTION, @@ -185,6 +190,7 @@ enum sort_type { SORT_MISPREDICT, SORT_ABORT, SORT_IN_TX, + SORT_CYCLES, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, @@ -195,6 +201,7 @@ enum sort_type { SORT_MEM_LVL, SORT_MEM_SNOOP, SORT_MEM_DCACHELINE, + SORT_MEM_IADDR_SYMBOL, }; /* @@ -227,4 +234,6 @@ void perf_hpp__set_elide(int idx, bool elide); int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); bool is_strict_order(const char *order); + +int hpp_dimension__add_output(unsigned col); #endif /* __PERF_SORT_H */ diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index c93fb0c5bd0b..b4db3f48e3b0 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -10,6 +10,8 @@ #include "symbol.h" +bool srcline_full_filename; + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -147,8 +149,11 @@ static void 
addr2line_cleanup(struct a2l_data *a2l) free(a2l); } +#define MAX_INLINE_NEST 1024 + static int addr2line(const char *dso_name, u64 addr, - char **file, unsigned int *line, struct dso *dso) + char **file, unsigned int *line, struct dso *dso, + bool unwind_inlines) { int ret = 0; struct a2l_data *a2l = dso->a2l; @@ -168,6 +173,15 @@ static int addr2line(const char *dso_name, u64 addr, bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l); + if (a2l->found && unwind_inlines) { + int cnt = 0; + + while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, + &a2l->funcname, &a2l->line) && + cnt++ < MAX_INLINE_NEST) + ; + } + if (a2l->found && a2l->filename) { *file = strdup(a2l->filename); *line = a2l->line; @@ -195,7 +209,8 @@ void dso__free_a2l(struct dso *dso) static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr, - struct dso *dso __maybe_unused) + struct dso *dso __maybe_unused, + bool unwind_inlines __maybe_unused) { FILE *fp; char cmd[PATH_MAX]; @@ -252,8 +267,8 @@ void dso__free_a2l(struct dso *dso __maybe_unused) */ #define A2L_FAIL_LIMIT 123 -char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym) +char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, + bool show_sym, bool unwind_inlines) { char *file = NULL; unsigned line = 0; @@ -274,10 +289,12 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, if (!strncmp(dso_name, "/tmp/perf-", 10)) goto out; - if (!addr2line(dso_name, addr, &file, &line, dso)) + if (!addr2line(dso_name, addr, &file, &line, dso, unwind_inlines)) goto out; - if (asprintf(&srcline, "%s:%u", basename(file), line) < 0) { + if (asprintf(&srcline, "%s:%u", + srcline_full_filename ? 
file : basename(file), + line) < 0) { free(file); goto out; } @@ -306,3 +323,9 @@ void free_srcline(char *srcline) if (srcline && strcmp(srcline, SRCLINE_UNKNOWN) != 0) free(srcline); } + +char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, + bool show_sym) +{ + return __get_srcline(dso, addr, sym, show_sym, false); +} diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 53e8bb7bc852..2a5d8d7698ae 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -85,7 +85,7 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); + update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); else if (perf_stat_evsel__is(counter, ELISION_START)) @@ -398,20 +398,18 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, " # %5.2f%% aborted cycles ", 100.0 * ((total2-avg) / total)); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && - avg > 0 && runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); - if (total) + if (avg) ratio = total / avg; fprintf(out, " # %8.0f cycles / transaction ", ratio); } else if (perf_stat_evsel__is(evsel, ELISION_START) && - avg > 0 && runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); - if (total) + if (avg) ratio = total / avg; fprintf(out, " # %8.0f cycles / elision ", ratio); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index f2a0d1521e26..2d9d8306dbd3 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -67,7 +67,7 @@ double 
rel_stddev_stats(double stddev, double avg) bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id) { - struct perf_stat *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->priv; return ps->id == id; } @@ -84,7 +84,7 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { void perf_stat_evsel_id_init(struct perf_evsel *evsel) { - struct perf_stat *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->priv; int i; /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ @@ -97,59 +97,10 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) } } -struct perf_counts *perf_counts__new(int ncpus, int nthreads) -{ - struct perf_counts *counts = zalloc(sizeof(*counts)); - - if (counts) { - struct xyarray *values; - - values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); - if (!values) { - free(counts); - return NULL; - } - - counts->values = values; - } - - return counts; -} - -void perf_counts__delete(struct perf_counts *counts) -{ - if (counts) { - xyarray__delete(counts->values); - free(counts); - } -} - -static void perf_counts__reset(struct perf_counts *counts) -{ - xyarray__reset(counts->values); -} - -void perf_evsel__reset_counts(struct perf_evsel *evsel) -{ - perf_counts__reset(evsel->counts); -} - -int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads) -{ - evsel->counts = perf_counts__new(ncpus, nthreads); - return evsel->counts != NULL ? 
0 : -ENOMEM; -} - -void perf_evsel__free_counts(struct perf_evsel *evsel) -{ - perf_counts__delete(evsel->counts); - evsel->counts = NULL; -} - void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) { int i; - struct perf_stat *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->priv; for (i = 0; i < 3; i++) init_stats(&ps->res_stats[i]); @@ -159,7 +110,7 @@ void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) { - evsel->priv = zalloc(sizeof(struct perf_stat)); + evsel->priv = zalloc(sizeof(struct perf_stat_evsel)); if (evsel->priv == NULL) return -ENOMEM; perf_evsel__reset_stat_priv(evsel); @@ -238,3 +189,155 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist) perf_evsel__reset_counts(evsel); } } + +static void zero_per_pkg(struct perf_evsel *counter) +{ + if (counter->per_pkg_mask) + memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); +} + +static int check_per_pkg(struct perf_evsel *counter, + struct perf_counts_values *vals, int cpu, bool *skip) +{ + unsigned long *mask = counter->per_pkg_mask; + struct cpu_map *cpus = perf_evsel__cpus(counter); + int s; + + *skip = false; + + if (!counter->per_pkg) + return 0; + + if (cpu_map__empty(cpus)) + return 0; + + if (!mask) { + mask = zalloc(MAX_NR_CPUS); + if (!mask) + return -ENOMEM; + + counter->per_pkg_mask = mask; + } + + /* + * we do not consider an event that has not run as a good + * instance to mark a package as used (skip=1). Otherwise + * we may run into a situation where the first CPU in a package + * is not running anything, yet the second is, and this function + * would mark the package as used after the first CPU and would + * not read the values from the second CPU. 
+ */ + if (!(vals->run && vals->ena)) + return 0; + + s = cpu_map__get_socket(cpus, cpu, NULL); + if (s < 0) + return -1; + + *skip = test_and_set_bit(s, mask) == 1; + return 0; +} + +static int +process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel, + int cpu, int thread, + struct perf_counts_values *count) +{ + struct perf_counts_values *aggr = &evsel->counts->aggr; + static struct perf_counts_values zero; + bool skip = false; + + if (check_per_pkg(evsel, count, cpu, &skip)) { + pr_err("failed to read per-pkg counter\n"); + return -1; + } + + if (skip) + count = &zero; + + switch (config->aggr_mode) { + case AGGR_THREAD: + case AGGR_CORE: + case AGGR_SOCKET: + case AGGR_NONE: + if (!evsel->snapshot) + perf_evsel__compute_deltas(evsel, cpu, thread, count); + perf_counts_values__scale(count, config->scale, NULL); + if (config->aggr_mode == AGGR_NONE) + perf_stat__update_shadow_stats(evsel, count->values, cpu); + break; + case AGGR_GLOBAL: + aggr->val += count->val; + if (config->scale) { + aggr->ena += count->ena; + aggr->run += count->run; + } + case AGGR_UNSET: + default: + break; + } + + return 0; +} + +static int process_counter_maps(struct perf_stat_config *config, + struct perf_evsel *counter) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = perf_evsel__nr_cpus(counter); + int cpu, thread; + + if (counter->system_wide) + nthreads = 1; + + for (thread = 0; thread < nthreads; thread++) { + for (cpu = 0; cpu < ncpus; cpu++) { + if (process_counter_values(config, counter, cpu, thread, + perf_counts(counter->counts, cpu, thread))) + return -1; + } + } + + return 0; +} + +int perf_stat_process_counter(struct perf_stat_config *config, + struct perf_evsel *counter) +{ + struct perf_counts_values *aggr = &counter->counts->aggr; + struct perf_stat_evsel *ps = counter->priv; + u64 *count = counter->counts->aggr.values; + int i, ret; + + aggr->val = aggr->ena = aggr->run = 0; + init_stats(ps->res_stats); + + if 
(counter->per_pkg) + zero_per_pkg(counter); + + ret = process_counter_maps(config, counter); + if (ret) + return ret; + + if (config->aggr_mode != AGGR_GLOBAL) + return 0; + + if (!counter->snapshot) + perf_evsel__compute_deltas(counter, -1, -1, aggr); + perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); + + for (i = 0; i < 3; i++) + update_stats(&ps->res_stats[i], count[i]); + + if (verbose) { + fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + perf_evsel__name(counter), count[0], count[1], count[2]); + } + + /* + * Save the full runtime - to allow normalization during printout: + */ + perf_stat__update_shadow_stats(counter, count, 0); + + return 0; +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 1cfbe0a980ac..da1d11c4f8c1 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -20,7 +20,7 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__MAX, }; -struct perf_stat { +struct perf_stat_evsel { struct stats res_stats[3]; enum perf_stat_evsel_id id; }; @@ -31,31 +31,16 @@ enum aggr_mode { AGGR_SOCKET, AGGR_CORE, AGGR_THREAD, + AGGR_UNSET, }; -struct perf_counts_values { - union { - struct { - u64 val; - u64 ena; - u64 run; - }; - u64 values[3]; - }; +struct perf_stat_config { + enum aggr_mode aggr_mode; + bool scale; + FILE *output; + unsigned int interval; }; -struct perf_counts { - s8 scaled; - struct perf_counts_values aggr; - struct xyarray *values; -}; - -static inline struct perf_counts_values* -perf_counts(struct perf_counts *counts, int cpu, int thread) -{ - return xyarray__entry(counts->values, cpu, thread); -} - void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); double stddev_stats(struct stats *stats); @@ -89,13 +74,6 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, double avg, int cpu, enum aggr_mode aggr); -struct perf_counts 
*perf_counts__new(int ncpus, int nthreads); -void perf_counts__delete(struct perf_counts *counts); - -void perf_evsel__reset_counts(struct perf_evsel *evsel); -int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); -void perf_evsel__free_counts(struct perf_evsel *evsel); - void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); void perf_evsel__free_stat_priv(struct perf_evsel *evsel); @@ -109,4 +87,7 @@ int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); void perf_evlist__reset_stats(struct perf_evlist *evlist); + +int perf_stat_process_counter(struct perf_stat_config *config, + struct perf_evsel *counter); #endif diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 4abe23550c73..25671fa16618 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -82,23 +82,22 @@ void strbuf_add(struct strbuf *sb, const void *data, size_t len) strbuf_setlen(sb, sb->len + len); } -void strbuf_addf(struct strbuf *sb, const char *fmt, ...) +void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) { int len; - va_list ap; + va_list ap_saved; if (!strbuf_avail(sb)) strbuf_grow(sb, 64); - va_start(ap, fmt); + + va_copy(ap_saved, ap); len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); - va_end(ap); if (len < 0) die("your vsnprintf is broken"); if (len > strbuf_avail(sb)) { strbuf_grow(sb, len); - va_start(ap, fmt); - len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); - va_end(ap); + len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved); + va_end(ap_saved); if (len > strbuf_avail(sb)) { die("this should not happen, your vsnprintf is broken"); } @@ -106,6 +105,15 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...) 
strbuf_setlen(sb, sb->len + len); } +void strbuf_addf(struct strbuf *sb, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + strbuf_addv(sb, fmt, ap); + va_end(ap); +} + ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint) { size_t oldlen = sb->len; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 436ac319f6c7..529f2f035249 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -39,6 +39,7 @@ */ #include <assert.h> +#include <stdarg.h> extern char strbuf_slopbuf[]; struct strbuf { @@ -85,6 +86,7 @@ static inline void strbuf_addstr(struct strbuf *sb, const char *s) { __attribute__((format(printf,2,3))) extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); +extern void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap); /* XXX: if read fails, any partial read is undone */ extern ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 6afd6106ceb5..fc8781de62db 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -357,3 +357,42 @@ void *memdup(const void *src, size_t len) return p; } + +char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints) +{ + /* + * FIXME: replace this with an expression using log10() when we + * find a suitable implementation, maybe the one in the dvb drivers... 
+ * + * "%s == %d || " = log10(MAXINT) * 2 + 8 chars for the operators + */ + size_t size = nints * 28 + 1; /* \0 */ + size_t i, printed = 0; + char *expr = malloc(size); + + if (expr) { + const char *or_and = "||", *eq_neq = "=="; + char *e = expr; + + if (!in) { + or_and = "&&"; + eq_neq = "!="; + } + + for (i = 0; i < nints; ++i) { + if (printed == size) + goto out_err_overflow; + + if (i > 0) + printed += snprintf(e + printed, size - printed, " %s ", or_and); + printed += scnprintf(e + printed, size - printed, + "%s %s %d", var, eq_neq, ints[i]); + } + } + + return expr; + +out_err_overflow: + free(expr); + return NULL; +} diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 71f9d102b96f..bdf98f6f27bb 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -72,7 +72,7 @@ int strlist__load(struct strlist *slist, const char *filename) FILE *fp = fopen(filename, "r"); if (fp == NULL) - return errno; + return -errno; while (fgets(entry, sizeof(entry), fp) != NULL) { const size_t len = strlen(entry); @@ -108,43 +108,70 @@ struct str_node *strlist__find(struct strlist *slist, const char *entry) return snode; } -static int strlist__parse_list_entry(struct strlist *slist, const char *s) +static int strlist__parse_list_entry(struct strlist *slist, const char *s, + const char *subst_dir) { + int err; + char *subst = NULL; + if (strncmp(s, "file://", 7) == 0) return strlist__load(slist, s + 7); - return strlist__add(slist, s); + if (subst_dir) { + err = -ENOMEM; + if (asprintf(&subst, "%s/%s", subst_dir, s) < 0) + goto out; + + if (access(subst, F_OK) == 0) { + err = strlist__load(slist, subst); + goto out; + } + } + + err = strlist__add(slist, s); +out: + free(subst); + return err; } -int strlist__parse_list(struct strlist *slist, const char *s) +static int strlist__parse_list(struct strlist *slist, const char *s, const char *subst_dir) { char *sep; int err; while ((sep = strchr(s, ',')) != NULL) { *sep = '\0'; - err = 
strlist__parse_list_entry(slist, s); + err = strlist__parse_list_entry(slist, s, subst_dir); *sep = ','; if (err != 0) return err; s = sep + 1; } - return *s ? strlist__parse_list_entry(slist, s) : 0; + return *s ? strlist__parse_list_entry(slist, s, subst_dir) : 0; } -struct strlist *strlist__new(bool dupstr, const char *list) +struct strlist *strlist__new(const char *list, const struct strlist_config *config) { struct strlist *slist = malloc(sizeof(*slist)); if (slist != NULL) { + bool dupstr = true; + const char *dirname = NULL; + + if (config) { + dupstr = !config->dont_dupstr; + dirname = config->dirname; + } + rblist__init(&slist->rblist); slist->rblist.node_cmp = strlist__node_cmp; slist->rblist.node_new = strlist__node_new; slist->rblist.node_delete = strlist__node_delete; slist->dupstr = dupstr; - if (list && strlist__parse_list(slist, list) != 0) + + if (list && strlist__parse_list(slist, list, dirname) != 0) goto out_error; } diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index 5c7f87069d9c..297565aa7535 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -16,7 +16,12 @@ struct strlist { bool dupstr; }; -struct strlist *strlist__new(bool dupstr, const char *slist); +struct strlist_config { + bool dont_dupstr; + const char *dirname; +}; + +struct strlist *strlist__new(const char *slist, const struct strlist_config *config); void strlist__delete(struct strlist *slist); void strlist__remove(struct strlist *slist, struct str_node *sn); @@ -74,6 +79,4 @@ static inline struct str_node *strlist__next(struct str_node *sn) #define strlist__for_each_safe(pos, n, slist) \ for (pos = strlist__first(slist), n = strlist__next(pos); pos;\ pos = n, n = strlist__next(n)) - -int strlist__parse_list(struct strlist *slist, const char *s); #endif /* __PERF_STRLIST_H */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 65f7e389ae09..475d88d0a1c9 100644 --- a/tools/perf/util/symbol-elf.c +++ 
b/tools/perf/util/symbol-elf.c @@ -875,6 +875,17 @@ int dso__load_sym(struct dso *dso, struct map *map, } } + /* + * Handle any relocation of vdso necessary because older kernels + * attempted to prelink vdso to its virtual address. + */ + if (dso__is_vdso(dso)) { + GElf_Shdr tshdr; + + if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) + map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset; + } + dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); /* * Initial kernel and module mappings do not map to the dso. For @@ -1260,8 +1271,6 @@ out_close: static int kcore__init(struct kcore *kcore, char *filename, int elfclass, bool temp) { - GElf_Ehdr *ehdr; - kcore->elfclass = elfclass; if (temp) @@ -1278,9 +1287,7 @@ static int kcore__init(struct kcore *kcore, char *filename, int elfclass, if (!gelf_newehdr(kcore->elf, elfclass)) goto out_end; - ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr); - if (!ehdr) - goto out_end; + memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr)); return 0; @@ -1337,23 +1344,18 @@ static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count) static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset, u64 addr, u64 len) { - GElf_Phdr gphdr; - GElf_Phdr *phdr; - - phdr = gelf_getphdr(kcore->elf, idx, &gphdr); - if (!phdr) - return -1; - - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R | PF_W | PF_X; - phdr->p_offset = offset; - phdr->p_vaddr = addr; - phdr->p_paddr = 0; - phdr->p_filesz = len; - phdr->p_memsz = len; - phdr->p_align = page_size; - - if (!gelf_update_phdr(kcore->elf, idx, phdr)) + GElf_Phdr phdr = { + .p_type = PT_LOAD, + .p_flags = PF_R | PF_W | PF_X, + .p_offset = offset, + .p_vaddr = addr, + .p_paddr = 0, + .p_filesz = len, + .p_memsz = len, + .p_align = page_size, + }; + + if (!gelf_update_phdr(kcore->elf, idx, &phdr)) return -1; return 0; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index fd8477cacf88..48906333a858 100644 --- 
a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -337,7 +337,7 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, symbol_filter_t filter __maybe_unused, int kmodule __maybe_unused) { - unsigned char *build_id[BUILD_ID_SIZE]; + unsigned char build_id[BUILD_ID_SIZE]; int ret; ret = fd__is_64_bit(ss->fd); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 60f11414bb5c..b4cc7662677e 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -441,10 +441,25 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, return &s->sym; } +void dso__reset_find_symbol_cache(struct dso *dso) +{ + enum map_type type; + + for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) { + dso->last_find_result[type].addr = 0; + dso->last_find_result[type].symbol = NULL; + } +} + struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr) { - return symbols__find(&dso->symbols[type], addr); + if (dso->last_find_result[type].addr != addr) { + dso->last_find_result[type].addr = addr; + dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr); + } + + return dso->last_find_result[type].symbol; } struct symbol *dso__first_symbol(struct dso *dso, enum map_type type) @@ -609,7 +624,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name, * symbols, setting length to 0, and rely on * symbols__fixup_end() to fix it up. 
*/ - sym = symbol__new(start, 0, kallsyms2elf_type(type), name); + sym = symbol__new(start, 0, kallsyms2elf_binding(type), name); if (sym == NULL) return -ENOMEM; /* @@ -665,7 +680,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, pos->start -= curr_map->start - curr_map->pgoff; if (pos->end) pos->end -= curr_map->start - curr_map->pgoff; - if (curr_map != map) { + if (curr_map->dso != map->dso) { rb_erase_init(&pos->rb_node, root); symbols__insert( &curr_map->dso->symbols[curr_map->type], @@ -1133,8 +1148,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map, fd = open(kcore_filename, O_RDONLY); if (fd < 0) { - pr_err("%s requires CAP_SYS_RAWIO capability to access.\n", - kcore_filename); + pr_debug("Failed to open %s. Note /proc/kcore requires CAP_SYS_RAWIO capability to access.\n", + kcore_filename); return -EINVAL; } @@ -1391,6 +1406,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) struct symsrc ss_[2]; struct symsrc *syms_ss = NULL, *runtime_ss = NULL; bool kmod; + unsigned char build_id[BUILD_ID_SIZE]; pthread_mutex_lock(&dso->lock); @@ -1446,6 +1462,14 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE || dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; + + /* + * Read the build id if possible. This is required for + * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work + */ + if (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0) + dso__set_build_id(dso, build_id); + /* * Iterate over candidate debug images. 
* Keep track of "interesting" ones (those which have a symtab, dynsym, @@ -1592,6 +1616,15 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map, int i, err = 0; char *filename = NULL; + pr_debug("Looking at the vmlinux_path (%d entries long)\n", + vmlinux_path__nr_entries + 1); + + for (i = 0; i < vmlinux_path__nr_entries; ++i) { + err = dso__load_vmlinux(dso, map, vmlinux_path[i], false, filter); + if (err > 0) + goto out; + } + if (!symbol_conf.ignore_vmlinux_buildid) filename = dso__build_id_filename(dso, NULL, 0); if (filename != NULL) { @@ -1600,15 +1633,6 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map, goto out; free(filename); } - - pr_debug("Looking at the vmlinux_path (%d entries long)\n", - vmlinux_path__nr_entries + 1); - - for (i = 0; i < vmlinux_path__nr_entries; ++i) { - err = dso__load_vmlinux(dso, map, vmlinux_path[i], false, filter); - if (err > 0) - break; - } out: return err; } @@ -1838,7 +1862,7 @@ static void vmlinux_path__exit(void) zfree(&vmlinux_path); } -static int vmlinux_path__init(struct perf_session_env *env) +static int vmlinux_path__init(struct perf_env *env) { struct utsname uts; char bf[PATH_MAX]; @@ -1906,7 +1930,7 @@ int setup_list(struct strlist **list, const char *list_str, if (list_str == NULL) return 0; - *list = strlist__new(true, list_str); + *list = strlist__new(list_str, NULL); if (!*list) { pr_err("problems parsing %s list\n", list_name); return -1; @@ -1949,7 +1973,7 @@ static bool symbol__read_kptr_restrict(void) return value; } -int symbol__init(struct perf_session_env *env) +int symbol__init(struct perf_env *env) { const char *symfs; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index b98ce51af142..40073c60b83d 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -106,7 +106,8 @@ struct symbol_conf { filter_relative, show_hist_headers, branch_callstack, - has_filter; + has_filter, + show_ref_callgraph; const char *vmlinux_name, *kallsyms_name, 
*source_prefix, @@ -190,6 +191,7 @@ struct addr_location { u8 filtered; u8 cpumode; s32 cpu; + s32 socket; }; struct symsrc { @@ -251,8 +253,8 @@ int modules__parse(const char *filename, void *arg, int filename__read_debuglink(const char *filename, char *debuglink, size_t size); -struct perf_session_env; -int symbol__init(struct perf_session_env *env); +struct perf_env; +int symbol__init(struct perf_env *env); void symbol__exit(void); void symbol__elf_init(void); struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 28c4b746baa1..0a9ae8014729 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -191,6 +191,12 @@ static int thread__clone_map_groups(struct thread *thread, if (thread->pid_ == parent->pid_) return 0; + if (thread->mg == parent->mg) { + pr_debug("broken map groups on thread %d/%d parent %d/%d\n", + thread->pid_, thread->tid, parent->pid_, parent->tid); + return 0; + } + /* But this one is new process, copy maps. 
*/ for (i = 0; i < MAP__NR_TYPES; ++i) if (map_groups__clone(thread->mg, parent->mg, i) < 0) diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 292ae2c90e06..6ec3c5ca438f 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -195,7 +195,8 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) pid_t pid, prev_pid = INT_MAX; char *end_ptr; struct str_node *pos; - struct strlist *slist = strlist__new(false, pid_str); + struct strlist_config slist_config = { .dont_dupstr = true, }; + struct strlist *slist = strlist__new(pid_str, &slist_config); if (!slist) return NULL; @@ -265,13 +266,14 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) pid_t tid, prev_tid = INT_MAX; char *end_ptr; struct str_node *pos; + struct strlist_config slist_config = { .dont_dupstr = true, }; struct strlist *slist; /* perf-stat expects threads to be generated even if tid not given */ if (!tid_str) return thread_map__new_dummy(); - slist = strlist__new(false, tid_str); + slist = strlist__new(tid_str, &slist_config); if (!slist) return NULL; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index c307dd438286..cab8cc24831b 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -46,6 +46,7 @@ struct perf_tool { lost_samples, aux, itrace_start, + context_switch, throttle, unthrottle; event_attr_op attr; diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index eb72716017ac..d995743cb673 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -38,7 +38,7 @@ #include "../perf.h" #include "trace-event.h" -#include <api/fs/debugfs.h> +#include <api/fs/tracing_path.h> #include "evsel.h" #include "debug.h" @@ -341,20 +341,14 @@ out: static int record_proc_kallsyms(void) { - unsigned int size; - const char *path = "/proc/kallsyms"; - struct stat st; - int ret, err = 0; - - ret = stat(path, &st); - if (ret < 0) { 
- /* not found */ - size = 0; - if (write(output_fd, &size, 4) != 4) - err = -EIO; - return err; - } - return record_file(path, 4); + unsigned long long size = 0; + /* + * Just to keep older perf.data file parsers happy, record a zero + * sized kallsyms file, i.e. do the same thing that was done when + * /proc/kallsyms (or something specified via --kallsyms, in a + * different path) couldn't be read. + */ + return write(output_fd, &size, 4) != 4 ? -EIO : 0; } static int record_ftrace_printk(void) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index d4957418657e..8ff7d620d942 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -135,36 +135,6 @@ void event_format__print(struct event_format *event, return event_format__fprintf(event, cpu, data, size, stdout); } -void parse_proc_kallsyms(struct pevent *pevent, - char *file, unsigned int size __maybe_unused) -{ - unsigned long long addr; - char *func; - char *line; - char *next = NULL; - char *addr_str; - char *mod; - char *fmt = NULL; - - line = strtok_r(file, "\n", &next); - while (line) { - mod = NULL; - addr_str = strtok_r(line, " ", &fmt); - addr = strtoull(addr_str, NULL, 16); - /* skip character */ - strtok_r(NULL, " ", &fmt); - func = strtok_r(NULL, "\t", &fmt); - mod = strtok_r(NULL, "]", &fmt); - /* truncate the extra '[' */ - if (mod) - mod = mod + 1; - - pevent_register_function(pevent, func, addr, mod); - - line = strtok_r(NULL, "\n", &next); - } -} - void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size __maybe_unused) { diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 54d9e9b548a8..b67a0ccf5ab9 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -162,25 +162,23 @@ out: static int read_proc_kallsyms(struct pevent *pevent) { unsigned int size; - char *buf; size = read4(pevent); if (!size) return 0; - - buf = malloc(size + 
1); - if (buf == NULL) - return -1; - - if (do_read(buf, size) < 0) { - free(buf); - return -1; - } - buf[size] = '\0'; - - parse_proc_kallsyms(pevent, buf, size); - - free(buf); + /* + * Just skip it, now that we configure libtraceevent to use the + * tools/perf/ symbol resolver. + * + * We need to skip it so that we can continue parsing old perf.data + * files, that contains this /proc/kallsyms payload. + * + * Newer perf.data files will have just the 4-bytes zeros "kallsyms + * payload", so that older tools can continue reading it and interpret + * it as "no kallsyms payload is present". + */ + lseek(input_fd, size, SEEK_CUR); + trace_data_size += size; return 0; } diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 6322d37164c5..802bb868d446 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -7,8 +7,11 @@ #include <sys/stat.h> #include <fcntl.h> #include <linux/kernel.h> +#include <linux/err.h> #include <traceevent/event-parse.h> +#include <api/fs/tracing_path.h> #include "trace-event.h" +#include "machine.h" #include "util.h" /* @@ -19,6 +22,7 @@ * there. */ static struct trace_event tevent; +static bool tevent_initialized; int trace_event__init(struct trace_event *t) { @@ -32,12 +36,40 @@ int trace_event__init(struct trace_event *t) return pevent ? 
0 : -1; } +static int trace_event__init2(void) +{ + int be = traceevent_host_bigendian(); + struct pevent *pevent; + + if (trace_event__init(&tevent)) + return -1; + + pevent = tevent.pevent; + pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); + pevent_set_file_bigendian(pevent, be); + pevent_set_host_bigendian(pevent, be); + tevent_initialized = true; + return 0; +} + +int trace_event__register_resolver(struct machine *machine, + pevent_func_resolver_t *func) +{ + if (!tevent_initialized && trace_event__init2()) + return -1; + + return pevent_set_function_resolver(tevent.pevent, func, machine); +} + void trace_event__cleanup(struct trace_event *t) { traceevent_unload_plugins(t->plugin_list, t->pevent); pevent_free(t->pevent); } +/* + * Returns pointer with encoded error via <linux/err.h> interface. + */ static struct event_format* tp_format(const char *sys, const char *name) { @@ -46,12 +78,14 @@ tp_format(const char *sys, const char *name) char path[PATH_MAX]; size_t size; char *data; + int err; scnprintf(path, PATH_MAX, "%s/%s/%s/format", tracing_events_path, sys, name); - if (filename__read_str(path, &data, &size)) - return NULL; + err = filename__read_str(path, &data, &size); + if (err) + return ERR_PTR(err); pevent_parse_format(pevent, &event, data, size, sys); @@ -59,24 +93,14 @@ tp_format(const char *sys, const char *name) return event; } +/* + * Returns pointer with encoded error via <linux/err.h> interface. 
+ */ struct event_format* trace_event__tp_format(const char *sys, const char *name) { - static bool initialized; - - if (!initialized) { - int be = traceevent_host_bigendian(); - struct pevent *pevent; - - if (trace_event__init(&tevent)) - return NULL; - - pevent = tevent.pevent; - pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); - pevent_set_file_bigendian(pevent, be); - pevent_set_host_bigendian(pevent, be); - initialized = true; - } + if (!tevent_initialized && trace_event__init2()) + return ERR_PTR(-ENOMEM); return tp_format(sys, name); } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index d5168f0be4ec..b85ee55cca0c 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -18,6 +18,8 @@ struct trace_event { int trace_event__init(struct trace_event *t); void trace_event__cleanup(struct trace_event *t); +int trace_event__register_resolver(struct machine *machine, + pevent_func_resolver_t *func); struct event_format* trace_event__tp_format(const char *sys, const char *name); @@ -76,6 +78,8 @@ struct scripting_ops { int (*generate_script) (struct pevent *pevent, const char *outfile); }; +extern unsigned int scripting_max_stack; + int script_spec_register(const char *spec, struct scripting_ops *ops); void setup_perl_scripting(void); diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 4c00507ee3fd..c83832b555e5 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -330,6 +330,7 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, struct map *map; unw_dyn_info_t di; u64 table_data, segbase, fde_count; + int ret = -EINVAL; map = find_map(ip, ui); if (!map || !map->dso) @@ -348,29 +349,33 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, di.u.rti.table_data = map->start + table_data; di.u.rti.table_len = fde_count * sizeof(struct table_entry) / sizeof(unw_word_t); - return 
dwarf_search_unwind_table(as, ip, &di, pi, - need_unwind_info, arg); + ret = dwarf_search_unwind_table(as, ip, &di, pi, + need_unwind_info, arg); } #ifndef NO_LIBUNWIND_DEBUG_FRAME /* Check the .debug_frame section for unwinding info */ - if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { + if (ret < 0 && + !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { int fd = dso__data_get_fd(map->dso, ui->machine); int is_exec = elf_is_exec(fd, map->dso->name); unw_word_t base = is_exec ? 0 : map->start; + const char *symfile; if (fd >= 0) dso__data_put_fd(map->dso); + symfile = map->dso->symsrc_filename ?: map->dso->name; + memset(&di, 0, sizeof(di)); - if (dwarf_find_debug_frame(0, &di, ip, base, map->dso->name, + if (dwarf_find_debug_frame(0, &di, ip, base, symfile, map->start, map->end)) return dwarf_search_unwind_table(as, ip, &di, pi, need_unwind_info, arg); } #endif - return -EINVAL; + return ret; } static int access_fpreg(unw_addr_space_t __maybe_unused as, @@ -461,7 +466,7 @@ static int access_mem(unw_addr_space_t __maybe_unused as, if (ret) { pr_debug("unwind: access_mem %p not inside range" " 0x%" PRIx64 "-0x%" PRIx64 "\n", - (void *) addr, start, end); + (void *) (uintptr_t) addr, start, end); *valp = 0; return ret; } @@ -471,7 +476,7 @@ static int access_mem(unw_addr_space_t __maybe_unused as, offset = addr - start; *valp = *(unw_word_t *)&stack->data[offset]; pr_debug("unwind: access_mem addr %p val %lx, offset %d\n", - (void *) addr, (unsigned long)*valp, offset); + (void *) (uintptr_t) addr, (unsigned long)*valp, offset); return 0; } diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index 4007aca8e0ca..6adfa18cdd4e 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -50,6 +50,11 @@ void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) die_routine = routine; } +void set_warning_routine(void (*routine)(const char *err, va_list params)) +{ + warn_routine = routine; +} + 
void usage(const char *err) { usage_routine(err); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index edc2d633b332..cd12c25e4ea4 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -17,7 +17,7 @@ #include "callchain.h" struct callchain_param callchain_param = { - .mode = CHAIN_GRAPH_REL, + .mode = CHAIN_GRAPH_ABS, .min_percent = 0.5, .order = ORDER_CALLEE, .key = CCKEY_FUNCTION @@ -34,8 +34,6 @@ bool test_attr__enabled; bool perf_host = true; bool perf_guest = false; -char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events"; - void event_attr_init(struct perf_event_attr *attr) { if (!perf_host) @@ -389,123 +387,6 @@ void set_term_quiet_input(struct termios *old) tcsetattr(0, TCSANOW, &tc); } -static void set_tracing_events_path(const char *tracing, const char *mountpoint) -{ - snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", - mountpoint, tracing, "events"); -} - -static const char *__perf_tracefs_mount(const char *mountpoint) -{ - const char *mnt; - - mnt = tracefs_mount(mountpoint); - if (!mnt) - return NULL; - - set_tracing_events_path("", mnt); - - return mnt; -} - -static const char *__perf_debugfs_mount(const char *mountpoint) -{ - const char *mnt; - - mnt = debugfs_mount(mountpoint); - if (!mnt) - return NULL; - - set_tracing_events_path("tracing/", mnt); - - return mnt; -} - -const char *perf_debugfs_mount(const char *mountpoint) -{ - const char *mnt; - - mnt = __perf_tracefs_mount(mountpoint); - if (mnt) - return mnt; - - mnt = __perf_debugfs_mount(mountpoint); - - return mnt; -} - -void perf_debugfs_set_path(const char *mntpt) -{ - snprintf(debugfs_mountpoint, strlen(debugfs_mountpoint), "%s", mntpt); - set_tracing_events_path("tracing/", mntpt); -} - -static const char *find_tracefs(void) -{ - const char *path = __perf_tracefs_mount(NULL); - - return path; -} - -static const char *find_debugfs(void) -{ - const char *path = __perf_debugfs_mount(NULL); - - if (!path) - fprintf(stderr, 
"Your kernel does not support the debugfs filesystem"); - - return path; -} - -/* - * Finds the path to the debugfs/tracing - * Allocates the string and stores it. - */ -const char *find_tracing_dir(void) -{ - const char *tracing_dir = ""; - static char *tracing; - static int tracing_found; - const char *debugfs; - - if (tracing_found) - return tracing; - - debugfs = find_tracefs(); - if (!debugfs) { - tracing_dir = "/tracing"; - debugfs = find_debugfs(); - if (!debugfs) - return NULL; - } - - if (asprintf(&tracing, "%s%s", debugfs, tracing_dir) < 0) - return NULL; - - tracing_found = 1; - return tracing; -} - -char *get_tracing_file(const char *name) -{ - const char *tracing; - char *file; - - tracing = find_tracing_dir(); - if (!tracing) - return NULL; - - if (asprintf(&file, "%s/%s", tracing, name) < 0) - return NULL; - - return file; -} - -void put_tracing_file(char *file) -{ - free(file); -} - int parse_nsec_time(const char *str, u64 *ptime) { u64 time_sec, time_nsec; @@ -566,6 +447,96 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags) return (unsigned long) -1; } +int get_stack_size(const char *str, unsigned long *_size) +{ + char *endptr; + unsigned long size; + unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); + + size = strtoul(str, &endptr, 0); + + do { + if (*endptr) + break; + + size = round_up(size, sizeof(u64)); + if (!size || size > max_size) + break; + + *_size = size; + return 0; + + } while (0); + + pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", + max_size, str); + return -1; +} + +int parse_callchain_record(const char *arg, struct callchain_param *param) +{ + char *tok, *name, *saveptr = NULL; + char *buf; + int ret = -1; + + /* We need buffer that we know we can write to. */ + buf = malloc(strlen(arg) + 1); + if (!buf) + return -ENOMEM; + + strcpy(buf, arg); + + tok = strtok_r((char *)buf, ",", &saveptr); + name = tok ? 
: (char *)buf; + + do { + /* Framepointer style */ + if (!strncmp(name, "fp", sizeof("fp"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + param->record_mode = CALLCHAIN_FP; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph fp\n"); + break; + +#ifdef HAVE_DWARF_UNWIND_SUPPORT + /* Dwarf style */ + } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { + const unsigned long default_stack_dump_size = 8192; + + ret = 0; + param->record_mode = CALLCHAIN_DWARF; + param->dump_size = default_stack_dump_size; + + tok = strtok_r(NULL, ",", &saveptr); + if (tok) { + unsigned long size = 0; + + ret = get_stack_size(tok, &size); + param->dump_size = size; + } +#endif /* HAVE_DWARF_UNWIND_SUPPORT */ + } else if (!strncmp(name, "lbr", sizeof("lbr"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + param->record_mode = CALLCHAIN_LBR; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph lbr\n"); + break; + } else { + pr_err("callchain: Unknown --call-graph option " + "value: %s\n", arg); + break; + } + + } while (0); + + free(buf); + return ret; +} + int filename__read_str(const char *filename, char **buf, size_t *sizep) { size_t size = 0, alloc_size = 0; @@ -668,7 +639,7 @@ bool find_process(const char *name) dir = opendir(procfs__mountpoint()); if (!dir) - return -1; + return false; /* Walk through the directory. 
*/ while (ret && (d = readdir(dir)) != NULL) { diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 8bce58b47a82..4cfb913aa9e0 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -74,8 +74,7 @@ #include <linux/magic.h> #include <linux/types.h> #include <sys/ttydefaults.h> -#include <api/fs/debugfs.h> -#include <api/fs/tracefs.h> +#include <api/fs/tracing_path.h> #include <termios.h> #include <linux/bitops.h> #include <termios.h> @@ -83,12 +82,6 @@ extern const char *graph_line; extern const char *graph_dotted_line; extern char buildid_dir[]; -extern char tracing_events_path[]; -extern void perf_debugfs_set_path(const char *mountpoint); -const char *perf_debugfs_mount(const char *mountpoint); -const char *find_tracing_dir(void); -char *get_tracing_file(const char *name); -void put_tracing_file(char *file); /* On most systems <limits.h> would have given us this, but * not on some systems (e.g. GNU/Hurd). @@ -152,6 +145,7 @@ extern void warning(const char *err, ...) 
__attribute__((format (printf, 1, 2))) extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); +extern void set_warning_routine(void (*routine)(const char *err, va_list params)); extern int prefixcmp(const char *str, const char *prefix); extern void set_buildid_dir(const char *dir); @@ -318,8 +312,11 @@ static inline int path__join3(char *bf, size_t size, struct dso; struct symbol; +extern bool srcline_full_filename; char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym); +char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, + bool show_sym, bool unwind_inlines); void free_srcline(char *srcline); int filename__read_str(const char *filename, char **buf, size_t *sizep); @@ -339,4 +336,18 @@ int gzip_decompress_to_file(const char *input, int output_fd); int lzma_decompress_to_file(const char *input, int output_fd); #endif +char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints); + +static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints) +{ + return asprintf_expr_inout_ints(var, true, nints, ints); +} + +static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints) +{ + return asprintf_expr_inout_ints(var, false, nints, ints); +} + +int get_stack_size(const char *str, unsigned long *_size); + #endif /* GIT_COMPAT_UTIL_H */ |