diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-02 06:23:17 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-02 06:23:17 +0300 |
commit | 7c8c03bfc7b9f5211d8a69eab7fee99c9fb4f449 (patch) | |
tree | a5cee67325e50e893bf0cc0a0d060983a0df6653 /drivers | |
parent | 6dc2cce9321198172cd96f955a5fc798a4cc35a6 (diff) | |
parent | fd7647979a3948dae4fc6f25dbbdf9ba269bed78 (diff) | |
download | linux-7c8c03bfc7b9f5211d8a69eab7fee99c9fb4f449.tar.xz |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"The main changes in this cycle were:
Kernel side changes:
- Kprobes and uprobes changes:
- Make their trampolines read-only while they are used
- Make UPROBES_EVENTS default-y which is the distro practice
- Apply misc fixes and robustization to probe point insertion.
- add support for AMD IOMMU events
- extend hw events on Intel Goldmont CPUs
- ... plus misc fixes and updates.
Tooling side changes:
- support s390 jump instructions in perf annotate (Christian
Borntraeger)
- vendor hardware events updates (Andi Kleen)
- add argument support for SDT events in powerpc (Ravi Bangoria)
- beautify the statx syscall arguments in 'perf trace' (Arnaldo
Carvalho de Melo)
- handle inline functions in callchains (Jin Yao)
- enable sorting by srcline as key (Milian Wolff)
- add 'brstackinsn' field in 'perf script' to reuse the x86
instruction decoder used in the Intel PT code to study hot paths to
samples (Andi Kleen)
- add PERF_RECORD_NAMESPACES so that the kernel can record
information required to associate samples to namespaces, helping in
container problem characterization. (Hari Bathini)
- allow sorting by symbol_size in 'perf report' and 'perf top'
(Charles Baylis)
- in perf stat, make system wide (-a) the default option if no target
was specified and one of following conditions is met:
- no workload specified (current behaviour)
- a workload is specified but all requested events are system wide
ones, like uncore ones. (Jiri Olsa)
- ... plus lots of other updates, enhancements, cleanups and fixes"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (235 commits)
perf tools: Fix the code to strip command name
tools arch x86: Sync cpufeatures.h
tools arch: Sync arch/x86/lib/memcpy_64.S with the kernel
tools: Update asm-generic/mman-common.h copy from the kernel
perf tools: Use just forward declarations for struct thread where possible
perf tools: Add the right header to obtain PERF_ALIGN()
perf tools: Remove poll.h and wait.h from util.h
perf tools: Remove string.h, unistd.h and sys/stat.h from util.h
perf tools: Remove stale prototypes from builtin.h
perf tools: Remove string.h from util.h
perf tools: Remove sys/ioctl.h from util.h
perf tools: Remove a few more needless includes from util.h
perf tools: Include sys/param.h where needed
perf callchain: Move callchain specific routines from util.[ch]
perf tools: Add compress.h for the *_decompress_to_file() headers
perf mem: Fix display of data source snoop indication
perf debug: Move dump_stack() and sighandler_dump_stack() to debug.h
perf kvm: Make function only used by 'perf kvm' static
perf tools: Move timestamp routines from util.h to time-utils.h
perf tools: Move units conversion/formatting routines to separate object
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/hwtracing/coresight/coresight-etb10.c | 9 | ||||
-rw-r--r-- | drivers/hwtracing/coresight/coresight-etm-perf.c | 9 | ||||
-rw-r--r-- | drivers/hwtracing/coresight/coresight-priv.h | 2 | ||||
-rw-r--r-- | drivers/hwtracing/coresight/coresight-tmc-etf.c | 7 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu.c | 6 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_init.c | 101 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_proto.h | 8 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_types.h | 3 |
8 files changed, 76 insertions, 69 deletions
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index d7325c6534ad..979ea6ec7902 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -321,7 +321,7 @@ static int etb_set_buffer(struct coresight_device *csdev, static unsigned long etb_reset_buffer(struct coresight_device *csdev, struct perf_output_handle *handle, - void *sink_config, bool *lost) + void *sink_config) { unsigned long size = 0; struct cs_buffers *buf = sink_config; @@ -343,7 +343,6 @@ static unsigned long etb_reset_buffer(struct coresight_device *csdev, * resetting parameters here and squaring off with the ring * buffer API in the tracer PMU is fine. */ - *lost = !!local_xchg(&buf->lost, 0); size = local_xchg(&buf->data_size, 0); } @@ -385,7 +384,7 @@ static void etb_update_buffer(struct coresight_device *csdev, (unsigned long)write_ptr); write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1); - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); } /* @@ -396,7 +395,7 @@ static void etb_update_buffer(struct coresight_device *csdev, */ status = readl_relaxed(drvdata->base + ETB_STATUS_REG); if (status & ETB_STATUS_RAM_FULL) { - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); to_read = capacity; read_ptr = write_ptr; } else { @@ -429,7 +428,7 @@ static void etb_update_buffer(struct coresight_device *csdev, if (read_ptr > (drvdata->buffer_depth - 1)) read_ptr -= drvdata->buffer_depth; /* let the decoder know we've skipped ahead */ - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); } /* finally tell HW where we want to start reading from */ diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 26cfac3e6de7..288a423c1b27 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -302,7 +302,8 @@ out: return; fail_end_stop: - perf_aux_output_end(handle, 0, true); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + perf_aux_output_end(handle, 0); fail: event->hw.state = PERF_HES_STOPPED; goto out; @@ -310,7 +311,6 @@ fail: static void etm_event_stop(struct perf_event *event, int mode) { - bool lost; int cpu = smp_processor_id(); unsigned long size; struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); @@ -348,10 +348,9 @@ static void etm_event_stop(struct perf_event *event, int mode) return; size = sink_ops(sink)->reset_buffer(sink, handle, - event_data->snk_config, - &lost); + event_data->snk_config); - perf_aux_output_end(handle, size, lost); + perf_aux_output_end(handle, size); } /* Disabling the path make its elements available to other sessions */ diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index ef9d8e93e3b2..5f662d82052c 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -76,7 +76,6 @@ enum cs_mode { * @nr_pages: max number of pages granted to us * @offset: offset within the current buffer * @data_size: how much we collected in this run - * @lost: other than zero if we had a HW buffer wrap around * @snapshot: is this run in snapshot mode * @data_pages: a handle the ring buffer */ @@ -85,7 +84,6 @@ struct cs_buffers { unsigned int nr_pages; unsigned long offset; local_t data_size; - local_t lost; bool snapshot; void **data_pages; }; diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index 1549436e2492..aec61a6d5c63 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -329,7 +329,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev, static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev, struct perf_output_handle *handle, - void *sink_config, bool *lost) + void *sink_config) { long size = 0; struct cs_buffers *buf = sink_config; @@ -350,7 +350,6 @@ static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev, * resetting parameters here and squaring off with the ring * buffer API in the tracer PMU is fine. */ - *lost = !!local_xchg(&buf->lost, 0); size = local_xchg(&buf->data_size, 0); } @@ -389,7 +388,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, */ status = readl_relaxed(drvdata->base + TMC_STS); if (status & TMC_STS_FULL) { - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); to_read = drvdata->size; } else { to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size); @@ -434,7 +433,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, read_ptr -= drvdata->size; /* Tell the HW */ writel_relaxed(read_ptr, drvdata->base + TMC_RRP); - local_inc(&buf->lost); + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); } cur = buf->cur; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b17536d6e69b..63cacf5d6cf2 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1234,7 +1234,7 @@ static void __domain_flush_pages(struct protection_domain *domain, build_inv_iommu_pages(&cmd, address, size, domain->id, pde); - for (i = 0; i < amd_iommus_present; ++i) { + for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { if (!domain->dev_iommu[i]) continue; @@ -1278,7 +1278,7 @@ static void domain_flush_complete(struct protection_domain *domain) { int i; - for (i = 0; i < amd_iommus_present; ++i) { + for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { if (domain && !domain->dev_iommu[i]) continue; @@ -3363,7 +3363,7 @@ static int __flush_pasid(struct protection_domain *domain, int pasid, * IOMMU TLB needs to be flushed before Device TLB to * prevent device TLB refill from IOMMU TLB */ - for (i = 0; i < amd_iommus_present; ++i) { + for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { if (domain->dev_iommu[i] == 0) continue; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 6130278c5d71..5a11328f4d98 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -167,7 +167,9 @@ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the /* Array to assign indices to IOMMUs*/ struct amd_iommu *amd_iommus[MAX_IOMMUS]; -int amd_iommus_present; + +/* Number of IOMMUs present in the system */ +static int amd_iommus_present; /* IOMMUs have a non-present cache? */ bool amd_iommu_np_cache __read_mostly; @@ -254,10 +256,6 @@ static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); -static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, - u8 bank, u8 cntr, u8 fxn, - u64 *value, bool is_write); - static inline void update_last_devid(u16 devid) { if (devid > amd_iommu_last_bdf) @@ -272,6 +270,11 @@ static inline unsigned long tbl_size(int entry_size) return 1UL << shift; } +int amd_iommu_get_num_iommus(void) +{ + return amd_iommus_present; +} + /* Access to l1 and l2 indexed register spaces */ static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) @@ -1336,7 +1339,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) /* Add IOMMU to internal data structures */ list_add_tail(&iommu->list, &amd_iommu_list); - iommu->index = amd_iommus_present++; + iommu->index = amd_iommus_present++; if (unlikely(iommu->index >= MAX_IOMMUS)) { WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); @@ -1477,6 +1480,8 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } +static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value, bool is_write); static void init_iommu_perf_ctr(struct amd_iommu *iommu) { @@ -1488,8 +1493,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) amd_iommu_pc_present = true; /* Check if the performance counters can be written to */ - if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) || - (0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) || + if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || + (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || (val != val2)) { pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n"); amd_iommu_pc_present = false; @@ -2711,6 +2716,18 @@ bool amd_iommu_v2_supported(void) } EXPORT_SYMBOL(amd_iommu_v2_supported); +struct amd_iommu *get_amd_iommu(unsigned int idx) +{ + unsigned int i = 0; + struct amd_iommu *iommu; + + for_each_iommu(iommu) + if (i++ == idx) + return iommu; + return NULL; +} +EXPORT_SYMBOL(get_amd_iommu); + /**************************************************************************** * * IOMMU EFR Performance Counter support functionality. This code allows @@ -2718,17 +2735,14 @@ EXPORT_SYMBOL(amd_iommu_v2_supported); * ****************************************************************************/ -u8 amd_iommu_pc_get_max_banks(u16 devid) +u8 amd_iommu_pc_get_max_banks(unsigned int idx) { - struct amd_iommu *iommu; - u8 ret = 0; + struct amd_iommu *iommu = get_amd_iommu(idx); - /* locate the iommu governing the devid */ - iommu = amd_iommu_rlookup_table[devid]; if (iommu) - ret = iommu->max_banks; + return iommu->max_banks; - return ret; + return 0; } EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); @@ -2738,62 +2752,69 @@ bool amd_iommu_pc_supported(void) } EXPORT_SYMBOL(amd_iommu_pc_supported); -u8 amd_iommu_pc_get_max_counters(u16 devid) +u8 amd_iommu_pc_get_max_counters(unsigned int idx) { - struct amd_iommu *iommu; - u8 ret = 0; + struct amd_iommu *iommu = get_amd_iommu(idx); - /* locate the iommu governing the devid */ - iommu = amd_iommu_rlookup_table[devid]; if (iommu) - ret = iommu->max_counters; + return iommu->max_counters; - return ret; + return 0; } EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); -static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, - u8 bank, u8 cntr, u8 fxn, - u64 *value, bool is_write) +static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value, bool is_write) { u32 offset; u32 max_offset_lim; + /* Make sure the IOMMU PC resource is available */ + if (!amd_iommu_pc_present) + return -ENODEV; + /* Check for valid iommu and pc register indexing */ - if (WARN_ON((fxn > 0x28) || (fxn & 7))) + if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7))) return -ENODEV; - offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn); + offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn); /* Limit the offset to the hw defined mmio region aperture */ - max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) | + max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) | (iommu->max_counters << 8) | 0x28); if ((offset < MMIO_CNTR_REG_OFFSET) || (offset > max_offset_lim)) return -EINVAL; if (is_write) { - writel((u32)*value, iommu->mmio_base + offset); - writel((*value >> 32), iommu->mmio_base + offset + 4); + u64 val = *value & GENMASK_ULL(47, 0); + + writel((u32)val, iommu->mmio_base + offset); + writel((val >> 32), iommu->mmio_base + offset + 4); } else { *value = readl(iommu->mmio_base + offset + 4); *value <<= 32; - *value = readl(iommu->mmio_base + offset); + *value |= readl(iommu->mmio_base + offset); + *value &= GENMASK_ULL(47, 0); } return 0; } -EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val); -int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, - u64 *value, bool is_write) +int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + return -EINVAL; - /* Make sure the IOMMU PC resource is available */ - if (!amd_iommu_pc_present || iommu == NULL) - return -ENODEV; + return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false); +} +EXPORT_SYMBOL(amd_iommu_pc_get_reg); + +int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) +{ + if (!iommu) + return -EINVAL; - return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn, - value, is_write); + return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); } +EXPORT_SYMBOL(amd_iommu_pc_set_reg); diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 7eb60c15c582..466260f8a1df 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -21,6 +21,7 @@ #include "amd_iommu_types.h" +extern int amd_iommu_get_num_iommus(void); extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); extern irqreturn_t amd_iommu_int_thread(int irq, void *data); @@ -56,13 +57,6 @@ extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); -/* IOMMU Performance Counter functions */ -extern bool amd_iommu_pc_supported(void); -extern u8 amd_iommu_pc_get_max_banks(u16 devid); -extern u8 amd_iommu_pc_get_max_counters(u16 devid); -extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, - u64 *value, bool is_write); - #ifdef CONFIG_IRQ_REMAP extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu); #else diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 003f3ceb2661..4de8f4160bb8 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -611,9 +611,6 @@ extern struct list_head amd_iommu_list; */ extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; -/* Number of IOMMUs present in the system */ -extern int amd_iommus_present; - /* * Declarations for the global list of all protection domains */ |