From fc0c2028135c7f75fce36b90e44efb8003a9173b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Mar 2016 10:30:19 -0800 Subject: x86, pmem: use memcpy_mcsafe() for memcpy_from_pmem() Update the definition of memcpy_from_pmem() to return 0 or a negative error code. Implement x86/arch_memcpy_from_pmem() with memcpy_mcsafe(). Cc: Borislav Petkov Cc: Tony Luck Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Acked-by: Ingo Molnar Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- arch/x86/include/asm/pmem.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index bf8b35d2035a..fbc5e92e1ecc 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -47,6 +47,15 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, BUG(); } +static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, + size_t n) +{ + if (static_cpu_has(X86_FEATURE_MCE_RECOVERY)) + return memcpy_mcsafe(dst, (void __force *) src, n); + memcpy(dst, (void __force *) src, n); + return 0; +} + /** * arch_wmb_pmem - synchronize writes to persistent memory * -- cgit v1.2.3 From 14ebda3394fd3e5388747e742e510b0802a65d24 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Mar 2016 17:56:57 +0200 Subject: KVM: x86: reduce default value of halt_poll_ns parameter Windows lets applications choose the frequency of the timer tick, and in Windows 10 the maximum rate was changed from 1024 Hz to 2048 Hz. Unfortunately, because of the way the Windows API works, most applications who need a higher rate than the default 64 Hz will just do timeGetDevCaps(&tc, sizeof(tc)); timeBeginPeriod(tc.wPeriodMin); and pick the maximum rate. This causes very high CPU usage when playing media or games on Windows 10, even if the guest does not actually use the CPU very much, because the frequent timer tick causes halt_poll_ns to kick in. There is no really good solution, especially because Microsoft could sooner or later bump the limit to 4096 Hz, but for now the best we can do is lower a bit the upper limit for halt_poll_ns. :-( Reported-by: Jon Panozzo Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f62a9f37f79f..b7e394485a5f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -43,7 +43,7 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 -#define KVM_HALT_POLL_NS_DEFAULT 500000 +#define KVM_HALT_POLL_NS_DEFAULT 400000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS -- cgit v1.2.3 From 858eaaa711700ce4595e039441e239e56d7b9514 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Fri, 1 Apr 2016 14:31:26 -0700 Subject: mm/rmap: batched invalidations should use existing api The recently introduced batched invalidations mechanism uses its own mechanism for shootdown. However, it does wrong accounting of interrupts (e.g., inc_irq_stat is called for local invalidations), trace-points (e.g., TLB_REMOTE_SHOOTDOWN for local invalidations) and may break some platforms as it bypasses the invalidation mechanisms of Xen and SGI UV. This patch reuses the existing TLB flushing mechnaisms instead. We use NULL as mm to indicate a global invalidation is required. Fixes 72b252aed506b8 ("mm: send one IPI per CPU to TLB flush all entries after unmapping pages") Signed-off-by: Nadav Amit Cc: Mel Gorman Cc: Rik van Riel Cc: Dave Hansen Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/tlbflush.h | 6 ------ arch/x86/mm/tlb.c | 2 +- mm/rmap.c | 28 +++++++--------------------- 3 files changed, 8 insertions(+), 28 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index c24b4224d439..1fde8d580a5b 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -319,12 +319,6 @@ static inline void reset_lazy_tlbstate(void) #endif /* SMP */ -/* Not inlined due to inc_irq_stat not being defined yet */ -#define flush_tlb_local() { \ - inc_irq_stat(irq_tlb_count); \ - local_flush_tlb(); \ -} - #ifndef CONFIG_PARAVIRT #define flush_tlb_others(mask, mm, start, end) \ native_flush_tlb_others(mask, mm, start, end) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5fb6adaaa796..fe9b9f776361 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -104,7 +104,7 @@ static void flush_tlb_func(void *info) inc_irq_stat(irq_tlb_count); - if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) + if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); diff --git a/mm/rmap.c b/mm/rmap.c index c399a0d41b31..395e314b7996 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -569,19 +569,6 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma) } #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH -static void percpu_flush_tlb_batch_pages(void *data) -{ - /* - * All TLB entries are flushed on the assumption that it is - * cheaper to flush all TLBs and let them be refilled than - * flushing individual PFNs. Note that we do not track mm's - * to flush as that might simply be multiple full TLB flushes - * for no gain. - */ - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); - flush_tlb_local(); -} - /* * Flush TLB entries for recently unmapped pages from remote CPUs. It is * important if a PTE was dirty when it was unmapped that it's flushed @@ -598,15 +585,14 @@ void try_to_unmap_flush(void) cpu = get_cpu(); - trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, -1UL); - - if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask)) - percpu_flush_tlb_batch_pages(&tlb_ubc->cpumask); - - if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids) { - smp_call_function_many(&tlb_ubc->cpumask, - percpu_flush_tlb_batch_pages, (void *)tlb_ubc, true); + if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask)) { + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); + local_flush_tlb(); + trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); } + + if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids) + flush_tlb_others(&tlb_ubc->cpumask, NULL, 0, TLB_FLUSH_ALL); cpumask_clear(&tlb_ubc->cpumask); tlb_ubc->flush_required = false; tlb_ubc->writable = false; -- cgit v1.2.3 From 5a63426e2a18775ed05b20e3bc90c68bacb1f68a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 6 Apr 2016 17:15:55 -0400 Subject: tools/power turbostat: print IRTL MSRs Some processors use the Interrupt Response Time Limit (IRTL) MSR value to describe the maximum IRQ response time latency for deep package C-states. (Though others have the register, but do not use it) Lets print it out to give insight into the cases where it is used. IRTL begain in SNB, with PC3/PC6/PC7, and HSW added PC8/PC9/PC10. Signed-off-by: Len Brown Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/msr-index.h | 8 +++++ tools/power/x86/turbostat/turbostat.c | 59 +++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 3 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3fec311f7b9a..7b27e3241e5a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -162,6 +162,14 @@ #define MSR_PKG_C9_RESIDENCY 0x00000631 #define MSR_PKG_C10_RESIDENCY 0x00000632 +/* Interrupt Response Limit */ +#define MSR_PKGC3_IRTL 0x0000060a +#define MSR_PKGC6_IRTL 0x0000060b +#define MSR_PKGC7_IRTL 0x0000060c +#define MSR_PKGC8_IRTL 0x00000633 +#define MSR_PKGC9_IRTL 0x00000634 +#define MSR_PKGC10_IRTL 0x00000635 + /* Run Time Average Power Limiting (RAPL) Interface */ #define MSR_RAPL_POWER_UNIT 0x00000606 diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e2440df6029e..2f60fdbf9213 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -66,6 +66,8 @@ unsigned int do_slm_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; unsigned int has_epb; +unsigned int do_irtl_snb; +unsigned int do_irtl_hsw; unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; @@ -1579,6 +1581,47 @@ dump_config_tdp(void) fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); fprintf(outf, ")\n"); } + +unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; + +void print_irtl(void) +{ + unsigned long long msr; + + get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + if (!do_irtl_hsw) + return; + + get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + +} void free_fd_percpu(void) { int i; @@ -2879,9 +2922,14 @@ int has_snb_msrs(unsigned int family, unsigned int model) /* * HSW adds support for additional MSRs: * - * MSR_PKG_C8_RESIDENCY 0x00000630 - * MSR_PKG_C9_RESIDENCY 0x00000631 - * MSR_PKG_C10_RESIDENCY 0x00000632 + * MSR_PKG_C8_RESIDENCY 0x00000630 + * MSR_PKG_C9_RESIDENCY 0x00000631 + * MSR_PKG_C10_RESIDENCY 0x00000632 + * + * MSR_PKGC8_IRTL 0x00000633 + * MSR_PKGC9_IRTL 0x00000634 + * MSR_PKGC10_IRTL 0x00000635 + * */ int has_hsw_msrs(unsigned int family, unsigned int model) { @@ -3254,11 +3302,13 @@ void process_cpuid() do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); do_snb_cstates = has_snb_msrs(family, model); + do_irtl_snb = has_snb_msrs(family, model); do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); do_pc3 = (pkg_cstate_limit >= PCL__3); do_pc6 = (pkg_cstate_limit >= PCL__6); do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); do_c8_c9_c10 = has_hsw_msrs(family, model); + do_irtl_hsw = has_hsw_msrs(family, model); do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); @@ -3564,6 +3614,9 @@ void turbostat_init() if (debug) for_all_cpus(print_thermal, ODD_COUNTERS); + + if (debug && do_irtl_snb) + print_irtl(); } int fork_it(char **argv) -- cgit v1.2.3