From 61c77326d1df079f202fa79403c3ccd8c5966a81 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 16 Aug 2010 09:16:55 +0800 Subject: x86, mm: Avoid unnecessary TLB flush In x86, access and dirty bits are set automatically by CPU when CPU accesses memory. When we go into the code path of below flush_tlb_fix_spurious_fault(), we already set dirty bit for pte and don't need flush tlb. This might mean tlb entry in some CPUs hasn't dirty bit set, but this doesn't matter. When the CPUs do page write, they will automatically check the bit and no software involved. On the other hand, flush tlb in below position is harmful. Test creates CPU number of threads, each thread writes to a same but random address in same vma range and we measure the total time. Under a 4 socket system, original time is 1.96s, while with the patch, the time is 0.8s. Under a 2 socket system, there is 20% time cut too. perf shows a lot of time are taking to send ipi/handle ipi for tlb flush. Signed-off-by: Shaohua Li LKML-Reference: <20100816011655.GA362@sli10-desk.sh.intel.com> Acked-by: Suresh Siddha Cc: Andrea Archangeli Signed-off-by: H. Peter Anvin --- include/asm-generic/pgtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e2bd73e8f9c0..f4d4120e5128 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres #define move_pte(pte, prot, old_addr, new_addr) (pte) #endif +#ifndef flush_tlb_fix_spurious_fault +#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) +#endif + #ifndef pgprot_noncached #define pgprot_noncached(prot) (prot) #endif -- cgit v1.2.3 From c957ef2c59e952803766ddc22e89981ab534606f Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 20 Oct 2010 11:07:02 +0800 Subject: percpu: Introduce a read-mostly percpu API Add a new readmostly percpu section and API. This can be used to avoid dirtying data lines which are generally not written to, which is especially important for data which may be accessed by processors other than the one for which the percpu area belongs to. [ hpa: moved it *after* the page-aligned section, for obvious reasons. ] Signed-off-by: Shaohua Li LKML-Reference: <1287544022.4571.7.camel@sli10-conroe.sh.intel.com> Cc: Eric Dumazet Signed-off-by: H. Peter Anvin --- include/asm-generic/vmlinux.lds.h | 4 ++++ include/linux/percpu-defs.h | 9 +++++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a92a170fb7d..d7e7b21511b1 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -677,7 +677,9 @@ - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ *(.data..percpu..first) \ + . = ALIGN(PAGE_SIZE); \ *(.data..percpu..page_aligned) \ + *(.data..percpu..readmostly) \ *(.data..percpu) \ *(.data..percpu..shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ @@ -703,6 +705,8 @@ VMLINUX_SYMBOL(__per_cpu_load) = .; \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ *(.data..percpu..first) \ + . = ALIGN(PAGE_SIZE); \ + *(.data..percpu..readmostly) \ *(.data..percpu..page_aligned) \ *(.data..percpu) \ *(.data..percpu..shared_aligned) \ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ce2dc655cd1d..27ef6b190ea6 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -138,6 +138,15 @@ DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ __aligned(PAGE_SIZE) +/* + * Declaration/definition used for per-CPU variables that must be read mostly. + */ +#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "..readmostly") + +#define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "..readmostly") + /* * Intermodule exports for per-CPU variables. sparse forgets about * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to -- cgit v1.2.3 From 2aeb66d3036dbafc297ac553a257a40283dadb3e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 21 Oct 2010 00:15:00 -0700 Subject: x86-32, percpu: Correct the ordering of the percpu readmostly section Checkin c957ef2c59e952803766ddc22e89981ab534606f had inconsistent ordering of .data..percpu..page_aligned and .data..percpu..readmostly; the still-broken version affected x86-32 at least. The page aligned version really must be page aligned... Signed-off-by: H. Peter Anvin LKML-Reference: <1287544022.4571.7.camel@sli10-conroe.sh.intel.com> Cc: Shaohua Li Cc: Eric Dumazet --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index d7e7b21511b1..1457b81357af 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -706,8 +706,8 @@ VMLINUX_SYMBOL(__per_cpu_start) = .; \ *(.data..percpu..first) \ . = ALIGN(PAGE_SIZE); \ - *(.data..percpu..readmostly) \ *(.data..percpu..page_aligned) \ + *(.data..percpu..readmostly) \ *(.data..percpu) \ *(.data..percpu..shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ -- cgit v1.2.3