summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorPekka Enberg <penberg@kernel.org>2011-03-20 19:13:26 +0300
committerPekka Enberg <penberg@kernel.org>2011-03-20 19:13:26 +0300
commite8c500c2b64b6e237e67ecba7249e72363c47047 (patch)
treee9c62e59a879ebef45b0fc2823d318b2fb2fed84 /include
parentc53badd0801728feedfcccae04239410b52b0d03 (diff)
parenta24c5a0ea902bcda348f086bd909cc2d6e305bf8 (diff)
downloadlinux-e8c500c2b64b6e237e67ecba7249e72363c47047.tar.xz
Merge branch 'slub/lockless' into for-linus
Conflicts: include/linux/slub_def.h
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/vmlinux.lds.h35
-rw-r--r--include/linux/percpu.h128
-rw-r--r--include/linux/slub_def.h7
3 files changed, 155 insertions, 15 deletions
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index fe77e3395b40..22d3342d6af4 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -15,7 +15,7 @@
* HEAD_TEXT_SECTION
* INIT_TEXT_SECTION(PAGE_SIZE)
* INIT_DATA_SECTION(...)
- * PERCPU(PAGE_SIZE)
+ * PERCPU(CACHELINE_SIZE, PAGE_SIZE)
* __init_end = .;
*
* _stext = .;
@@ -683,13 +683,18 @@
/**
* PERCPU_VADDR - define output section for percpu area
+ * @cacheline: cacheline size
* @vaddr: explicit base address (optional)
* @phdr: destination PHDR (optional)
*
- * Macro which expands to output section for percpu area. If @vaddr
- * is not blank, it specifies explicit base address and all percpu
- * symbols will be offset from the given address. If blank, @vaddr
- * always equals @laddr + LOAD_OFFSET.
+ * Macro which expands to output section for percpu area.
+ *
+ * @cacheline is used to align subsections to avoid false cacheline
+ * sharing between subsections for different purposes.
+ *
+ * If @vaddr is not blank, it specifies explicit base address and all
+ * percpu symbols will be offset from the given address. If blank,
+ * @vaddr always equals @laddr + LOAD_OFFSET.
*
* @phdr defines the output PHDR to use if not blank. Be warned that
* output PHDR is sticky. If @phdr is specified, the next output
@@ -700,7 +705,7 @@
* If there is no need to put the percpu section at a predetermined
* address, use PERCPU().
*/
-#define PERCPU_VADDR(vaddr, phdr) \
+#define PERCPU_VADDR(cacheline, vaddr, phdr) \
VMLINUX_SYMBOL(__per_cpu_load) = .; \
.data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
- LOAD_OFFSET) { \
@@ -708,7 +713,9 @@
*(.data..percpu..first) \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
+ . = ALIGN(cacheline); \
*(.data..percpu..readmostly) \
+ . = ALIGN(cacheline); \
*(.data..percpu) \
*(.data..percpu..shared_aligned) \
VMLINUX_SYMBOL(__per_cpu_end) = .; \
@@ -717,18 +724,18 @@
/**
* PERCPU - define output section for percpu area, simple version
+ * @cacheline: cacheline size
* @align: required alignment
*
- * Align to @align and outputs output section for percpu area. This
- * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and
+ * Align to @align and outputs output section for percpu area. This macro
+ * doesn't manipulate @vaddr or @phdr and __per_cpu_load and
* __per_cpu_start will be identical.
*
- * This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except
- * that __per_cpu_load is defined as a relative symbol against
- * .data..percpu which is required for relocatable x86_32
- * configuration.
+ * This macro is equivalent to ALIGN(@align); PERCPU_VADDR(@cacheline,,)
+ * except that __per_cpu_load is defined as a relative symbol against
+ * .data..percpu which is required for relocatable x86_32 configuration.
*/
-#define PERCPU(align) \
+#define PERCPU(cacheline, align) \
. = ALIGN(align); \
.data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__per_cpu_load) = .; \
@@ -736,7 +743,9 @@
*(.data..percpu..first) \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
+ . = ALIGN(cacheline); \
*(.data..percpu..readmostly) \
+ . = ALIGN(cacheline); \
*(.data..percpu) \
*(.data..percpu..shared_aligned) \
VMLINUX_SYMBOL(__per_cpu_end) = .; \
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 27c3c6fcfad3..3a5c4449fd36 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -255,6 +255,30 @@ extern void __bad_size_call_parameter(void);
pscr2_ret__; \
})
+/*
+ * Special handling for cmpxchg_double. cmpxchg_double is passed two
+ * percpu variables. The first has to be aligned to a double word
+ * boundary and the second has to follow directly thereafter.
+ */
+#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \
+({ \
+ bool pdcrb_ret__; \
+ __verify_pcpu_ptr(&pcp1); \
+ BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \
+ VM_BUG_ON((unsigned long)(&pcp1) % (2 * sizeof(pcp1))); \
+ VM_BUG_ON((unsigned long)(&pcp2) != \
+ (unsigned long)(&pcp1) + sizeof(pcp1)); \
+ switch(sizeof(pcp1)) { \
+ case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \
+ case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \
+ case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \
+ case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \
+ default: \
+ __bad_size_call_parameter(); break; \
+ } \
+ pdcrb_ret__; \
+})
+
#define __pcpu_size_call(stem, variable, ...) \
do { \
__verify_pcpu_ptr(&(variable)); \
@@ -501,6 +525,45 @@ do { \
#endif
/*
+ * cmpxchg_double replaces two adjacent scalars at once. The first
+ * two parameters are per cpu variables which have to be of the same
+ * size. A truth value is returned to indicate success or failure
+ * (since a double register result is difficult to handle). There is
+ * very limited hardware support for these operations, so only certain
+ * sizes may work.
+ */
+#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+({ \
+ int ret__; \
+ preempt_disable(); \
+ ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \
+ oval1, oval2, nval1, nval2); \
+ preempt_enable(); \
+ ret__; \
+})
+
+#ifndef this_cpu_cmpxchg_double
+# ifndef this_cpu_cmpxchg_double_1
+# define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef this_cpu_cmpxchg_double_2
+# define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef this_cpu_cmpxchg_double_4
+# define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef this_cpu_cmpxchg_double_8
+# define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
+#endif
+
+/*
* Generic percpu operations that do not require preemption handling.
* Either we do not care about races or the caller has the
* responsibility of handling preemptions issues. Arch code can still
@@ -703,6 +766,39 @@ do { \
__pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval)
#endif
+#define __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+({ \
+ int __ret = 0; \
+ if (__this_cpu_read(pcp1) == (oval1) && \
+ __this_cpu_read(pcp2) == (oval2)) { \
+ __this_cpu_write(pcp1, (nval1)); \
+ __this_cpu_write(pcp2, (nval2)); \
+ __ret = 1; \
+ } \
+ (__ret); \
+})
+
+#ifndef __this_cpu_cmpxchg_double
+# ifndef __this_cpu_cmpxchg_double_1
+# define __this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef __this_cpu_cmpxchg_double_2
+# define __this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef __this_cpu_cmpxchg_double_4
+# define __this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef __this_cpu_cmpxchg_double_8
+# define __this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __pcpu_double_call_return_bool(__this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
+#endif
+
/*
* IRQ safe versions of the per cpu RMW operations. Note that these operations
* are *not* safe against modification of the same variable from another
@@ -823,4 +919,36 @@ do { \
__pcpu_size_call_return2(irqsafe_cpu_cmpxchg_, (pcp), oval, nval)
#endif
+#define irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+({ \
+ int ret__; \
+ unsigned long flags; \
+ local_irq_save(flags); \
+ ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \
+ oval1, oval2, nval1, nval2); \
+ local_irq_restore(flags); \
+ ret__; \
+})
+
+#ifndef irqsafe_cpu_cmpxchg_double
+# ifndef irqsafe_cpu_cmpxchg_double_1
+# define irqsafe_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef irqsafe_cpu_cmpxchg_double_2
+# define irqsafe_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef irqsafe_cpu_cmpxchg_double_4
+# define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef irqsafe_cpu_cmpxchg_double_8
+# define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# define irqsafe_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __pcpu_double_call_return_int(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
+#endif
+
#endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index ae0093cc5189..90fbb6d87e11 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -35,7 +35,10 @@ enum stat_item {
NR_SLUB_STAT_ITEMS };
struct kmem_cache_cpu {
- void **freelist; /* Pointer to first free per cpu object */
+ void **freelist; /* Pointer to next available object */
+#ifdef CONFIG_CMPXCHG_LOCAL
+ unsigned long tid; /* Globally unique transaction id */
+#endif
struct page *page; /* The slab from which we are allocating */
int node; /* The node of the page (or -1 for debug) */
#ifdef CONFIG_SLUB_STATS
@@ -70,6 +73,7 @@ struct kmem_cache {
struct kmem_cache_cpu __percpu *cpu_slab;
/* Used for retriving partial slabs etc */
unsigned long flags;
+ unsigned long min_partial;
int size; /* The size of an object including meta data */
int objsize; /* The size of an object without meta data */
int offset; /* Free pointer offset. */
@@ -84,7 +88,6 @@ struct kmem_cache {
int inuse; /* Offset to metadata */
int align; /* Alignment */
int reserved; /* Reserved bytes at the end of slabs */
- unsigned long min_partial;
const char *name; /* Name (only for display!) */
struct list_head list; /* List of slab caches */
#ifdef CONFIG_SYSFS