author    Jakub Kicinski <kuba@kernel.org>  2023-03-29 04:49:35 +0300
committer Jakub Kicinski <kuba@kernel.org>  2023-03-29 04:49:35 +0300
commit    b133fffe57ae941dedf607142a9616b8701cdcb2 (patch)
tree      33e9332fa6c9527e66b875a7422f5f47ec2f7491
parent    163c2c7059178898ad7512294ca3a768df2c1515 (diff)
parent    ee1ee6db07795d9637bc5e8993a8ddcf886541ef (diff)
download  linux-b133fffe57ae941dedf607142a9616b8701cdcb2.tar.xz
Merge branch 'locking/rcuref' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pulling rcurefs from Peter for tglx's work.

Link: https://lore.kernel.org/all/20230328084534.GE4253@hirez.programming.kicks-ass.net/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--  include/linux/atomic/atomic-arch-fallback.h | 208
-rw-r--r--  include/linux/atomic/atomic-instrumented.h  |  68
-rw-r--r--  include/linux/atomic/atomic-long.h          |  38
-rw-r--r--  include/linux/rcuref.h                      | 155
-rw-r--r--  include/linux/types.h                       |   6
-rw-r--r--  lib/Makefile                                |   2
-rw-r--r--  lib/rcuref.c                                | 281
-rw-r--r--  scripts/atomic/atomics.tbl                  |   2
-rwxr-xr-x  scripts/atomic/fallbacks/add_negative       |  11
9 files changed, 752 insertions(+), 19 deletions(-)
diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
index 77bc5522e61c..4226379a232d 100644
--- a/include/linux/atomic/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
@@ -1208,15 +1208,21 @@ arch_atomic_inc_and_test(atomic_t *v)
#define arch_atomic_inc_and_test arch_atomic_inc_and_test
#endif
+#ifndef arch_atomic_add_negative_relaxed
+#ifdef arch_atomic_add_negative
+#define arch_atomic_add_negative_acquire arch_atomic_add_negative
+#define arch_atomic_add_negative_release arch_atomic_add_negative
+#define arch_atomic_add_negative_relaxed arch_atomic_add_negative
+#endif /* arch_atomic_add_negative */
+
#ifndef arch_atomic_add_negative
/**
- * arch_atomic_add_negative - add and test if negative
+ * arch_atomic_add_negative - Add and test if negative
* @i: integer value to add
* @v: pointer of type atomic_t
*
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
+ * Atomically adds @i to @v and returns true if the result is negative,
+ * or false when the result is greater than or equal to zero.
*/
static __always_inline bool
arch_atomic_add_negative(int i, atomic_t *v)
@@ -1226,6 +1232,95 @@ arch_atomic_add_negative(int i, atomic_t *v)
#define arch_atomic_add_negative arch_atomic_add_negative
#endif
+#ifndef arch_atomic_add_negative_acquire
+/**
+ * arch_atomic_add_negative_acquire - Add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns true if the result is negative,
+ * or false when the result is greater than or equal to zero.
+ */
+static __always_inline bool
+arch_atomic_add_negative_acquire(int i, atomic_t *v)
+{
+ return arch_atomic_add_return_acquire(i, v) < 0;
+}
+#define arch_atomic_add_negative_acquire arch_atomic_add_negative_acquire
+#endif
+
+#ifndef arch_atomic_add_negative_release
+/**
+ * arch_atomic_add_negative_release - Add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns true if the result is negative,
+ * or false when the result is greater than or equal to zero.
+ */
+static __always_inline bool
+arch_atomic_add_negative_release(int i, atomic_t *v)
+{
+ return arch_atomic_add_return_release(i, v) < 0;
+}
+#define arch_atomic_add_negative_release arch_atomic_add_negative_release
+#endif
+
+#ifndef arch_atomic_add_negative_relaxed
+/**
+ * arch_atomic_add_negative_relaxed - Add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns true if the result is negative,
+ * or false when the result is greater than or equal to zero.
+ */
+static __always_inline bool
+arch_atomic_add_negative_relaxed(int i, atomic_t *v)
+{
+ return arch_atomic_add_return_relaxed(i, v) < 0;
+}
+#define arch_atomic_add_negative_relaxed arch_atomic_add_negative_relaxed
+#endif
+
+#else /* arch_atomic_add_negative_relaxed */
+
+#ifndef arch_atomic_add_negative_acquire
+static __always_inline bool
+arch_atomic_add_negative_acquire(int i, atomic_t *v)
+{
+ bool ret = arch_atomic_add_negative_relaxed(i, v);
+ __atomic_acquire_fence();
+ return ret;
+}
+#define arch_atomic_add_negative_acquire arch_atomic_add_negative_acquire
+#endif
+
+#ifndef arch_atomic_add_negative_release
+static __always_inline bool
+arch_atomic_add_negative_release(int i, atomic_t *v)
+{
+ __atomic_release_fence();
+ return arch_atomic_add_negative_relaxed(i, v);
+}
+#define arch_atomic_add_negative_release arch_atomic_add_negative_release
+#endif
+
+#ifndef arch_atomic_add_negative
+static __always_inline bool
+arch_atomic_add_negative(int i, atomic_t *v)
+{
+ bool ret;
+ __atomic_pre_full_fence();
+ ret = arch_atomic_add_negative_relaxed(i, v);
+ __atomic_post_full_fence();
+ return ret;
+}
+#define arch_atomic_add_negative arch_atomic_add_negative
+#endif
+
+#endif /* arch_atomic_add_negative_relaxed */
+
#ifndef arch_atomic_fetch_add_unless
/**
* arch_atomic_fetch_add_unless - add unless the number is already a given value
@@ -2329,15 +2424,21 @@ arch_atomic64_inc_and_test(atomic64_t *v)
#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
#endif
+#ifndef arch_atomic64_add_negative_relaxed
+#ifdef arch_atomic64_add_negative
+#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative
+#define arch_atomic64_add_negative_release arch_atomic64_add_negative
+#define arch_atomic64_add_negative_relaxed arch_atomic64_add_negative
+#endif /* arch_atomic64_add_negative */
+
#ifndef arch_atomic64_add_negative
/**
- * arch_atomic64_add_negative - add and test if negative
+ * arch_atomic64_add_negative - Add and test if negative
* @i: integer value to add
* @v: pointer of type atomic64_t
*
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
+ * Atomically adds @i to @v and returns true if the result is negative,
+ * or false when the result is greater than or equal to zero.
*/
static __always_inline bool
arch_atomic64_add_negative(s64 i, atomic64_t *v)
@@ -2347,6 +2448,95 @@ arch_atomic64_add_negative(s64 i, atomic64_t *v)
#define arch_atomic64_add_negative arch_atomic64_add_negative
#endif
+#ifndef arch_atomic64_add_negative_acquire
+/**
+ * arch_atomic64_add_negative_acquire - Add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically adds @i to @v and returns true if the result is negative,
+ * or false when the result is greater than or equal to zero.
+ */
+static __always_inline bool
+arch_atomic64_add_negative_acquire(s64 i, atomic64_t *v)
+{
+ return arch_atomic64_add_return_acquire(i, v) < 0;
+}
+#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative_acquire
+#endif
+
+#ifndef arch_atomic64_add_negative_release
+/**
+ * arch_atomic64_add_negative_release - Add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically adds @i to @v and returns true if the result is negative,
+ * or false when the result is greater than or equal to zero.
+ */
+static __always_inline bool
+arch_atomic64_add_negative_release(s64 i, atomic64_t *v)
+{
+ return arch_atomic64_add_return_release(i, v) < 0;
+}
+#define arch_atomic64_add_negative_release arch_atomic64_add_negative_release
+#endif
+
+#ifndef arch_atomic64_add_negative_relaxed
+/**
+ * arch_atomic64_add_negative_relaxed - Add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically adds @i to @v and returns true if the result is negative,
+ * or false when the result is greater than or equal to zero.
+ */
+static __always_inline bool
+arch_atomic64_add_negative_relaxed(s64 i, atomic64_t *v)
+{
+ return arch_atomic64_add_return_relaxed(i, v) < 0;
+}
+#define arch_atomic64_add_negative_relaxed arch_atomic64_add_negative_relaxed
+#endif
+
+#else /* arch_atomic64_add_negative_relaxed */
+
+#ifndef arch_atomic64_add_negative_acquire
+static __always_inline bool
+arch_atomic64_add_negative_acquire(s64 i, atomic64_t *v)
+{
+ bool ret = arch_atomic64_add_negative_relaxed(i, v);
+ __atomic_acquire_fence();
+ return ret;
+}
+#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative_acquire
+#endif
+
+#ifndef arch_atomic64_add_negative_release
+static __always_inline bool
+arch_atomic64_add_negative_release(s64 i, atomic64_t *v)
+{
+ __atomic_release_fence();
+ return arch_atomic64_add_negative_relaxed(i, v);
+}
+#define arch_atomic64_add_negative_release arch_atomic64_add_negative_release
+#endif
+
+#ifndef arch_atomic64_add_negative
+static __always_inline bool
+arch_atomic64_add_negative(s64 i, atomic64_t *v)
+{
+ bool ret;
+ __atomic_pre_full_fence();
+ ret = arch_atomic64_add_negative_relaxed(i, v);
+ __atomic_post_full_fence();
+ return ret;
+}
+#define arch_atomic64_add_negative arch_atomic64_add_negative
+#endif
+
+#endif /* arch_atomic64_add_negative_relaxed */
+
#ifndef arch_atomic64_fetch_add_unless
/**
* arch_atomic64_fetch_add_unless - add unless the number is already a given value
@@ -2456,4 +2646,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v)
#endif
#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// b5e87bdd5ede61470c29f7a7e4de781af3770f09
+// 00071fffa021cec66f6290d706d69c91df87bade
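Note on the fallback scheme above: the #else branch derives the acquire, release and fully ordered forms from the relaxed primitive plus explicit fences. A minimal userspace C11 analogue of that composition, for illustration only; the kernel's __atomic_acquire_fence()/__atomic_release_fence() and the pre/post full fences are modeled here with atomic_thread_fence(), and this is not kernel code:

#include <stdatomic.h>
#include <stdbool.h>

/* Relaxed primitive: add and test whether the result went negative
 * (signed overflow ignored for the sake of the illustration). */
static inline bool add_negative_relaxed(atomic_int *v, int i)
{
	return atomic_fetch_add_explicit(v, i, memory_order_relaxed) + i < 0;
}

/* Acquire variant: fence *after* the relaxed op, as in the #else branch. */
static inline bool add_negative_acquire(atomic_int *v, int i)
{
	bool ret = add_negative_relaxed(v, i);
	atomic_thread_fence(memory_order_acquire);
	return ret;
}

/* Release variant: fence *before* the relaxed op. */
static inline bool add_negative_release(atomic_int *v, int i)
{
	atomic_thread_fence(memory_order_release);
	return add_negative_relaxed(v, i);
}

/* Fully ordered variant: fences on both sides, modeling
 * __atomic_pre_full_fence()/__atomic_post_full_fence(). */
static inline bool add_negative(atomic_int *v, int i)
{
	bool ret;

	atomic_thread_fence(memory_order_seq_cst);
	ret = add_negative_relaxed(v, i);
	atomic_thread_fence(memory_order_seq_cst);
	return ret;
}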
diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index 7a139ec030b0..0496816738ca 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -592,6 +592,28 @@ atomic_add_negative(int i, atomic_t *v)
return arch_atomic_add_negative(i, v);
}
+static __always_inline bool
+atomic_add_negative_acquire(int i, atomic_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_add_negative_acquire(i, v);
+}
+
+static __always_inline bool
+atomic_add_negative_release(int i, atomic_t *v)
+{
+ kcsan_release();
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_add_negative_release(i, v);
+}
+
+static __always_inline bool
+atomic_add_negative_relaxed(int i, atomic_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_add_negative_relaxed(i, v);
+}
+
static __always_inline int
atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
@@ -1211,6 +1233,28 @@ atomic64_add_negative(s64 i, atomic64_t *v)
return arch_atomic64_add_negative(i, v);
}
+static __always_inline bool
+atomic64_add_negative_acquire(s64 i, atomic64_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic64_add_negative_acquire(i, v);
+}
+
+static __always_inline bool
+atomic64_add_negative_release(s64 i, atomic64_t *v)
+{
+ kcsan_release();
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic64_add_negative_release(i, v);
+}
+
+static __always_inline bool
+atomic64_add_negative_relaxed(s64 i, atomic64_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic64_add_negative_relaxed(i, v);
+}
+
static __always_inline s64
atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
@@ -1830,6 +1874,28 @@ atomic_long_add_negative(long i, atomic_long_t *v)
return arch_atomic_long_add_negative(i, v);
}
+static __always_inline bool
+atomic_long_add_negative_acquire(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_add_negative_acquire(i, v);
+}
+
+static __always_inline bool
+atomic_long_add_negative_release(long i, atomic_long_t *v)
+{
+ kcsan_release();
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_add_negative_release(i, v);
+}
+
+static __always_inline bool
+atomic_long_add_negative_relaxed(long i, atomic_long_t *v)
+{
+ instrument_atomic_read_write(v, sizeof(*v));
+ return arch_atomic_long_add_negative_relaxed(i, v);
+}
+
static __always_inline long
atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
{
@@ -2083,4 +2149,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
})
#endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// 764f741eb77a7ad565dc8d99ce2837d5542e8aee
+// 1b485de9cbaa4900de59e14ee2084357eaeb1c3a
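These instrumented wrappers are the entry points generic kernel code is expected to call. A hedged sketch of the ordering split they enable for reference counting; the helper names are illustrative, but the relaxed/release pairing matches what the rcuref code below actually uses:

/* Illustrative only: helper names are hypothetical. */
static inline bool sketch_ref_get(atomic_t *refs)
{
	/* The lookup (lock acquire or RCU dependent load) already
	 * orders the access, so the increment can be relaxed. */
	return !atomic_add_negative_relaxed(1, refs);
}

static inline bool sketch_ref_put(atomic_t *refs)
{
	/* Release: prior stores to the object must be visible
	 * before the reference is dropped. */
	return atomic_add_negative_release(-1, refs);
}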
diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h
index 800b8c35992d..2fc51ba66beb 100644
--- a/include/linux/atomic/atomic-long.h
+++ b/include/linux/atomic/atomic-long.h
@@ -479,6 +479,24 @@ arch_atomic_long_add_negative(long i, atomic_long_t *v)
return arch_atomic64_add_negative(i, v);
}
+static __always_inline bool
+arch_atomic_long_add_negative_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic64_add_negative_acquire(i, v);
+}
+
+static __always_inline bool
+arch_atomic_long_add_negative_release(long i, atomic_long_t *v)
+{
+ return arch_atomic64_add_negative_release(i, v);
+}
+
+static __always_inline bool
+arch_atomic_long_add_negative_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic64_add_negative_relaxed(i, v);
+}
+
static __always_inline long
arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
{
@@ -973,6 +991,24 @@ arch_atomic_long_add_negative(long i, atomic_long_t *v)
return arch_atomic_add_negative(i, v);
}
+static __always_inline bool
+arch_atomic_long_add_negative_acquire(long i, atomic_long_t *v)
+{
+ return arch_atomic_add_negative_acquire(i, v);
+}
+
+static __always_inline bool
+arch_atomic_long_add_negative_release(long i, atomic_long_t *v)
+{
+ return arch_atomic_add_negative_release(i, v);
+}
+
+static __always_inline bool
+arch_atomic_long_add_negative_relaxed(long i, atomic_long_t *v)
+{
+ return arch_atomic_add_negative_relaxed(i, v);
+}
+
static __always_inline long
arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
{
@@ -1011,4 +1047,4 @@ arch_atomic_long_dec_if_positive(atomic_long_t *v)
#endif /* CONFIG_64BIT */
#endif /* _LINUX_ATOMIC_LONG_H */
-// e8f0e08ff072b74d180eabe2ad001282b38c2c88
+// a194c07d7d2f4b0e178d3c118c919775d5d65f50
diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h
new file mode 100644
index 000000000000..2c8bfd0f1b6b
--- /dev/null
+++ b/include/linux/rcuref.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _LINUX_RCUREF_H
+#define _LINUX_RCUREF_H
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/limits.h>
+#include <linux/lockdep.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate.h>
+
+#define RCUREF_ONEREF 0x00000000U
+#define RCUREF_MAXREF 0x7FFFFFFFU
+#define RCUREF_SATURATED 0xA0000000U
+#define RCUREF_RELEASED 0xC0000000U
+#define RCUREF_DEAD 0xE0000000U
+#define RCUREF_NOREF 0xFFFFFFFFU
+
+/**
+ * rcuref_init - Initialize a rcuref reference count with the given reference count
+ * @ref: Pointer to the reference count
+ * @cnt: The initial reference count typically '1'
+ */
+static inline void rcuref_init(rcuref_t *ref, unsigned int cnt)
+{
+ atomic_set(&ref->refcnt, cnt - 1);
+}
+
+/**
+ * rcuref_read - Read the number of held reference counts of a rcuref
+ * @ref: Pointer to the reference count
+ *
+ * Return: The number of held references (0 ... N)
+ */
+static inline unsigned int rcuref_read(rcuref_t *ref)
+{
+ unsigned int c = atomic_read(&ref->refcnt);
+
+ /* Return 0 if within the DEAD zone. */
+ return c >= RCUREF_RELEASED ? 0 : c + 1;
+}
+
+extern __must_check bool rcuref_get_slowpath(rcuref_t *ref);
+
+/**
+ * rcuref_get - Acquire one reference on a rcuref reference count
+ * @ref: Pointer to the reference count
+ *
+ * Similar to atomic_inc_not_zero() but saturates at RCUREF_MAXREF.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See documentation in lib/rcuref.c
+ *
+ * Return:
+ * False if the attempt to acquire a reference failed. This happens
+ * when the last reference has been put already
+ *
+ * True if a reference was successfully acquired
+ */
+static inline __must_check bool rcuref_get(rcuref_t *ref)
+{
+ /*
+ * Unconditionally increase the reference count. The saturation and
+ * dead zones provide enough tolerance for this.
+ */
+ if (likely(!atomic_add_negative_relaxed(1, &ref->refcnt)))
+ return true;
+
+ /* Handle the cases inside the saturation and dead zones */
+ return rcuref_get_slowpath(ref);
+}
+
+extern __must_check bool rcuref_put_slowpath(rcuref_t *ref);
+
+/*
+ * Internal helper. Do not invoke directly.
+ */
+static __always_inline __must_check bool __rcuref_put(rcuref_t *ref)
+{
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && preemptible(),
+ "suspicious rcuref_put_rcusafe() usage");
+ /*
+ * Unconditionally decrease the reference count. The saturation and
+ * dead zones provide enough tolerance for this.
+ */
+ if (likely(!atomic_add_negative_release(-1, &ref->refcnt)))
+ return false;
+
+ /*
+ * Handle the last reference drop and cases inside the saturation
+ * and dead zones.
+ */
+ return rcuref_put_slowpath(ref);
+}
+
+/**
+ * rcuref_put_rcusafe -- Release one reference for a rcuref reference count RCU safe
+ * @ref: Pointer to the reference count
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides an acquire ordering on success such that free()
+ * must come after.
+ *
+ * Can be invoked from contexts which guarantee that no grace period can
+ * happen which would free the object concurrently if the decrement drops
+ * the last reference and the slowpath races against a concurrent get() and
+ * put() pair. rcu_read_lock()'ed and atomic contexts qualify.
+ *
+ * Return:
+ * True if this was the last reference with no future references
+ * possible. This signals the caller that it can safely release the
+ * object which is protected by the reference counter.
+ *
+ * False if there are still active references or the put() raced
+ * with a concurrent get()/put() pair. Caller is not allowed to
+ * release the protected object.
+ */
+static inline __must_check bool rcuref_put_rcusafe(rcuref_t *ref)
+{
+ return __rcuref_put(ref);
+}
+
+/**
+ * rcuref_put -- Release one reference for a rcuref reference count
+ * @ref: Pointer to the reference count
+ *
+ * Can be invoked from any context.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides an acquire ordering on success such that free()
+ * must come after.
+ *
+ * Return:
+ *
+ * True if this was the last reference with no future references
+ * possible. This signals the caller that it can safely schedule the
+ * object, which is protected by the reference counter, for
+ * deconstruction.
+ *
+ * False if there are still active references or the put() raced
+ * with a concurrent get()/put() pair. Caller is not allowed to
+ * deconstruct the protected object.
+ */
+static inline __must_check bool rcuref_put(rcuref_t *ref)
+{
+ bool released;
+
+ preempt_disable();
+ released = __rcuref_put(ref);
+ preempt_enable();
+ return released;
+}
+
+#endif
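Taken together, rcuref_get()/rcuref_put() replace the open-coded atomic_inc_not_zero()/atomic_dec_return() pattern cited in lib/rcuref.c below. A sketch of a typical user; the struct and helper names are illustrative and not part of this patch:

/* Illustrative only: 'gadget' and its helpers are hypothetical. */
struct gadget {
	rcuref_t	ref;
	struct rcu_head	rcu;
};

static struct gadget __rcu *the_gadget;

static struct gadget *gadget_get(void)
{
	struct gadget *g;

	rcu_read_lock();
	g = rcu_dereference(the_gadget);
	/* Fails only when the last reference was already dropped. */
	if (g && !rcuref_get(&g->ref))
		g = NULL;
	rcu_read_unlock();
	return g;
}

static void gadget_put(struct gadget *g)
{
	/* True only for the final put(); the object can then be
	 * unpublished and freed after a grace period. */
	if (rcuref_put(&g->ref)) {
		rcu_assign_pointer(the_gadget, NULL);
		kfree_rcu(g, rcu);
	}
}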
diff --git a/include/linux/types.h b/include/linux/types.h
index ea8cf60a8a79..688fb943556a 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -175,6 +175,12 @@ typedef struct {
} atomic64_t;
#endif
+typedef struct {
+ atomic_t refcnt;
+} rcuref_t;
+
+#define RCUREF_INIT(i) { .refcnt = ATOMIC_INIT(i - 1) }
+
struct list_head {
struct list_head *next, *prev;
};
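The -1 bias in RCUREF_INIT() matches rcuref_init(): a count of one reference is stored as 0 (RCUREF_ONEREF). A one-line sketch of static initialization; the variable name is hypothetical:

/* One initial reference: RCUREF_INIT(1) expands to
 * { .refcnt = ATOMIC_INIT(0) }, i.e. RCUREF_ONEREF. */
static rcuref_t static_ref = RCUREF_INIT(1);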
diff --git a/lib/Makefile b/lib/Makefile
index baf2821f7a00..31a3a257fd49 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -47,7 +47,7 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \
list_sort.o uuid.o iov_iter.o clz_ctz.o \
bsearch.o find_bit.o llist.o memweight.o kfifo.o \
percpu-refcount.o rhashtable.o base64.o \
- once.o refcount.o usercopy.o errseq.o bucket_locks.o \
+ once.o refcount.o rcuref.o usercopy.o errseq.o bucket_locks.o \
generic-radix-tree.o
obj-$(CONFIG_STRING_SELFTEST) += test_string.o
obj-y += string_helpers.o
diff --git a/lib/rcuref.c b/lib/rcuref.c
new file mode 100644
index 000000000000..5ec00a4a64d1
--- /dev/null
+++ b/lib/rcuref.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * rcuref - A scalable reference count implementation for RCU managed objects
+ *
+ * rcuref is provided to replace open coded reference count implementations
+ * based on atomic_t. It protects explicitly RCU managed objects which can
+ * be visible even after the last reference has been dropped and the object
+ * is heading towards destruction.
+ *
+ * A common usage pattern is:
+ *
+ * get()
+ * rcu_read_lock();
+ * p = get_ptr();
+ * if (p && !atomic_inc_not_zero(&p->refcnt))
+ * p = NULL;
+ * rcu_read_unlock();
+ * return p;
+ *
+ * put()
+ * if (!atomic_dec_return(&p->refcnt)) {
+ * remove_ptr(p);
+ * kfree_rcu(p, rcu);
+ * }
+ *
+ * atomic_inc_not_zero() is implemented with a try_cmpxchg() loop which has
+ * O(N^2) behaviour under contention with N concurrent operations.
+ *
+ * rcuref uses atomic_add_negative_relaxed() for the fast path, which scales
+ * better under contention.
+ *
+ * Why not refcount?
+ * =================
+ *
+ * In principle it should be possible to make refcount use the rcuref
+ * scheme, but the destruction race described below cannot be prevented
+ * unless the protected object is RCU managed.
+ *
+ * Theory of operation
+ * ===================
+ *
+ * rcuref uses an unsigned integer reference counter. As long as the
+ * counter value is greater than or equal to RCUREF_ONEREF and not larger
+ * than RCUREF_MAXREF the reference is alive:
+ *
+ *  ONEREF   MAXREF                SATURATED               RELEASED     DEAD         NOREF
+ *  0        0x7FFFFFFF  0x80000000  0xA0000000  0xBFFFFFFF  0xC0000000  0xE0000000  0xFFFFFFFF
+ *  <------- valid ----> <------ saturation zone ---------> <---------- dead zone ---------->
+ *
+ * The get() and put() operations do unconditional increments and
+ * decrements. The result is checked after the operation. This optimizes
+ * for the fast path.
+ *
+ * If the reference count is saturated or dead, then the increments and
+ * decrements are not harmful as the reference count still stays in the
+ * respective zones and is always set back to SATURATED or DEAD. The
+ * zones have room for 2^28 racing operations in each direction, which
+ * makes it practically impossible to escape the zones.
+ *
+ * Once the last reference is dropped the reference count becomes
+ * RCUREF_NOREF which forces rcuref_put() into the slowpath operation. The
+ * slowpath then tries to set the reference count from RCUREF_NOREF to
+ * RCUREF_DEAD via a cmpxchg(). This opens a small window where a
+ * concurrent rcuref_get() can acquire the reference count and bring it
+ * back to RCUREF_ONEREF or even drop the reference again and mark it DEAD.
+ *
+ * If the cmpxchg() succeeds then a concurrent rcuref_get() will result in
+ * DEAD + 1, which is inside the dead zone. If that happens the reference
+ * count is put back to DEAD.
+ *
+ * The actual race is possible due to the unconditional increment and
+ * decrements in rcuref_get() and rcuref_put():
+ *
+ * T1                                    T2
+ * get()                                 put()
+ *                                       if (atomic_add_negative(-1, &ref->refcnt))
+ *             succeeds->                   atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);
+ *
+ * atomic_add_negative(1, &ref->refcnt);    <- Elevates refcount to DEAD + 1
+ *
+ * As the result of T1's add is negative, the get() goes into the slow path
+ * and observes refcnt being in the dead zone which makes the operation fail.
+ *
+ * Possible critical states:
+ *
+ * Context   Counter   References   Operation
+ * T1          0           1        init()
+ * T2          1           2        get()
+ * T1          0           1        put()
+ * T2         -1           0        put() tries to mark dead
+ * T1          0           1        get()
+ * T2          0           1        put() mark dead fails
+ * T1         -1           0        put() tries to mark dead
+ * T1        DEAD          0        put() mark dead succeeds
+ * T2        DEAD+1        0        get() fails and puts it back to DEAD
+ *
+ * Of course there are more complex scenarios, but the above illustrates
+ * the working principle. The rest is left to the imagination of the
+ * reader.
+ *
+ * Deconstruction race
+ * ===================
+ *
+ * The release operation must be protected by prohibiting a grace period in
+ * order to prevent a possible use after free:
+ *
+ * T1                                          T2
+ * put()                                       get()
+ * // ref->refcnt = ONEREF
+ * if (!atomic_add_negative(-1, &ref->refcnt))
+ *    return false;                            <- Not taken
+ *
+ * // ref->refcnt == NOREF
+ * --> preemption
+ *                                             // Elevates ref->refcnt to ONEREF
+ *                                             if (!atomic_add_negative(1, &ref->refcnt))
+ *                                                return true;                  <- taken
+ *
+ *                                             if (put(&p->ref)) { <-- Succeeds
+ *                                                remove_pointer(p);
+ *                                                kfree_rcu(p, rcu);
+ *                                             }
+ *
+ *      RCU grace period ends, object is freed
+ *
+ * atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);  <- UAF
+ *
+ * This is prevented by disabling preemption around the put() operation, which
+ * in most kernel configurations is cheaper than an rcu_read_lock() /
+ * rcu_read_unlock() pair and in many cases even a NOOP. Either way it
+ * prevents the grace period from completing, which keeps the object alive
+ * until all put() operations complete.
+ *
+ * Saturation protection
+ * =====================
+ *
+ * The reference count has a saturation limit RCUREF_MAXREF (INT_MAX).
+ * Once this is exceeded the reference count is pinned to RCUREF_SATURATED,
+ * which will cause a memory leak, but it prevents wraparounds, which
+ * obviously cause worse problems than a memory leak. When saturation is
+ * reached a warning is emitted.
+ *
+ * Race conditions
+ * ===============
+ *
+ * All reference count increment/decrement operations are unconditional and
+ * only verified after the fact. This optimizes for the good case and takes
+ * the occasional race vs. a dead or already saturated refcount into
+ * account. The saturation and dead zones are large enough to accommodate
+ * that.
+ *
+ * Memory ordering
+ * ===============
+ *
+ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
+ * and provide only what is strictly required for refcounts.
+ *
+ * The increments are fully relaxed; these will not provide ordering. The
+ * rationale is that whatever is used to obtain the object to increase the
+ * reference count on will provide the ordering. For locked data
+ * structures, its the lock acquire, for RCU/lockless data structures its
+ * the dependent load.
+ *
+ * rcuref_get() provides a control dependency ordering future stores which
+ * ensures that the object is not modified when acquiring a reference
+ * fails.
+ *
+ * rcuref_put() provides release ordering, i.e. all prior loads and stores
+ * will be issued before. It also provides a control dependency ordering
+ * against the subsequent destruction of the object.
+ *
+ * If rcuref_put() successfully dropped the last reference and marked the
+ * object DEAD it also provides acquire ordering.
+ */
+
+#include <linux/export.h>
+#include <linux/rcuref.h>
+
+/**
+ * rcuref_get_slowpath - Slowpath of rcuref_get()
+ * @ref: Pointer to the reference count
+ *
+ * Invoked when the reference count is outside of the valid zone.
+ *
+ * Return:
+ * False if the reference count was already marked dead
+ *
+ * True if the reference count is saturated, which prevents the
+ * object from being deconstructed ever.
+ */
+bool rcuref_get_slowpath(rcuref_t *ref)
+{
+ unsigned int cnt = atomic_read(&ref->refcnt);
+
+ /*
+ * If the reference count was already marked dead, undo the
+ * increment so it stays in the middle of the dead zone and return
+ * fail.
+ */
+ if (cnt >= RCUREF_RELEASED) {
+ atomic_set(&ref->refcnt, RCUREF_DEAD);
+ return false;
+ }
+
+ /*
+ * If it was saturated, warn and mark it so. In case the increment
+ * was already on a saturated value restore the saturation
+ * marker. This keeps it in the middle of the saturation zone and
+ * prevents the reference count from overflowing. This leaks the
+ * object memory, but prevents the obvious reference count overflow
+ * damage.
+ */
+ if (WARN_ONCE(cnt > RCUREF_MAXREF, "rcuref saturated - leaking memory"))
+ atomic_set(&ref->refcnt, RCUREF_SATURATED);
+ return true;
+}
+EXPORT_SYMBOL_GPL(rcuref_get_slowpath);
+
+/**
+ * rcuref_put_slowpath - Slowpath of __rcuref_put()
+ * @ref: Pointer to the reference count
+ *
+ * Invoked when the reference count is outside of the valid zone.
+ *
+ * Return:
+ * True if this was the last reference with no future references
+ * possible. This signals the caller that it can safely schedule the
+ * object, which is protected by the reference counter, for
+ * deconstruction.
+ *
+ * False if there are still active references or the put() raced
+ * with a concurrent get()/put() pair. Caller is not allowed to
+ * deconstruct the protected object.
+ */
+bool rcuref_put_slowpath(rcuref_t *ref)
+{
+ unsigned int cnt = atomic_read(&ref->refcnt);
+
+ /* Did this drop the last reference? */
+ if (likely(cnt == RCUREF_NOREF)) {
+ /*
+ * Carefully try to set the reference count to RCUREF_DEAD.
+ *
+ * This can fail if a concurrent get() operation has
+ * elevated it again or the corresponding put() even marked
+ * it dead already. Both are valid situations and do not
+ * require a retry. If this fails the caller is not
+ * allowed to deconstruct the object.
+ */
+ if (atomic_cmpxchg_release(&ref->refcnt, RCUREF_NOREF, RCUREF_DEAD) != RCUREF_NOREF)
+ return false;
+
+ /*
+ * The caller can safely schedule the object for
+ * deconstruction. Provide acquire ordering.
+ */
+ smp_acquire__after_ctrl_dep();
+ return true;
+ }
+
+ /*
+ * If the reference count was already in the dead zone, then this
+ * put() operation is imbalanced. Warn, put the reference count back to
+ * DEAD and tell the caller to not deconstruct the object.
+ */
+ if (WARN_ONCE(cnt >= RCUREF_RELEASED, "rcuref - imbalanced put()")) {
+ atomic_set(&ref->refcnt, RCUREF_DEAD);
+ return false;
+ }
+
+ /*
+ * This is a put() operation on a saturated refcount. Restore the
+ * mean saturation value and tell the caller to not deconstruct the
+ * object.
+ */
+ if (cnt > RCUREF_MAXREF)
+ atomic_set(&ref->refcnt, RCUREF_SATURATED);
+ return false;
+}
+EXPORT_SYMBOL_GPL(rcuref_put_slowpath);
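The scheme above hinges on the counter storing "references - 1" and on the fastpath reinterpreting the same 32 bits as a signed value. A standalone userspace sketch of that zone arithmetic (plain C, not kernel code; assumes the usual two's complement conversion):

#include <stdio.h>

#define RCUREF_ONEREF	0x00000000U
#define RCUREF_NOREF	0xFFFFFFFFU

/* The fastpath test: does the add make the *signed* view negative? */
static int add_negative(unsigned int *cnt, int i)
{
	*cnt += (unsigned int)i;	/* unsigned wraparound is defined */
	return (int)*cnt < 0;
}

int main(void)
{
	unsigned int cnt = RCUREF_ONEREF;	/* one reference held */
	int neg;

	neg = add_negative(&cnt, 1);	/* get(): 0 -> 1, valid zone */
	printf("get: negative=%d cnt=%#x\n", neg, cnt);

	neg = add_negative(&cnt, -1);	/* put(): 1 -> 0, not the last ref */
	printf("put: negative=%d cnt=%#x\n", neg, cnt);

	/* put(): 0 -> 0xffffffff == RCUREF_NOREF, signed -1, which
	 * forces the slowpath that tries to mark the count DEAD. */
	neg = add_negative(&cnt, -1);
	printf("put: negative=%d cnt=%#x\n", neg, cnt);
	return 0;
}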
diff --git a/scripts/atomic/atomics.tbl b/scripts/atomic/atomics.tbl
index fbee2f6190d9..85ca8d9b5c27 100644
--- a/scripts/atomic/atomics.tbl
+++ b/scripts/atomic/atomics.tbl
@@ -33,7 +33,7 @@ try_cmpxchg B v p:old i:new
sub_and_test b i v
dec_and_test b v
inc_and_test b v
-add_negative b i v
+add_negative B i v
add_unless fb v i:a i:u
inc_not_zero b v
inc_unless_negative b v
diff --git a/scripts/atomic/fallbacks/add_negative b/scripts/atomic/fallbacks/add_negative
index 15caa2eb2371..e5980abf5904 100755
--- a/scripts/atomic/fallbacks/add_negative
+++ b/scripts/atomic/fallbacks/add_negative
@@ -1,16 +1,15 @@
cat <<EOF
/**
- * arch_${atomic}_add_negative - add and test if negative
+ * arch_${atomic}_add_negative${order} - Add and test if negative
* @i: integer value to add
* @v: pointer of type ${atomic}_t
*
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
+ * Atomically adds @i to @v and returns true if the result is negative,
+ * or false when the result is greater than or equal to zero.
*/
static __always_inline bool
-arch_${atomic}_add_negative(${int} i, ${atomic}_t *v)
+arch_${atomic}_add_negative${order}(${int} i, ${atomic}_t *v)
{
- return arch_${atomic}_add_return(i, v) < 0;
+ return arch_${atomic}_add_return${order}(i, v) < 0;
}
EOF