summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorSuren Baghdasaryan <surenb@google.com>2025-02-14 01:46:47 +0300
committerAndrew Morton <akpm@linux-foundation.org>2025-03-17 08:06:19 +0300
commit7f8ceea0c58039dcea3d31b8d5da58aa5f6e12bf (patch)
treec4e32249d314e70fcb2b3e5ba09e458594803110 /include/linux
parent45ad9f5290dc4bb2249e951d4b3756d3ebda2d66 (diff)
downloadlinux-7f8ceea0c58039dcea3d31b8d5da58aa5f6e12bf.tar.xz
refcount: provide ops for cases when object's memory can be reused
For speculative lookups where a successful inc_not_zero() pins the object, but where we still need to double check if the object acquired is indeed the one we set out to acquire (identity check), needs this validation to happen *after* the increment. Similarly, when a new object is initialized and its memory might have been previously occupied by another object, all stores to initialize the object should happen *before* refcount initialization. Notably SLAB_TYPESAFE_BY_RCU is one such an example when this ordering is required for reference counting. Add refcount_{add|inc}_not_zero_acquire() to guarantee the proper ordering between acquiring a reference count on an object and performing the identity check for that object. Add refcount_set_release() to guarantee proper ordering between stores initializing object attributes and the store initializing the refcount. refcount_set_release() should be done after all other object attributes are initialized. Once refcount_set_release() is called, the object should be considered visible to other tasks even if it was not yet added into an object collection normally used to discover it. This is because other tasks might have discovered the object previously occupying the same memory and after memory reuse they can succeed in taking refcount for the new object and start using it. Object reuse example to consider: consumer: obj = lookup(collection, key); if (!refcount_inc_not_zero_acquire(&obj->ref)) return; if (READ_ONCE(obj->key) != key) { /* identity check */ put_ref(obj); return; } use(obj->value); producer: remove(collection, obj->key); if (!refcount_dec_and_test(&obj->ref)) return; obj->key = KEY_INVALID; free(obj); obj = malloc(); /* obj is reused */ obj->key = new_key; obj->value = new_value; refcount_set_release(obj->ref, 1); add(collection, new_key, obj); refcount_{add|inc}_not_zero_acquire() is required to prevent the following reordering when refcount_inc_not_zero() is used instead: consumer: obj = lookup(collection, key); if (READ_ONCE(obj->key) != key) { /* reordered identity check */ put_ref(obj); return; } producer: remove(collection, obj->key); if (!refcount_dec_and_test(&obj->ref)) return; obj->key = KEY_INVALID; free(obj); obj = malloc(); /* obj is reused */ obj->key = new_key; obj->value = new_value; refcount_set_release(obj->ref, 1); add(collection, new_key, obj); if (!refcount_inc_not_zero(&obj->ref)) return; use(obj->value); /* USING WRONG OBJECT */ refcount_set_release() is required to prevent the following reordering when refcount_set() is used instead: consumer: obj = lookup(collection, key); producer: remove(collection, obj->key); if (!refcount_dec_and_test(&obj->ref)) return; obj->key = KEY_INVALID; free(obj); obj = malloc(); /* obj is reused */ obj->key = new_key; /* new_key == old_key */ refcount_set(obj->ref, 1); if (!refcount_inc_not_zero_acquire(&obj->ref)) return; if (READ_ONCE(obj->key) != key) { /* pass since new_key == old_key */ put_ref(obj); return; } use(obj->value); /* USING STALE obj->value */ obj->value = new_value; /* reordered store */ add(collection, key, obj); [surenb@google.com: fix title underlines in refcount-vs-atomic.rst] Link: https://lkml.kernel.org/r/20250217161645.3137927-1-surenb@google.com Link: https://lkml.kernel.org/r/20250213224655.1680278-11-surenb@google.com Signed-off-by: Suren Baghdasaryan <surenb@google.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> [slab] Tested-by: Shivank Garg <shivankg@amd.com> Link: https://lkml.kernel.org/r/5e19ec93-8307-47c2-bb13-3ddf7150624e@amd.com Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Christian Brauner <brauner@kernel.org> Cc: David Hildenbrand <david@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Hugh Dickins <hughd@google.com> Cc: Jann Horn <jannh@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Klara Modin <klarasmodin@gmail.com> Cc: Liam R. Howlett <Liam.Howlett@Oracle.com> Cc: Lokesh Gidra <lokeshgidra@google.com> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Mateusz Guzik <mjguzik@gmail.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Minchan Kim <minchan@google.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Pasha Tatashin <pasha.tatashin@soleen.com> Cc: Peter Xu <peterx@redhat.com> Cc: Shakeel Butt <shakeel.butt@linux.dev> Cc: Sourav Panda <souravpanda@google.com> Cc: Wei Yang <richard.weiyang@gmail.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/refcount.h106
-rw-r--r--include/linux/slab.h9
2 files changed, 115 insertions, 0 deletions
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 35f039ecb272..4589d2e7bfea 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -87,6 +87,15 @@
* The decrements dec_and_test() and sub_and_test() also provide acquire
* ordering on success.
*
+ * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() provide
+ * acquire and release ordering for cases when the memory occupied by the
+ * object might be reused to store another object. This is important for the
+ * cases where secondary validation is required to detect such reuse, e.g.
+ * SLAB_TYPESAFE_BY_RCU. The secondary validation checks have to happen after
+ * the refcount is taken, hence acquire order is necessary. Similarly, when the
+ * object is initialized, all stores to its attributes should be visible before
+ * the refcount is set, otherwise a stale attribute value might be used by
+ * another task which succeeds in taking a refcount to the new object.
*/
#ifndef _LINUX_REFCOUNT_H
@@ -126,6 +135,31 @@ static inline void refcount_set(refcount_t *r, int n)
}
/**
+ * refcount_set_release - set a refcount's value with release ordering
+ * @r: the refcount
+ * @n: value to which the refcount will be set
+ *
+ * This function should be used when memory occupied by the object might be
+ * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU.
+ *
+ * Provides release memory ordering which will order previous memory operations
+ * against this store. This ensures all updates to this object are visible
+ * once the refcount is set and stale values from the object previously
+ * occupying this memory are overwritten with new ones.
+ *
+ * This function should be called only after new object is fully initialized.
+ * After this call the object should be considered visible to other tasks even
+ * if it was not yet added into an object collection normally used to discover
+ * it. This is because other tasks might have discovered the object previously
+ * occupying the same memory and after memory reuse they can succeed in taking
+ * refcount to the new object and start using it.
+ */
+static inline void refcount_set_release(refcount_t *r, int n)
+{
+ atomic_set_release(&r->refs, n);
+}
+
+/**
* refcount_read - get a refcount's value
* @r: the refcount
*
@@ -178,6 +212,52 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
return __refcount_add_not_zero(i, r, NULL);
}
+static inline __must_check __signed_wrap
+bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp)
+{
+ int old = refcount_read(r);
+
+ do {
+ if (!old)
+ break;
+ } while (!atomic_try_cmpxchg_acquire(&r->refs, &old, old + i));
+
+ if (oldp)
+ *oldp = old;
+
+ if (unlikely(old < 0 || old + i < 0))
+ refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF);
+
+ return old;
+}
+
+/**
+ * refcount_add_not_zero_acquire - add a value to a refcount with acquire ordering unless it is 0
+ *
+ * @i: the value to add to the refcount
+ * @r: the refcount
+ *
+ * Will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * This function should be used when memory occupied by the object might be
+ * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU.
+ *
+ * Provides acquire memory ordering on success, it is assumed the caller has
+ * guaranteed the object memory to be stable (RCU, etc.). It does provide a
+ * control dependency and thereby orders future stores. See the comment on top.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time. In these
+ * cases, refcount_inc_not_zero_acquire() should instead be used to increment a
+ * reference count.
+ *
+ * Return: false if the passed refcount is 0, true otherwise
+ */
+static inline __must_check bool refcount_add_not_zero_acquire(int i, refcount_t *r)
+{
+ return __refcount_add_not_zero_acquire(i, r, NULL);
+}
+
static inline __signed_wrap
void __refcount_add(int i, refcount_t *r, int *oldp)
{
@@ -236,6 +316,32 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
return __refcount_inc_not_zero(r, NULL);
}
+static inline __must_check bool __refcount_inc_not_zero_acquire(refcount_t *r, int *oldp)
+{
+ return __refcount_add_not_zero_acquire(1, r, oldp);
+}
+
+/**
+ * refcount_inc_not_zero_acquire - increment a refcount with acquire ordering unless it is 0
+ * @r: the refcount to increment
+ *
+ * Similar to refcount_inc_not_zero(), but provides acquire memory ordering on
+ * success.
+ *
+ * This function should be used when memory occupied by the object might be
+ * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU.
+ *
+ * Provides acquire memory ordering on success, it is assumed the caller has
+ * guaranteed the object memory to be stable (RCU, etc.). It does provide a
+ * control dependency and thereby orders future stores. See the comment on top.
+ *
+ * Return: true if the increment was successful, false otherwise
+ */
+static inline __must_check bool refcount_inc_not_zero_acquire(refcount_t *r)
+{
+ return __refcount_inc_not_zero_acquire(r, NULL);
+}
+
static inline void __refcount_inc(refcount_t *r, int *oldp)
{
__refcount_add(1, r, oldp);
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 09eedaecf120..ad902a2d692b 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -136,6 +136,15 @@ enum _slab_flag_bits {
* rcu_read_lock before reading the address, then rcu_read_unlock after
* taking the spinlock within the structure expected at that address.
*
+ * Note that object identity check has to be done *after* acquiring a
+ * reference, therefore user has to ensure proper ordering for loads.
+ * Similarly, when initializing objects allocated with SLAB_TYPESAFE_BY_RCU,
+ * the newly allocated object has to be fully initialized *before* its
+ * refcount gets initialized and proper ordering for stores is required.
+ * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() are
+ * designed with the proper fences required for reference counting objects
+ * allocated with SLAB_TYPESAFE_BY_RCU.
+ *
* Note that it is not possible to acquire a lock within a structure
* allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference
* as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages