summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-01 20:02:34 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-01 20:02:34 +0300
commit9368f0f9419cde028a6e58331065900ff089bc36 (patch)
treeb7e3a8910f9493e8422cd661e1375e5421a91bf0 /include
parentb04b2e7a61830cabd00c6f95308a8e2f5d82fa52 (diff)
parentca0d620b0afae20a7bcd5182606eba6860b2dbf2 (diff)
downloadlinux-9368f0f9419cde028a6e58331065900ff089bc36.tar.xz
Merge tag 'vfs-6.19-rc1.inode' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs inode updates from Christian Brauner: "Features: - Hide inode->i_state behind accessors. Open-coded accesses prevent asserting they are done correctly. One obvious aspect is locking, but significantly more can be checked. For example it can be detected when the code is clearing flags which are already missing, or is setting flags when it is illegal (e.g., I_FREEING when ->i_count > 0) - Provide accessors for ->i_state, converts all filesystems using coccinelle and manual conversions (btrfs, ceph, smb, f2fs, gfs2, overlayfs, nilfs2, xfs), and makes plain ->i_state access fail to compile - Rework I_NEW handling to operate without fences, simplifying the code after the accessor infrastructure is in place Cleanups: - Move wait_on_inode() from writeback.h to fs.h - Spell out fenced ->i_state accesses with explicit smp_wmb/smp_rmb for clarity - Cosmetic fixes to LRU handling - Push list presence check into inode_io_list_del() - Touch up predicts in __d_lookup_rcu() - ocfs2: retire ocfs2_drop_inode() and I_WILL_FREE usage - Assert on ->i_count in iput_final() - Assert ->i_lock held in __iget() Fixes: - Add missing fences to I_NEW handling" * tag 'vfs-6.19-rc1.inode' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (22 commits) dcache: touch up predicts in __d_lookup_rcu() fs: push list presence check into inode_io_list_del() fs: cosmetic fixes to lru handling fs: rework I_NEW handling to operate without fences fs: make plain ->i_state access fail to compile xfs: use the new ->i_state accessors nilfs2: use the new ->i_state accessors overlayfs: use the new ->i_state accessors gfs2: use the new ->i_state accessors f2fs: use the new ->i_state accessors smb: use the new ->i_state accessors ceph: use the new ->i_state accessors btrfs: use the new ->i_state accessors Manual conversion to use ->i_state accessors of all places not covered by coccinelle Coccinelle-based conversion to use ->i_state accessors fs: provide accessors for ->i_state fs: spell out fenced ->i_state accesses with explicit smp_wmb/smp_rmb fs: move wait_on_inode() from writeback.h to fs.h fs: add missing fences to I_NEW handling ocfs2: retire ocfs2_drop_inode() and I_WILL_FREE usage ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/backing-dev.h5
-rw-r--r--include/linux/fs.h99
-rw-r--r--include/linux/writeback.h9
-rw-r--r--include/trace/events/writeback.h8
4 files changed, 98 insertions, 23 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 3e64f14739dd..0c8342747cab 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -277,10 +277,11 @@ unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
rcu_read_lock();
/*
- * Paired with store_release in inode_switch_wbs_work_fn() and
+ * Paired with a release fence in inode_do_switch_wbs() and
* ensures that we see the new wb if we see cleared I_WB_SWITCH.
*/
- cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
+ cookie->locked = inode_state_read_once(inode) & I_WB_SWITCH;
+ smp_rmb();
if (unlikely(cookie->locked))
xa_lock_irqsave(&inode->i_mapping->i_pages, cookie->flags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fdca6f46dd76..bc70673b55bb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -735,7 +735,7 @@ enum inode_state_bits {
/* reserved wait address bit 3 */
};
-enum inode_state_flags_t {
+enum inode_state_flags_enum {
I_NEW = (1U << __I_NEW),
I_SYNC = (1U << __I_SYNC),
I_LRU_ISOLATING = (1U << __I_LRU_ISOLATING),
@@ -762,6 +762,13 @@ enum inode_state_flags_t {
#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
/*
+ * Use inode_state_read() & friends to access.
+ */
+struct inode_state_flags {
+ enum inode_state_flags_enum __state;
+};
+
+/*
* Keep mostly read-only and often accessed (especially for
* the RCU path lookup and 'stat' data) fields at the beginning
* of the 'struct inode'
@@ -819,7 +826,7 @@ struct inode {
#endif
/* Misc */
- enum inode_state_flags_t i_state;
+ struct inode_state_flags i_state;
/* 32-bit hole */
struct rw_semaphore i_rwsem;
@@ -878,6 +885,80 @@ struct inode {
void *i_private; /* fs or device private pointer */
} __randomize_layout;
+/*
+ * i_state handling
+ *
+ * We hide all of it behind helpers so that we can validate consumers.
+ */
+static inline enum inode_state_flags_enum inode_state_read_once(struct inode *inode)
+{
+ return READ_ONCE(inode->i_state.__state);
+}
+
+static inline enum inode_state_flags_enum inode_state_read(struct inode *inode)
+{
+ lockdep_assert_held(&inode->i_lock);
+ return inode->i_state.__state;
+}
+
+static inline void inode_state_set_raw(struct inode *inode,
+ enum inode_state_flags_enum flags)
+{
+ WRITE_ONCE(inode->i_state.__state, inode->i_state.__state | flags);
+}
+
+static inline void inode_state_set(struct inode *inode,
+ enum inode_state_flags_enum flags)
+{
+ lockdep_assert_held(&inode->i_lock);
+ inode_state_set_raw(inode, flags);
+}
+
+static inline void inode_state_clear_raw(struct inode *inode,
+ enum inode_state_flags_enum flags)
+{
+ WRITE_ONCE(inode->i_state.__state, inode->i_state.__state & ~flags);
+}
+
+static inline void inode_state_clear(struct inode *inode,
+ enum inode_state_flags_enum flags)
+{
+ lockdep_assert_held(&inode->i_lock);
+ inode_state_clear_raw(inode, flags);
+}
+
+static inline void inode_state_assign_raw(struct inode *inode,
+ enum inode_state_flags_enum flags)
+{
+ WRITE_ONCE(inode->i_state.__state, flags);
+}
+
+static inline void inode_state_assign(struct inode *inode,
+ enum inode_state_flags_enum flags)
+{
+ lockdep_assert_held(&inode->i_lock);
+ inode_state_assign_raw(inode, flags);
+}
+
+static inline void inode_state_replace_raw(struct inode *inode,
+ enum inode_state_flags_enum clearflags,
+ enum inode_state_flags_enum setflags)
+{
+ enum inode_state_flags_enum flags;
+ flags = inode->i_state.__state;
+ flags &= ~clearflags;
+ flags |= setflags;
+ inode_state_assign_raw(inode, flags);
+}
+
+static inline void inode_state_replace(struct inode *inode,
+ enum inode_state_flags_enum clearflags,
+ enum inode_state_flags_enum setflags)
+{
+ lockdep_assert_held(&inode->i_lock);
+ inode_state_replace_raw(inode, clearflags, setflags);
+}
+
static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
{
VFS_WARN_ON_INODE(strlen(link) != linklen, inode);
@@ -925,6 +1006,8 @@ static inline void inode_fake_hash(struct inode *inode)
hlist_add_fake(&inode->i_hash);
}
+void wait_on_new_inode(struct inode *inode);
+
/*
* inode->i_rwsem nesting subclasses for the lock validator:
*
@@ -2611,8 +2694,8 @@ static inline int icount_read(const struct inode *inode)
*/
static inline bool inode_is_dirtytime_only(struct inode *inode)
{
- return (inode->i_state & (I_DIRTY_TIME | I_NEW |
- I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME;
+ return (inode_state_read_once(inode) &
+ (I_DIRTY_TIME | I_NEW | I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME;
}
extern void inc_nlink(struct inode *inode);
@@ -3304,7 +3387,7 @@ extern void d_mark_dontcache(struct inode *inode);
extern struct inode *ilookup5_nowait(struct super_block *sb,
unsigned long hashval, int (*test)(struct inode *, void *),
- void *data);
+ void *data, bool *isnew);
extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data);
extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
@@ -3356,11 +3439,9 @@ static inline bool is_zero_ino(ino_t ino)
return (u32)ino == 0;
}
-/*
- * inode->i_lock must be held
- */
static inline void __iget(struct inode *inode)
{
+ lockdep_assert_held(&inode->i_lock);
atomic_inc(&inode->i_count);
}
@@ -3399,7 +3480,7 @@ static inline void remove_inode_hash(struct inode *inode)
}
extern void inode_sb_list_add(struct inode *inode);
-extern void inode_add_lru(struct inode *inode);
+extern void inode_lru_list_add(struct inode *inode);
int sb_set_blocksize(struct super_block *sb, int size);
int __must_check sb_min_blocksize(struct super_block *sb, int size);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 22dd4adc5667..102071ffedcb 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -189,13 +189,6 @@ void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
void inode_wait_for_writeback(struct inode *inode);
void inode_io_list_del(struct inode *inode);
-/* writeback.h requires fs.h; it, too, is not included from here. */
-static inline void wait_on_inode(struct inode *inode)
-{
- wait_var_event(inode_state_wait_address(inode, __I_NEW),
- !(READ_ONCE(inode->i_state) & I_NEW));
-}
-
#ifdef CONFIG_CGROUP_WRITEBACK
#include <linux/cgroup.h>
@@ -234,7 +227,7 @@ static inline void inode_attach_wb(struct inode *inode, struct folio *folio)
static inline void inode_detach_wb(struct inode *inode)
{
if (inode->i_wb) {
- WARN_ON_ONCE(!(inode->i_state & I_CLEAR));
+ WARN_ON_ONCE(!(inode_state_read_once(inode) & I_CLEAR));
wb_put(inode->i_wb);
inode->i_wb = NULL;
}
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index c08aff044e80..311a341e6fe4 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -120,7 +120,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
/* may be called for files on pseudo FSes w/ unregistered bdi */
strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->ino = inode->i_ino;
- __entry->state = inode->i_state;
+ __entry->state = inode_state_read_once(inode);
__entry->flags = flags;
),
@@ -748,7 +748,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
strscpy_pad(__entry->name,
bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
- __entry->state = inode->i_state;
+ __entry->state = inode_state_read_once(inode);
__entry->dirtied_when = inode->dirtied_when;
__entry->cgroup_ino = __trace_wb_assign_cgroup(inode_to_wb(inode));
),
@@ -787,7 +787,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
strscpy_pad(__entry->name,
bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
- __entry->state = inode->i_state;
+ __entry->state = inode_state_read_once(inode);
__entry->dirtied_when = inode->dirtied_when;
__entry->writeback_index = inode->i_mapping->writeback_index;
__entry->nr_to_write = nr_to_write;
@@ -839,7 +839,7 @@ DECLARE_EVENT_CLASS(writeback_inode_template,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
- __entry->state = inode->i_state;
+ __entry->state = inode_state_read_once(inode);
__entry->mode = inode->i_mode;
__entry->dirtied_when = inode->dirtied_when;
),