From 59eb73b98ae0b12fc9b39c08f0f5a5552cb02d1e Mon Sep 17 00:00:00 2001 From: John Groves Date: Fri, 27 Mar 2026 21:04:22 +0000 Subject: dax: Factor out dax_folio_reset_order() helper Both fs/dax.c:dax_folio_put() and drivers/dax/fsdev.c: fsdev_clear_folio_state() (the latter coming in the next commit after this one) contain nearly identical code to reset a compound DAX folio back to order-0 pages. Factor this out into a shared helper function. The new dax_folio_reset_order() function: - Clears the folio's mapping and share count - Resets compound folio state via folio_reset_order() - Clears PageHead and compound_head for each sub-page - Restores the pgmap pointer for each resulting order-0 folio - Returns the original folio order (for callers that need to advance by that many pages) Two intentional differences from the original dax_folio_put() logic: 1. folio->share is cleared unconditionally. This is correct because the DAX subsystem maintains the invariant that share != 0 only when mapping == NULL (enforced by dax_folio_make_shared()). dax_folio_put() ensures share has reached zero before calling this helper, so the unconditional clear is safe. 2. folio->pgmap is now explicitly restored for order-0 folios. For the dax_folio_put() caller this is a no-op (reads and writes back the same field). It is intentional for the upcoming fsdev_clear_folio_state() caller, which converts previously-compound folios and needs pgmap re-established for all pages regardless of order. This simplifies fsdev_clear_folio_state() from ~50 lines to ~15 lines. Suggested-by: Jonathan Cameron Reviewed-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Signed-off-by: John Groves Link: https://patch.msgid.link/0100019d311cc6b9-5be7428a-7f16-4774-8f90-a44b88ac5660-000000@email.amazonses.com Signed-off-by: Ira Weiny --- include/linux/dax.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index bf103f317cac..73cfc1a7c8f1 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -153,6 +153,7 @@ static inline void fs_put_dax(struct dax_device *dax_dev, void *holder) #if IS_ENABLED(CONFIG_FS_DAX) int dax_writeback_mapping_range(struct address_space *mapping, struct dax_device *dax_dev, struct writeback_control *wbc); +int dax_folio_reset_order(struct folio *folio); struct page *dax_layout_busy_page(struct address_space *mapping); struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); -- cgit v1.2.3 From 700ecbc1f5aa02ba9ad68d7be1ef7a9c8eae07e9 Mon Sep 17 00:00:00 2001 From: John Groves Date: Fri, 27 Mar 2026 21:05:03 +0000 Subject: dax: Add dax_set_ops() for setting dax_operations at bind time Add a new dax_set_ops() function that allows drivers to set the dax_operations after the dax_device has been allocated. This is needed for fsdev_dax where the operations need to be set during probe and cleared during unbind. The fsdev driver uses devm_add_action_or_reset() for cleanup consistency, avoiding the complexity of mixing devm-managed resources with manual cleanup in a remove() callback. This ensures cleanup happens automatically in the correct reverse order when the device is unbound. Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Signed-off-by: John Groves Link: https://patch.msgid.link/0100019d311d65a0-b9c1419e-f3a0-4afd-b0bd-848f18ff5950-000000@email.amazonses.com Signed-off-by: Ira Weiny --- drivers/dax/fsdev.c | 16 ++++++++++++++++ drivers/dax/super.c | 38 +++++++++++++++++++++++++++++++++++++- include/linux/dax.h | 1 + 3 files changed, 54 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/dax/fsdev.c b/drivers/dax/fsdev.c index 30f57c74c979..4499d9621f33 100644 --- a/drivers/dax/fsdev.c +++ b/drivers/dax/fsdev.c @@ -117,6 +117,13 @@ static void fsdev_kill(void *dev_dax) kill_dev_dax(dev_dax); } +static void fsdev_clear_ops(void *data) +{ + struct dev_dax *dev_dax = data; + + dax_set_ops(dev_dax->dax_dev, NULL); +} + /* * Page map operations for FS-DAX mode * Similar to fsdax_pagemap_ops in drivers/nvdimm/pmem.c @@ -303,6 +310,15 @@ static int fsdev_dax_probe(struct dev_dax *dev_dax) if (rc) return rc; + /* Set the dax operations for fs-dax access path */ + rc = dax_set_ops(dax_dev, &dev_dax_ops); + if (rc) + return rc; + + rc = devm_add_action_or_reset(dev, fsdev_clear_ops, dev_dax); + if (rc) + return rc; + run_dax(dax_dev); return devm_add_action_or_reset(dev, fsdev_kill, dev_dax); } diff --git a/drivers/dax/super.c b/drivers/dax/super.c index c00b9dff4a06..ba0b4cd18a77 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -157,6 +157,9 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, if (!dax_alive(dax_dev)) return -ENXIO; + if (!dax_dev->ops) + return -EOPNOTSUPP; + if (nr_pages < 0) return -EINVAL; @@ -207,6 +210,10 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, if (!dax_alive(dax_dev)) return -ENXIO; + + if (!dax_dev->ops) + return -EOPNOTSUPP; + /* * There are no callers that want to zero more than one page as of now. * Once users are there, this check can be removed after the @@ -223,7 +230,7 @@ EXPORT_SYMBOL_GPL(dax_zero_page_range); size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *iter) { - if (!dax_dev->ops->recovery_write) + if (!dax_dev->ops || !dax_dev->ops->recovery_write) return 0; return dax_dev->ops->recovery_write(dax_dev, pgoff, addr, bytes, iter); } @@ -307,6 +314,35 @@ void set_dax_nomc(struct dax_device *dax_dev) } EXPORT_SYMBOL_GPL(set_dax_nomc); +/** + * dax_set_ops - set the dax_operations for a dax_device + * @dax_dev: the dax_device to configure + * @ops: the operations to set (may be NULL to clear) + * + * This allows drivers to set the dax_operations after the dax_device + * has been allocated. This is needed when the device is created before + * the driver that needs specific ops is bound (e.g., fsdev_dax binding + * to a dev_dax created by hmem). + * + * When setting non-NULL ops, fails if ops are already set (returns -EBUSY). + * When clearing ops (NULL), always succeeds. + * + * Return: 0 on success, -EBUSY if ops already set + */ +int dax_set_ops(struct dax_device *dax_dev, const struct dax_operations *ops) +{ + if (ops) { + /* Setting ops: fail if already set */ + if (cmpxchg(&dax_dev->ops, NULL, ops) != NULL) + return -EBUSY; + } else { + /* Clearing ops: always allowed */ + dax_dev->ops = NULL; + } + return 0; +} +EXPORT_SYMBOL_GPL(dax_set_ops); + bool dax_alive(struct dax_device *dax_dev) { lockdep_assert_held(&dax_srcu); diff --git a/include/linux/dax.h b/include/linux/dax.h index 73cfc1a7c8f1..b19bfe0c2fd1 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -243,6 +243,7 @@ static inline void dax_break_layout_final(struct inode *inode) bool dax_alive(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); +int dax_set_ops(struct dax_device *dax_dev, const struct dax_operations *ops); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, enum dax_access_mode mode, void **kaddr, unsigned long *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, -- cgit v1.2.3 From eec38f5d86d27535509c99f02ccc642ceb0c3e2a Mon Sep 17 00:00:00 2001 From: John Groves Date: Fri, 27 Mar 2026 21:05:12 +0000 Subject: dax: Add fs_dax_get() func to prepare dax for fs-dax usage The fs_dax_get() function should be called by fs-dax file systems after opening a fsdev dax device. This adds holder_operations, which provides a memory failure callback path and effects exclusivity between callers of fs_dax_get(). fs_dax_get() is specific to fsdev_dax, so it checks the driver type (which required touching bus.[ch]). fs_dax_get() fails if fsdev_dax is not bound to the memory. This function serves the same role as fs_dax_get_by_bdev(), which dax file systems call after opening the pmem block device. This can't be located in fsdev.c because struct dax_device is opaque there. This will be called by fs/fuse/famfs.c in a subsequent commit. Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: John Groves Link: https://patch.msgid.link/0100019d311d8750-75395c22-031b-4d5f-aebe-790dca656b87-000000@email.amazonses.com Signed-off-by: Ira Weiny --- drivers/dax/bus.c | 2 -- drivers/dax/bus.h | 2 ++ drivers/dax/super.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/dax.h | 16 +++++++++---- 4 files changed, 79 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 1b412264bb36..32f7b7702c28 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -39,8 +39,6 @@ static int dax_bus_uevent(const struct device *dev, struct kobj_uevent_env *env) return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0); } -#define to_dax_drv(__drv) container_of_const(__drv, struct dax_device_driver, drv) - static struct dax_id *__dax_match_id(const struct dax_device_driver *dax_drv, const char *dev_name) { diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index 880bdf7e72d7..dc6f112ac4a4 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -42,6 +42,8 @@ struct dax_device_driver { void (*remove)(struct dev_dax *dev); }; +#define to_dax_drv(__drv) container_of_const(__drv, struct dax_device_driver, drv) + int __dax_driver_register(struct dax_device_driver *dax_drv, struct module *module, const char *mod_name); #define dax_driver_register(driver) \ diff --git a/drivers/dax/super.c b/drivers/dax/super.c index ba0b4cd18a77..d4ab60c406bf 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -14,6 +14,7 @@ #include #include #include "dax-private.h" +#include "bus.h" /** * struct dax_device - anchor object for dax services @@ -111,6 +112,10 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off, } EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); +#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */ + +#if IS_ENABLED(CONFIG_FS_DAX) + void fs_put_dax(struct dax_device *dax_dev, void *holder) { if (dax_dev && holder && @@ -119,7 +124,66 @@ void fs_put_dax(struct dax_device *dax_dev, void *holder) put_dax(dax_dev); } EXPORT_SYMBOL_GPL(fs_put_dax); -#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */ + +/** + * fs_dax_get() - get ownership of a devdax via holder/holder_ops + * + * fs-dax file systems call this function to prepare to use a devdax device for + * fsdax. This is like fs_dax_get_by_bdev(), but the caller already has struct + * dev_dax (and there is no bdev). The holder makes this exclusive. + * + * @dax_dev: dev to be prepared for fs-dax usage + * @holder: filesystem or mapped device inside the dax_device + * @hops: operations for the inner holder + * + * Returns: 0 on success, <0 on failure + */ +int fs_dax_get(struct dax_device *dax_dev, void *holder, + const struct dax_holder_operations *hops) +{ + struct dev_dax *dev_dax; + struct dax_device_driver *dax_drv; + int id; + + id = dax_read_lock(); + if (!dax_dev || !dax_alive(dax_dev) || !igrab(&dax_dev->inode)) { + dax_read_unlock(id); + return -ENODEV; + } + dax_read_unlock(id); + + /* Verify the device is bound to fsdev_dax driver */ + dev_dax = dax_get_private(dax_dev); + if (!dev_dax) { + iput(&dax_dev->inode); + return -ENODEV; + } + + device_lock(&dev_dax->dev); + if (!dev_dax->dev.driver) { + device_unlock(&dev_dax->dev); + iput(&dax_dev->inode); + return -ENODEV; + } + dax_drv = to_dax_drv(dev_dax->dev.driver); + if (dax_drv->type != DAXDRV_FSDEV_TYPE) { + device_unlock(&dev_dax->dev); + iput(&dax_dev->inode); + return -EOPNOTSUPP; + } + device_unlock(&dev_dax->dev); + + if (cmpxchg(&dax_dev->holder_data, NULL, holder)) { + iput(&dax_dev->inode); + return -EBUSY; + } + + dax_dev->holder_ops = hops; + + return 0; +} +EXPORT_SYMBOL_GPL(fs_dax_get); +#endif /* CONFIG_FS_DAX */ enum dax_device_flags { /* !alive + rcu grace period == no new operations / mappings */ diff --git a/include/linux/dax.h b/include/linux/dax.h index b19bfe0c2fd1..a85e270bfb3c 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -130,7 +130,6 @@ int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); void dax_remove_host(struct gendisk *disk); struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off, void *holder, const struct dax_holder_operations *ops); -void fs_put_dax(struct dax_device *dax_dev, void *holder); #else static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) { @@ -145,12 +144,12 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, { return NULL; } -static inline void fs_put_dax(struct dax_device *dax_dev, void *holder) -{ -} #endif /* CONFIG_BLOCK && CONFIG_FS_DAX */ #if IS_ENABLED(CONFIG_FS_DAX) +void fs_put_dax(struct dax_device *dax_dev, void *holder); +int fs_dax_get(struct dax_device *dax_dev, void *holder, + const struct dax_holder_operations *hops); int dax_writeback_mapping_range(struct address_space *mapping, struct dax_device *dax_dev, struct writeback_control *wbc); int dax_folio_reset_order(struct folio *folio); @@ -164,6 +163,15 @@ dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, void dax_unlock_mapping_entry(struct address_space *mapping, unsigned long index, dax_entry_t cookie); #else +static inline void fs_put_dax(struct dax_device *dax_dev, void *holder) +{ +} + +static inline int fs_dax_get(struct dax_device *dax_dev, void *holder, + const struct dax_holder_operations *hops) +{ + return -EOPNOTSUPP; +} static inline struct page *dax_layout_busy_page(struct address_space *mapping) { return NULL; -- cgit v1.2.3 From 2ae624d5a555d47a735fb3f4d850402859a4db77 Mon Sep 17 00:00:00 2001 From: John Groves Date: Fri, 27 Mar 2026 21:05:21 +0000 Subject: dax: export dax_dev_get() famfs needs to look up a dax_device by dev_t when resolving fmap entries that reference character dax devices. Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Signed-off-by: John Groves Link: https://patch.msgid.link/0100019d311daab5-bb212f0b-4e05-4668-bf53-d76fab56be68-000000@email.amazonses.com Signed-off-by: Ira Weiny --- drivers/dax/super.c | 3 ++- include/linux/dax.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/dax/super.c b/drivers/dax/super.c index d4ab60c406bf..25cf99dd9360 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -521,7 +521,7 @@ static int dax_set(struct inode *inode, void *data) return 0; } -static struct dax_device *dax_dev_get(dev_t devt) +struct dax_device *dax_dev_get(dev_t devt) { struct dax_device *dax_dev; struct inode *inode; @@ -544,6 +544,7 @@ static struct dax_device *dax_dev_get(dev_t devt) return dax_dev; } +EXPORT_SYMBOL_GPL(dax_dev_get); struct dax_device *alloc_dax(void *private, const struct dax_operations *ops) { diff --git a/include/linux/dax.h b/include/linux/dax.h index a85e270bfb3c..9ef95b136bb8 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -54,6 +54,7 @@ struct dax_device *alloc_dax(void *private, const struct dax_operations *ops); void *dax_holder(struct dax_device *dax_dev); void put_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); +struct dax_device *dax_dev_get(dev_t devt); void dax_write_cache(struct dax_device *dax_dev, bool wc); bool dax_write_cache_enabled(struct dax_device *dax_dev); bool dax_synchronous(struct dax_device *dax_dev); -- cgit v1.2.3