From 02b42d58f3898134b900ff3030561099e38adb32 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 14:55:13 +0200 Subject: PM: hibernate: factor out a helper to find the resume device Split the logic to find the resume device out software_resume and into a separate helper to start unwindig the convoluted goto logic. Signed-off-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230531125535.676098-3-hch@lst.de Signed-off-by: Jens Axboe --- kernel/power/hibernate.c | 72 +++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 35 deletions(-) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 30d1274f03f6..072795063662 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -910,6 +910,41 @@ unlock: } EXPORT_SYMBOL_GPL(hibernate_quiet_exec); +static int find_resume_device(void) +{ + if (!strlen(resume_file)) + return -ENOENT; + + pm_pr_dbg("Checking hibernation image partition %s\n", resume_file); + + if (resume_delay) { + pr_info("Waiting %dsec before reading resume device ...\n", + resume_delay); + ssleep(resume_delay); + } + + /* Check if the device is there */ + swsusp_resume_device = name_to_dev_t(resume_file); + if (swsusp_resume_device) + return 0; + + /* + * Some device discovery might still be in progress; we need to wait for + * this to finish. + */ + wait_for_device_probe(); + if (resume_wait) { + while (!(swsusp_resume_device = name_to_dev_t(resume_file))) + msleep(10); + async_synchronize_full(); + } + + swsusp_resume_device = name_to_dev_t(resume_file); + if (!swsusp_resume_device) + return -ENODEV; + return 0; +} + /** * software_resume - Resume from a saved hibernation image. * @@ -949,45 +984,12 @@ static int software_resume(void) snapshot_test = false; - if (swsusp_resume_device) - goto Check_image; - - if (!strlen(resume_file)) { - error = -ENOENT; - goto Unlock; - } - - pm_pr_dbg("Checking hibernation image partition %s\n", resume_file); - - if (resume_delay) { - pr_info("Waiting %dsec before reading resume device ...\n", - resume_delay); - ssleep(resume_delay); - } - - /* Check if the device is there */ - swsusp_resume_device = name_to_dev_t(resume_file); if (!swsusp_resume_device) { - /* - * Some device discovery might still be in progress; we need - * to wait for this to finish. - */ - wait_for_device_probe(); - - if (resume_wait) { - while ((swsusp_resume_device = name_to_dev_t(resume_file)) == 0) - msleep(10); - async_synchronize_full(); - } - - swsusp_resume_device = name_to_dev_t(resume_file); - if (!swsusp_resume_device) { - error = -ENODEV; + error = find_resume_device(); + if (error) goto Unlock; - } } - Check_image: pm_pr_dbg("Hibernation image partition %d:%d present\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); -- cgit v1.2.3 From d6545e687271ab27472eebff770f2de6a5f1a464 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 14:55:14 +0200 Subject: PM: hibernate: remove the global snapshot_test variable Passing call dependent variable in global variables is a huge antipattern. Fix it up. Signed-off-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230531125535.676098-4-hch@lst.de Signed-off-by: Jens Axboe --- kernel/power/hibernate.c | 17 ++++++----------- kernel/power/power.h | 3 +-- kernel/power/swap.c | 2 +- 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 072795063662..78696aa04f5c 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -64,7 +64,6 @@ enum { static int hibernation_mode = HIBERNATION_SHUTDOWN; bool freezer_test_done; -bool snapshot_test; static const struct platform_hibernation_ops *hibernation_ops; @@ -684,7 +683,7 @@ static void power_down(void) cpu_relax(); } -static int load_image_and_restore(void) +static int load_image_and_restore(bool snapshot_test) { int error; unsigned int flags; @@ -721,6 +720,7 @@ static int load_image_and_restore(void) */ int hibernate(void) { + bool snapshot_test = false; unsigned int sleep_flags; int error; @@ -748,9 +748,6 @@ int hibernate(void) if (error) goto Exit; - /* protected by system_transition_mutex */ - snapshot_test = false; - lock_device_hotplug(); /* Allocate memory management structures */ error = create_basic_memory_bitmaps(); @@ -792,9 +789,9 @@ int hibernate(void) unlock_device_hotplug(); if (snapshot_test) { pm_pr_dbg("Checking hibernation image\n"); - error = swsusp_check(); + error = swsusp_check(snapshot_test); if (!error) - error = load_image_and_restore(); + error = load_image_and_restore(snapshot_test); } thaw_processes(); @@ -982,8 +979,6 @@ static int software_resume(void) */ mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING); - snapshot_test = false; - if (!swsusp_resume_device) { error = find_resume_device(); if (error) @@ -994,7 +989,7 @@ static int software_resume(void) MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); pm_pr_dbg("Looking for hibernation image.\n"); - error = swsusp_check(); + error = swsusp_check(false); if (error) goto Unlock; @@ -1022,7 +1017,7 @@ static int software_resume(void) goto Close_Finish; } - error = load_image_and_restore(); + error = load_image_and_restore(false); thaw_processes(); Finish: pm_notifier_call_chain(PM_POST_RESTORE); diff --git a/kernel/power/power.h b/kernel/power/power.h index b83c8d5e188d..978189fcafd1 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -59,7 +59,6 @@ asmlinkage int swsusp_save(void); /* kernel/power/hibernate.c */ extern bool freezer_test_done; -extern bool snapshot_test; extern int hibernation_snapshot(int platform_mode); extern int hibernation_restore(int platform_mode); @@ -174,7 +173,7 @@ extern int swsusp_swap_in_use(void); #define SF_HW_SIG 8 /* kernel/power/hibernate.c */ -extern int swsusp_check(void); +int swsusp_check(bool snapshot_test); extern void swsusp_free(void); extern int swsusp_read(unsigned int *flags_p); extern int swsusp_write(unsigned int flags); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 801c411530d1..81aec3b2c605 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -1514,7 +1514,7 @@ end: * swsusp_check - Check for swsusp signature in the resume device */ -int swsusp_check(void) +int swsusp_check(bool snapshot_test) { int error; void *holder; -- cgit v1.2.3 From cc89c63e2fe37d476357c82390dfb12edcd41cdd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 14:55:15 +0200 Subject: PM: hibernate: move finding the resume device out of software_resume software_resume can be called either from an init call in the boot code, or from sysfs once the system has finished booting, and the two invocation methods this can't race with each other. For the latter case we did just parse the suspend device manually, while the former might not have one. Split software_resume so that the search only happens for the boot case, which also means the special lockdep nesting annotation can go away as the system transition mutex can be taken a little later and doesn't have the sysfs locking nest inside it. Signed-off-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230531125535.676098-5-hch@lst.de Signed-off-by: Jens Axboe --- kernel/power/hibernate.c | 80 +++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 41 deletions(-) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 78696aa04f5c..45e24b02cd50 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -907,7 +907,7 @@ unlock: } EXPORT_SYMBOL_GPL(hibernate_quiet_exec); -static int find_resume_device(void) +static int __init find_resume_device(void) { if (!strlen(resume_file)) return -ENOENT; @@ -942,53 +942,16 @@ static int find_resume_device(void) return 0; } -/** - * software_resume - Resume from a saved hibernation image. - * - * This routine is called as a late initcall, when all devices have been - * discovered and initialized already. - * - * The image reading code is called to see if there is a hibernation image - * available for reading. If that is the case, devices are quiesced and the - * contents of memory is restored from the saved image. - * - * If this is successful, control reappears in the restored target kernel in - * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine - * attempts to recover gracefully and make the kernel return to the normal mode - * of operation. - */ static int software_resume(void) { int error; - /* - * If the user said "noresume".. bail out early. - */ - if (noresume || !hibernation_available()) - return 0; - - /* - * name_to_dev_t() below takes a sysfs buffer mutex when sysfs - * is configured into the kernel. Since the regular hibernate - * trigger path is via sysfs which takes a buffer mutex before - * calling hibernate functions (which take system_transition_mutex) - * this can cause lockdep to complain about a possible ABBA deadlock - * which cannot happen since we're in the boot code here and - * sysfs can't be invoked yet. Therefore, we use a subclass - * here to avoid lockdep complaining. - */ - mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING); - - if (!swsusp_resume_device) { - error = find_resume_device(); - if (error) - goto Unlock; - } - pm_pr_dbg("Hibernation image partition %d:%d present\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); pm_pr_dbg("Looking for hibernation image.\n"); + + mutex_lock(&system_transition_mutex); error = swsusp_check(false); if (error) goto Unlock; @@ -1035,7 +998,39 @@ static int software_resume(void) goto Finish; } -late_initcall_sync(software_resume); +/** + * software_resume_initcall - Resume from a saved hibernation image. + * + * This routine is called as a late initcall, when all devices have been + * discovered and initialized already. + * + * The image reading code is called to see if there is a hibernation image + * available for reading. If that is the case, devices are quiesced and the + * contents of memory is restored from the saved image. + * + * If this is successful, control reappears in the restored target kernel in + * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine + * attempts to recover gracefully and make the kernel return to the normal mode + * of operation. + */ +static int __init software_resume_initcall(void) +{ + /* + * If the user said "noresume".. bail out early. + */ + if (noresume || !hibernation_available()) + return 0; + + if (!swsusp_resume_device) { + int error = find_resume_device(); + + if (error) + return error; + } + + return software_resume(); +} +late_initcall_sync(software_resume_initcall); static const char * const hibernation_modes[] = { @@ -1176,6 +1171,9 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, char *name; dev_t res; + if (!hibernation_available()) + return 0; + if (len && buf[len-1] == '\n') len--; name = kstrndup(buf, len, GFP_KERNEL); -- cgit v1.2.3 From cf056a43121559d3642419917d405c3237ded90a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 14:55:24 +0200 Subject: init: improve the name_to_dev_t interface name_to_dev_t has a very misleading name, that doesn't make clear it should only be used by the early init code, and also has a bad calling convention that doesn't allow returning different kinds of errors. Rename it to early_lookup_bdev to make the use case clear, and return an errno, where -EINVAL means the string could not be parsed, and -ENODEV means it the string was valid, but there was no device found for it. Also stub out the whole call for !CONFIG_BLOCK as all the non-block root cases are always covered in the caller. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230531125535.676098-14-hch@lst.de Signed-off-by: Jens Axboe --- Documentation/admin-guide/kernel-parameters.txt | 4 +- drivers/md/dm-table.c | 5 +- drivers/md/md-autodetect.c | 3 +- drivers/mtd/devices/block2mtd.c | 3 +- fs/pstore/blk.c | 4 +- include/linux/blkdev.h | 5 ++ include/linux/mount.h | 1 - init/do_mounts.c | 102 ++++++++++++------------ kernel/power/hibernate.c | 22 +++-- 9 files changed, 74 insertions(+), 75 deletions(-) (limited to 'kernel/power/hibernate.c') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 457c342d1597..a6bc31349cbb 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5453,8 +5453,8 @@ root= [KNL] Root filesystem Usually this a a block device specifier of some kind, - see the name_to_dev_t comment in init/do_mounts.c for - details. + see the early_lookup_bdev comment in init/do_mounts.c + for details. Alternatively this can be "ram" for the legacy initial ramdisk, "nfs" and "cifs" for root on a network file system, or "mtd" and "ubi" for mounting from raw flash. diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 1398f1d6e83e..05aa16da43b0 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -330,8 +330,9 @@ dev_t dm_get_dev_t(const char *path) { dev_t dev; - if (lookup_bdev(path, &dev)) - dev = name_to_dev_t(path); + if (lookup_bdev(path, &dev) && + early_lookup_bdev(path, &dev)) + return 0; return dev; } EXPORT_SYMBOL_GPL(dm_get_dev_t); diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c index 91836e6de326..6eaa0eab40f9 100644 --- a/drivers/md/md-autodetect.c +++ b/drivers/md/md-autodetect.c @@ -147,7 +147,8 @@ static void __init md_setup_drive(struct md_setup_args *args) if (p) *p++ = 0; - dev = name_to_dev_t(devname); + if (early_lookup_bdev(devname, &dev)) + dev = 0; if (strncmp(devname, "/dev/", 5) == 0) devname += 5; snprintf(comp_name, 63, "/dev/%s", devname); diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 7ac82c6fe350..a127cdde03b7 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -254,8 +254,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size, msleep(1000); wait_for_device_probe(); - devt = name_to_dev_t(devname); - if (!devt) + if (early_lookup_bdev(devname, &devt)) continue; bdev = blkdev_get_by_dev(devt, mode, dev, NULL); } diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 4ae0cfcd15f2..de8cf5d75f34 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -263,9 +263,9 @@ static __init const char *early_boot_devpath(const char *initial_devname) * same scheme to find the device that we use for mounting * the root file system. */ - dev_t dev = name_to_dev_t(initial_devname); + dev_t dev; - if (!dev) { + if (early_lookup_bdev(initial_devname, &dev)) { pr_err("failed to resolve '%s'!\n", initial_devname); return initial_devname; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9e9a9e4edee9..d682e233fd66 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1501,6 +1501,7 @@ int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); void bdev_statx_dioalign(struct inode *inode, struct kstat *stat); void printk_all_partitions(void); +int early_lookup_bdev(const char *pathname, dev_t *dev); #else static inline void invalidate_bdev(struct block_device *bdev) { @@ -1522,6 +1523,10 @@ static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) static inline void printk_all_partitions(void) { } +static inline int early_lookup_bdev(const char *pathname, dev_t *dev) +{ + return -EINVAL; +} #endif /* CONFIG_BLOCK */ int fsync_bdev(struct block_device *bdev); diff --git a/include/linux/mount.h b/include/linux/mount.h index 1ea326c368f7..4b81ea90440e 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -107,7 +107,6 @@ extern struct vfsmount *vfs_submount(const struct dentry *mountpoint, extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); -extern dev_t name_to_dev_t(const char *name); extern bool path_is_mountpoint(const struct path *path); extern bool our_mnt(struct vfsmount *mnt); diff --git a/init/do_mounts.c b/init/do_mounts.c index 86599faf2bf8..f1953aeb3219 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -96,11 +96,10 @@ static int match_dev_by_uuid(struct device *dev, const void *data) * * Returns the matching dev_t on success or 0 on failure. */ -static dev_t devt_from_partuuid(const char *uuid_str) +static int devt_from_partuuid(const char *uuid_str, dev_t *devt) { struct uuidcmp cmp; struct device *dev = NULL; - dev_t devt = 0; int offset = 0; char *slash; @@ -124,21 +123,21 @@ static dev_t devt_from_partuuid(const char *uuid_str) dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid); if (!dev) - return 0; + return -ENODEV; if (offset) { /* * Attempt to find the requested partition by adding an offset * to the partition number found by UUID. */ - devt = part_devt(dev_to_disk(dev), - dev_to_bdev(dev)->bd_partno + offset); + *devt = part_devt(dev_to_disk(dev), + dev_to_bdev(dev)->bd_partno + offset); } else { - devt = dev->devt; + *devt = dev->devt; } put_device(dev); - return devt; + return 0; clear_root_wait: pr_err("VFS: PARTUUID= is invalid.\n" @@ -146,7 +145,7 @@ clear_root_wait: if (root_wait) pr_err("Disabling rootwait; root= is invalid.\n"); root_wait = 0; - return 0; + return -EINVAL; } /** @@ -166,38 +165,35 @@ static int match_dev_by_label(struct device *dev, const void *data) return 1; } -static dev_t devt_from_partlabel(const char *label) +static int devt_from_partlabel(const char *label, dev_t *devt) { struct device *dev; - dev_t devt = 0; dev = class_find_device(&block_class, NULL, label, &match_dev_by_label); - if (dev) { - devt = dev->devt; - put_device(dev); - } - - return devt; + if (!dev) + return -ENODEV; + *devt = dev->devt; + put_device(dev); + return 0; } -static dev_t devt_from_devname(const char *name) +static int devt_from_devname(const char *name, dev_t *devt) { - dev_t devt = 0; int part; char s[32]; char *p; if (strlen(name) > 31) - return 0; + return -EINVAL; strcpy(s, name); for (p = s; *p; p++) { if (*p == '/') *p = '!'; } - devt = blk_lookup_devt(s, 0); - if (devt) - return devt; + *devt = blk_lookup_devt(s, 0); + if (*devt) + return 0; /* * Try non-existent, but valid partition, which may only exist after @@ -206,41 +202,42 @@ static dev_t devt_from_devname(const char *name) while (p > s && isdigit(p[-1])) p--; if (p == s || !*p || *p == '0') - return 0; + return -EINVAL; /* try disk name without */ part = simple_strtoul(p, NULL, 10); *p = '\0'; - devt = blk_lookup_devt(s, part); - if (devt) - return devt; + *devt = blk_lookup_devt(s, part); + if (*devt) + return 0; /* try disk name without p */ if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') - return 0; + return -EINVAL; p[-1] = '\0'; - return blk_lookup_devt(s, part); + *devt = blk_lookup_devt(s, part); + if (*devt) + return 0; + return -EINVAL; } -#endif /* CONFIG_BLOCK */ -static dev_t devt_from_devnum(const char *name) +static int devt_from_devnum(const char *name, dev_t *devt) { unsigned maj, min, offset; - dev_t devt = 0; char *p, dummy; if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 || sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) { - devt = MKDEV(maj, min); - if (maj != MAJOR(devt) || min != MINOR(devt)) - return 0; + *devt = MKDEV(maj, min); + if (maj != MAJOR(*devt) || min != MINOR(*devt)) + return -EINVAL; } else { - devt = new_decode_dev(simple_strtoul(name, &p, 16)); + *devt = new_decode_dev(simple_strtoul(name, &p, 16)); if (*p) - return 0; + return -EINVAL; } - return devt; + return 0; } /* @@ -271,19 +268,18 @@ static dev_t devt_from_devnum(const char *name) * name contains slashes, the device name has them replaced with * bangs. */ -dev_t name_to_dev_t(const char *name) +int early_lookup_bdev(const char *name, dev_t *devt) { -#ifdef CONFIG_BLOCK if (strncmp(name, "PARTUUID=", 9) == 0) - return devt_from_partuuid(name + 9); + return devt_from_partuuid(name + 9, devt); if (strncmp(name, "PARTLABEL=", 10) == 0) - return devt_from_partlabel(name + 10); + return devt_from_partlabel(name + 10, devt); if (strncmp(name, "/dev/", 5) == 0) - return devt_from_devname(name + 5); -#endif - return devt_from_devnum(name); + return devt_from_devname(name + 5, devt); + return devt_from_devnum(name, devt); } -EXPORT_SYMBOL_GPL(name_to_dev_t); +EXPORT_SYMBOL_GPL(early_lookup_bdev); +#endif static int __init root_dev_setup(char *line) { @@ -606,20 +602,17 @@ static void __init wait_for_root(char *root_device_name) pr_info("Waiting for root device %s...\n", root_device_name); - for (;;) { - if (driver_probe_done()) { - ROOT_DEV = name_to_dev_t(root_device_name); - if (ROOT_DEV) - break; - } + while (!driver_probe_done() || + early_lookup_bdev(root_device_name, &ROOT_DEV) < 0) msleep(5); - } async_synchronize_full(); } static dev_t __init parse_root_device(char *root_device_name) { + dev_t dev; + if (!strncmp(root_device_name, "mtd", 3) || !strncmp(root_device_name, "ubi", 3)) return Root_Generic; @@ -629,7 +622,10 @@ static dev_t __init parse_root_device(char *root_device_name) return Root_CIFS; if (strcmp(root_device_name, "/dev/ram") == 0) return Root_RAM0; - return name_to_dev_t(root_device_name); + + if (early_lookup_bdev(root_device_name, &dev)) + return 0; + return dev; } /* diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 45e24b02cd50..c52dedb9f7c8 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) "PM: hibernation: " fmt +#include #include #include #include @@ -921,8 +922,7 @@ static int __init find_resume_device(void) } /* Check if the device is there */ - swsusp_resume_device = name_to_dev_t(resume_file); - if (swsusp_resume_device) + if (!early_lookup_bdev(resume_file, &swsusp_resume_device)) return 0; /* @@ -931,15 +931,12 @@ static int __init find_resume_device(void) */ wait_for_device_probe(); if (resume_wait) { - while (!(swsusp_resume_device = name_to_dev_t(resume_file))) + while (early_lookup_bdev(resume_file, &swsusp_resume_device)) msleep(10); async_synchronize_full(); } - swsusp_resume_device = name_to_dev_t(resume_file); - if (!swsusp_resume_device) - return -ENODEV; - return 0; + return early_lookup_bdev(resume_file, &swsusp_resume_device); } static int software_resume(void) @@ -1169,7 +1166,8 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, unsigned int sleep_flags; int len = n; char *name; - dev_t res; + dev_t dev; + int error; if (!hibernation_available()) return 0; @@ -1180,13 +1178,13 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, if (!name) return -ENOMEM; - res = name_to_dev_t(name); + error = early_lookup_bdev(name, &dev); kfree(name); - if (!res) - return -EINVAL; + if (error) + return error; sleep_flags = lock_system_sleep(); - swsusp_resume_device = res; + swsusp_resume_device = dev; unlock_system_sleep(sleep_flags); pm_pr_dbg("Configured hibernation resume from disk to %u\n", -- cgit v1.2.3 From 1e8c813b083c4122dfeaa5c3b11028331026e85d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 14:55:32 +0200 Subject: PM: hibernate: don't use early_lookup_bdev in resume_store resume_store is a sysfs attribute written during normal kernel runtime, and it should not use the early_lookup_bdev API that bypasses all normal path based permission checking, and might cause problems with certain container environments renaming devices. Switch to lookup_bdev, which does a normal path lookup instead, and fall back to trying to parse a numeric dev_t just like early_lookup_bdev did. Note that this strictly speaking changes the kernel ABI as the PARTUUID= and PARTLABEL= style syntax is now not available during a running systems. They never were intended for that, but this breaks things we'll have to figure out a way to make them available again. But if avoidable in any way I'd rather avoid that. Fixes: 421a5fa1a6cf ("PM / hibernate: use name_to_dev_t to parse resume") Signed-off-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230531125535.676098-22-hch@lst.de Signed-off-by: Jens Axboe --- kernel/power/hibernate.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c52dedb9f7c8..7ae95ec72f99 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1178,7 +1178,23 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, if (!name) return -ENOMEM; - error = early_lookup_bdev(name, &dev); + error = lookup_bdev(name, &dev); + if (error) { + unsigned maj, min, offset; + char *p, dummy; + + if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 || + sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, + &dummy) == 3) { + dev = MKDEV(maj, min); + if (maj != MAJOR(dev) || min != MINOR(dev)) + error = -EINVAL; + } else { + dev = new_decode_dev(simple_strtoul(name, &p, 16)); + if (*p) + error = -EINVAL; + } + } kfree(name); if (error) return error; -- cgit v1.2.3 From 2736e8eeb0ccdc71d1f4256c9c9a28f58cc43307 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Jun 2023 13:02:43 +0200 Subject: block: use the holder as indication for exclusive opens The current interface for exclusive opens is rather confusing as it requires both the FMODE_EXCL flag and a holder. Remove the need to pass FMODE_EXCL and just key off the exclusive open off a non-NULL holder. For blkdev_put this requires adding the holder argument, which provides better debug checking that only the holder actually releases the hold, but at the same time allows removing the now superfluous mode argument. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Acked-by: Christian Brauner Acked-by: David Sterba [btrfs] Acked-by: Jack Wang [rnbd] Link: https://lore.kernel.org/r/20230608110258.189493-16-hch@lst.de Signed-off-by: Jens Axboe --- block/bdev.c | 37 ++++++++++++++++++++---------------- block/fops.c | 6 ++++-- block/genhd.c | 5 ++--- block/ioctl.c | 5 ++--- drivers/block/drbd/drbd_nl.c | 23 +++++++++++++--------- drivers/block/pktcdvd.c | 13 ++++++------- drivers/block/rnbd/rnbd-srv.c | 4 ++-- drivers/block/xen-blkback/xenbus.c | 2 +- drivers/block/zram/zram_drv.c | 8 ++++---- drivers/md/bcache/super.c | 15 +++++++-------- drivers/md/dm.c | 6 +++--- drivers/md/md.c | 38 +++++++++++++++++++------------------ drivers/mtd/devices/block2mtd.c | 4 ++-- drivers/nvme/target/io-cmd-bdev.c | 2 +- drivers/s390/block/dasd_genhd.c | 2 +- drivers/target/target_core_iblock.c | 6 +++--- drivers/target/target_core_pscsi.c | 8 +++----- fs/btrfs/dev-replace.c | 6 +++--- fs/btrfs/ioctl.c | 12 ++++++------ fs/btrfs/volumes.c | 28 +++++++++++++-------------- fs/btrfs/volumes.h | 6 +++--- fs/erofs/super.c | 7 ++++--- fs/ext4/super.c | 11 +++-------- fs/f2fs/super.c | 2 +- fs/jfs/jfs_logmgr.c | 6 +++--- fs/nfs/blocklayout/dev.c | 4 ++-- fs/nilfs2/super.c | 6 +++--- fs/ocfs2/cluster/heartbeat.c | 4 ++-- fs/reiserfs/journal.c | 19 +++++++++---------- fs/reiserfs/reiserfs.h | 1 - fs/super.c | 20 +++++++++---------- fs/xfs/xfs_super.c | 15 ++++++++------- include/linux/blkdev.h | 2 +- kernel/power/hibernate.c | 12 ++++-------- kernel/power/power.h | 2 +- kernel/power/swap.c | 21 +++++++++----------- mm/swapfile.c | 7 +++---- 37 files changed, 183 insertions(+), 192 deletions(-) (limited to 'kernel/power/hibernate.c') diff --git a/block/bdev.c b/block/bdev.c index 2c6888ceb378..db63e5bcc46f 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -604,7 +604,7 @@ void bd_abort_claiming(struct block_device *bdev, void *holder) } EXPORT_SYMBOL(bd_abort_claiming); -static void bd_end_claim(struct block_device *bdev) +static void bd_end_claim(struct block_device *bdev, void *holder) { struct block_device *whole = bdev_whole(bdev); bool unblock = false; @@ -614,6 +614,7 @@ static void bd_end_claim(struct block_device *bdev) * bdev_lock. open_mutex is used to synchronize disk_holder unlinking. */ mutex_lock(&bdev_lock); + WARN_ON_ONCE(bdev->bd_holder != holder); WARN_ON_ONCE(--bdev->bd_holders < 0); WARN_ON_ONCE(--whole->bd_holders < 0); if (!bdev->bd_holders) { @@ -750,10 +751,9 @@ void blkdev_put_no_open(struct block_device *bdev) * @holder: exclusive holder identifier * @hops: holder operations * - * Open the block device described by device number @dev. If @mode includes - * %FMODE_EXCL, the block device is opened with exclusive access. Specifying - * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for - * the same @holder. + * Open the block device described by device number @dev. If @holder is not + * %NULL, the block device is opened with exclusive access. Exclusive opens may + * nest for the same @holder. * * Use this interface ONLY if you really do not have anything better - i.e. when * you are behind a truly sucky interface and all you are given is a device @@ -785,10 +785,16 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder, return ERR_PTR(-ENXIO); disk = bdev->bd_disk; - if (mode & FMODE_EXCL) { + if (holder) { + mode |= FMODE_EXCL; ret = bd_prepare_to_claim(bdev, holder, hops); if (ret) goto put_blkdev; + } else { + if (WARN_ON_ONCE(mode & FMODE_EXCL)) { + ret = -EIO; + goto put_blkdev; + } } disk_block_events(disk); @@ -805,7 +811,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder, ret = blkdev_get_whole(bdev, mode); if (ret) goto put_module; - if (mode & FMODE_EXCL) { + if (holder) { bd_finish_claiming(bdev, holder, hops); /* @@ -829,7 +835,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder, put_module: module_put(disk->fops->owner); abort_claiming: - if (mode & FMODE_EXCL) + if (holder) bd_abort_claiming(bdev, holder); mutex_unlock(&disk->open_mutex); disk_unblock_events(disk); @@ -845,10 +851,9 @@ EXPORT_SYMBOL(blkdev_get_by_dev); * @mode: FMODE_* mask * @holder: exclusive holder identifier * - * Open the block device described by the device file at @path. If @mode - * includes %FMODE_EXCL, the block device is opened with exclusive access. - * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may - * nest for the same @holder. + * Open the block device described by the device file at @path. If @holder is + * not %NULL, the block device is opened with exclusive access. Exclusive opens + * may nest for the same @holder. * * CONTEXT: * Might sleep. @@ -869,7 +874,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, bdev = blkdev_get_by_dev(dev, mode, holder, hops); if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) { - blkdev_put(bdev, mode); + blkdev_put(bdev, holder); return ERR_PTR(-EACCES); } @@ -877,7 +882,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, } EXPORT_SYMBOL(blkdev_get_by_path); -void blkdev_put(struct block_device *bdev, fmode_t mode) +void blkdev_put(struct block_device *bdev, void *holder) { struct gendisk *disk = bdev->bd_disk; @@ -892,8 +897,8 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) sync_blockdev(bdev); mutex_lock(&disk->open_mutex); - if (mode & FMODE_EXCL) - bd_end_claim(bdev); + if (holder) + bd_end_claim(bdev, holder); /* * Trigger event checking and tell drivers to flush MEDIA_CHANGE diff --git a/block/fops.c b/block/fops.c index 26af2b39c758..9f26e25bafa1 100644 --- a/block/fops.c +++ b/block/fops.c @@ -490,7 +490,9 @@ static int blkdev_open(struct inode *inode, struct file *filp) if ((filp->f_flags & O_ACCMODE) == 3) filp->f_mode |= FMODE_WRITE_IOCTL; - bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp, NULL); + bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, + (filp->f_mode & FMODE_EXCL) ? filp : NULL, + NULL); if (IS_ERR(bdev)) return PTR_ERR(bdev); @@ -504,7 +506,7 @@ static int blkdev_release(struct inode *inode, struct file *filp) { struct block_device *bdev = filp->private_data; - blkdev_put(bdev, filp->f_mode); + blkdev_put(bdev, (filp->f_mode & FMODE_EXCL) ? filp : NULL); return 0; } diff --git a/block/genhd.c b/block/genhd.c index 4e5fd6aaa883..b56f8b5c88b3 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -365,12 +365,11 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode) } set_bit(GD_NEED_PART_SCAN, &disk->state); - bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL, - NULL); + bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL, NULL); if (IS_ERR(bdev)) ret = PTR_ERR(bdev); else - blkdev_put(bdev, mode & ~FMODE_EXCL); + blkdev_put(bdev, NULL); /* * If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set, diff --git a/block/ioctl.c b/block/ioctl.c index c7d7d4345edb..b39bd5b41ee4 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -454,11 +454,10 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode, if (mode & FMODE_EXCL) return set_blocksize(bdev, n); - if (IS_ERR(blkdev_get_by_dev(bdev->bd_dev, mode | FMODE_EXCL, &bdev, - NULL))) + if (IS_ERR(blkdev_get_by_dev(bdev->bd_dev, mode, &bdev, NULL))) return -EBUSY; ret = set_blocksize(bdev, n); - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, &bdev); return ret; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index cab59dab3410..10b1e5171332 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1640,8 +1640,7 @@ static struct block_device *open_backing_dev(struct drbd_device *device, struct block_device *bdev; int err = 0; - bdev = blkdev_get_by_path(bdev_path, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, + bdev = blkdev_get_by_path(bdev_path, FMODE_READ | FMODE_WRITE, claim_ptr, NULL); if (IS_ERR(bdev)) { drbd_err(device, "open(\"%s\") failed with %ld\n", @@ -1654,7 +1653,7 @@ static struct block_device *open_backing_dev(struct drbd_device *device, err = bd_link_disk_holder(bdev, device->vdisk); if (err) { - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(bdev, claim_ptr); drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n", bdev_path, err); bdev = ERR_PTR(err); @@ -1696,13 +1695,13 @@ static int open_backing_devices(struct drbd_device *device, } static void close_backing_dev(struct drbd_device *device, struct block_device *bdev, - bool do_bd_unlink) + void *claim_ptr, bool do_bd_unlink) { if (!bdev) return; if (do_bd_unlink) bd_unlink_disk_holder(bdev, device->vdisk); - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(bdev, claim_ptr); } void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev) @@ -1710,8 +1709,11 @@ void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev * if (ldev == NULL) return; - close_backing_dev(device, ldev->md_bdev, ldev->md_bdev != ldev->backing_bdev); - close_backing_dev(device, ldev->backing_bdev, true); + close_backing_dev(device, ldev->md_bdev, + ldev->md.meta_dev_idx < 0 ? + (void *)device : (void *)drbd_m_holder, + ldev->md_bdev != ldev->backing_bdev); + close_backing_dev(device, ldev->backing_bdev, device, true); kfree(ldev->disk_conf); kfree(ldev); @@ -2127,8 +2129,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) fail: conn_reconfig_done(connection); if (nbc) { - close_backing_dev(device, nbc->md_bdev, nbc->md_bdev != nbc->backing_bdev); - close_backing_dev(device, nbc->backing_bdev, true); + close_backing_dev(device, nbc->md_bdev, + nbc->disk_conf->meta_dev_idx < 0 ? + (void *)device : (void *)drbd_m_holder, + nbc->md_bdev != nbc->backing_bdev); + close_backing_dev(device, nbc->backing_bdev, device, true); kfree(nbc); } kfree(new_disk_conf); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 7bfc058cb665..c3299e49edd5 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2167,8 +2167,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) * to read/write from/to it. It is already opened in O_NONBLOCK mode * so open should not fail. */ - bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd, - NULL); + bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ, pd, NULL); if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); goto out; @@ -2215,7 +2214,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) return 0; out_putdev: - blkdev_put(bdev, FMODE_READ | FMODE_EXCL); + blkdev_put(bdev, pd); out: return ret; } @@ -2234,7 +2233,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush) pkt_lock_door(pd, 0); pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); - blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); + blkdev_put(pd->bdev, pd); pkt_shrink_pktlist(pd); } @@ -2520,7 +2519,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) return PTR_ERR(bdev); sdev = scsi_device_from_queue(bdev->bd_disk->queue); if (!sdev) { - blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); + blkdev_put(bdev, NULL); return -EINVAL; } put_device(&sdev->sdev_gendev); @@ -2545,7 +2544,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) return 0; out_mem: - blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); + blkdev_put(bdev, NULL); /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return -ENOMEM; @@ -2751,7 +2750,7 @@ static int pkt_remove_dev(dev_t pkt_dev) pkt_debugfs_dev_remove(pd); pkt_sysfs_dev_remove(pd); - blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY); + blkdev_put(pd->bdev, NULL); remove_proc_entry(pd->disk->disk_name, pkt_proc); dev_notice(ddev, "writer unmapped\n"); diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index a92a4289d0ec..a909f8763ce7 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -219,7 +219,7 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id) rnbd_put_sess_dev(sess_dev); wait_for_completion(&dc); /* wait for inflights to drop to zero */ - blkdev_put(sess_dev->bdev, sess_dev->open_flags); + blkdev_put(sess_dev->bdev, NULL); mutex_lock(&sess_dev->dev->lock); list_del(&sess_dev->dev_list); if (sess_dev->open_flags & FMODE_WRITE) @@ -791,7 +791,7 @@ srv_dev_put: } rnbd_put_srv_dev(srv_dev); blkdev_put: - blkdev_put(bdev, open_flags); + blkdev_put(bdev, NULL); free_path: kfree(full_path); reject: diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 43b36da9b354..141b60aad570 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -473,7 +473,7 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev) static void xen_vbd_free(struct xen_vbd *vbd) { if (vbd->bdev) - blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); + blkdev_put(vbd->bdev, NULL); vbd->bdev = NULL; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f5644c606040..21615d67a9bd 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -420,7 +420,7 @@ static void reset_bdev(struct zram *zram) return; bdev = zram->bdev; - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, zram); /* hope filp_close flush all of IO */ filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; @@ -507,8 +507,8 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - bdev = blkdev_get_by_dev(inode->i_rdev, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram, NULL); + bdev = blkdev_get_by_dev(inode->i_rdev, FMODE_READ | FMODE_WRITE, zram, + NULL); if (IS_ERR(bdev)) { err = PTR_ERR(bdev); bdev = NULL; @@ -539,7 +539,7 @@ out: kvfree(bitmap); if (bdev) - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(bdev, zram); if (backing_dev) filp_close(backing_dev, NULL); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 4a2aed047aec..7022fea396f2 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1369,7 +1369,7 @@ static void cached_dev_free(struct closure *cl) put_page(virt_to_page(dc->sb_disk)); if (!IS_ERR_OR_NULL(dc->bdev)) - blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(dc->bdev, bcache_kobj); wake_up(&unregister_wait); @@ -2218,7 +2218,7 @@ void bch_cache_release(struct kobject *kobj) put_page(virt_to_page(ca->sb_disk)); if (!IS_ERR_OR_NULL(ca->bdev)) - blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(ca->bdev, bcache_kobj); kfree(ca); module_put(THIS_MODULE); @@ -2359,7 +2359,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, * call blkdev_put() to bdev in bch_cache_release(). So we * explicitly call blkdev_put() here. */ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, bcache_kobj); if (ret == -ENOMEM) err = "cache_alloc(): -ENOMEM"; else if (ret == -EPERM) @@ -2461,7 +2461,7 @@ static void register_bdev_worker(struct work_struct *work) if (!dc) { fail = true; put_page(virt_to_page(args->sb_disk)); - blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(args->bdev, bcache_kobj); goto out; } @@ -2491,7 +2491,7 @@ static void register_cache_worker(struct work_struct *work) if (!ca) { fail = true; put_page(virt_to_page(args->sb_disk)); - blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(args->bdev, bcache_kobj); goto out; } @@ -2558,8 +2558,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ret = -EINVAL; err = "failed to open device"; - bdev = blkdev_get_by_path(strim(path), - FMODE_READ|FMODE_WRITE|FMODE_EXCL, + bdev = blkdev_get_by_path(strim(path), FMODE_READ | FMODE_WRITE, bcache_kobj, NULL); if (IS_ERR(bdev)) { if (bdev == ERR_PTR(-EBUSY)) { @@ -2648,7 +2647,7 @@ async_done: out_put_sb_page: put_page(virt_to_page(sb_disk)); out_blkdev_put: - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(bdev, register_bcache); out_free_sb: kfree(sb); out_free_path: diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 246b8f028a98..b16e37362c5a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -746,7 +746,7 @@ static struct table_device *open_table_device(struct mapped_device *md, return ERR_PTR(-ENOMEM); refcount_set(&td->count, 1); - bdev = blkdev_get_by_dev(dev, mode | FMODE_EXCL, _dm_claim_ptr, NULL); + bdev = blkdev_get_by_dev(dev, mode, _dm_claim_ptr, NULL); if (IS_ERR(bdev)) { r = PTR_ERR(bdev); goto out_free_td; @@ -771,7 +771,7 @@ static struct table_device *open_table_device(struct mapped_device *md, return td; out_blkdev_put: - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, _dm_claim_ptr); out_free_td: kfree(td); return ERR_PTR(r); @@ -784,7 +784,7 @@ static void close_table_device(struct table_device *td, struct mapped_device *md { if (md->disk->slave_dir) bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); - blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL); + blkdev_put(td->dm_dev.bdev, _dm_claim_ptr); put_dax(td->dm_dev.dax_dev); list_del(&td->list); kfree(td); diff --git a/drivers/md/md.c b/drivers/md/md.c index 159197dd7b6d..dad4a5539f9f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2449,7 +2449,10 @@ static void rdev_delayed_delete(struct work_struct *ws) void md_autodetect_dev(dev_t dev); -static void export_rdev(struct md_rdev *rdev) +/* just for claiming the bdev */ +static struct md_rdev claim_rdev; + +static void export_rdev(struct md_rdev *rdev, struct mddev *mddev) { pr_debug("md: export_rdev(%pg)\n", rdev->bdev); md_rdev_clear(rdev); @@ -2457,7 +2460,7 @@ static void export_rdev(struct md_rdev *rdev) if (test_bit(AutoDetected, &rdev->flags)) md_autodetect_dev(rdev->bdev->bd_dev); #endif - blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(rdev->bdev, mddev->major_version == -2 ? &claim_rdev : rdev); rdev->bdev = NULL; kobject_put(&rdev->kobj); } @@ -2485,7 +2488,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev) INIT_WORK(&rdev->del_work, rdev_delayed_delete); kobject_get(&rdev->kobj); queue_work(md_rdev_misc_wq, &rdev->del_work); - export_rdev(rdev); + export_rdev(rdev, rdev->mddev); } static void export_array(struct mddev *mddev) @@ -3612,6 +3615,7 @@ int md_rdev_init(struct md_rdev *rdev) return badblocks_init(&rdev->badblocks, 0); } EXPORT_SYMBOL_GPL(md_rdev_init); + /* * Import a device. If 'super_format' >= 0, then sanity check the superblock * @@ -3624,7 +3628,6 @@ EXPORT_SYMBOL_GPL(md_rdev_init); */ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor) { - static struct md_rdev claim_rdev; /* just for claiming the bdev */ struct md_rdev *rdev; sector_t size; int err; @@ -3640,8 +3643,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe if (err) goto out_clear_rdev; - rdev->bdev = blkdev_get_by_dev(newdev, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, + rdev->bdev = blkdev_get_by_dev(newdev, FMODE_READ | FMODE_WRITE, super_format == -2 ? &claim_rdev : rdev, NULL); if (IS_ERR(rdev->bdev)) { pr_warn("md: could not open device unknown-block(%u,%u).\n", @@ -3679,7 +3681,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe return rdev; out_blkdev_put: - blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(rdev->bdev, super_format == -2 ? &claim_rdev : rdev); out_clear_rdev: md_rdev_clear(rdev); out_free_rdev: @@ -4560,7 +4562,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) err = bind_rdev_to_array(rdev, mddev); out: if (err) - export_rdev(rdev); + export_rdev(rdev, mddev); mddev_unlock(mddev); if (!err) md_new_event(); @@ -6498,7 +6500,7 @@ static void autorun_devices(int part) rdev_for_each_list(rdev, tmp, &candidates) { list_del_init(&rdev->same_set); if (bind_rdev_to_array(rdev, mddev)) - export_rdev(rdev); + export_rdev(rdev, mddev); } autorun_array(mddev); mddev_unlock(mddev); @@ -6508,7 +6510,7 @@ static void autorun_devices(int part) */ rdev_for_each_list(rdev, tmp, &candidates) { list_del_init(&rdev->same_set); - export_rdev(rdev); + export_rdev(rdev, mddev); } mddev_put(mddev); } @@ -6696,13 +6698,13 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) pr_warn("md: %pg has different UUID to %pg\n", rdev->bdev, rdev0->bdev); - export_rdev(rdev); + export_rdev(rdev, mddev); return -EINVAL; } } err = bind_rdev_to_array(rdev, mddev); if (err) - export_rdev(rdev); + export_rdev(rdev, mddev); return err; } @@ -6746,7 +6748,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) /* This was a hot-add request, but events doesn't * match, so reject it. */ - export_rdev(rdev); + export_rdev(rdev, mddev); return -EINVAL; } @@ -6772,7 +6774,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) } } if (has_journal || mddev->bitmap) { - export_rdev(rdev); + export_rdev(rdev, mddev); return -EBUSY; } set_bit(Journal, &rdev->flags); @@ -6787,7 +6789,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) /* --add initiated by this node */ err = md_cluster_ops->add_new_disk(mddev, rdev); if (err) { - export_rdev(rdev); + export_rdev(rdev, mddev); return err; } } @@ -6797,7 +6799,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) err = bind_rdev_to_array(rdev, mddev); if (err) - export_rdev(rdev); + export_rdev(rdev, mddev); if (mddev_is_clustered(mddev)) { if (info->state & (1 << MD_DISK_CANDIDATE)) { @@ -6860,7 +6862,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) err = bind_rdev_to_array(rdev, mddev); if (err) { - export_rdev(rdev); + export_rdev(rdev, mddev); return err; } } @@ -6985,7 +6987,7 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev) return 0; abort_export: - export_rdev(rdev); + export_rdev(rdev, mddev); return err; } diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 218eb2af564a..44fc23af4c3f 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -209,7 +209,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) if (dev->blkdev) { invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping, 0, -1); - blkdev_put(dev->blkdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(dev->blkdev, NULL); } kfree(dev); @@ -261,7 +261,7 @@ static struct block_device __ref *mdtblock_early_get_bdev(const char *devname, static struct block2mtd_dev *add_device(char *devname, int erase_size, char *label, int timeout) { - const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; + const fmode_t mode = FMODE_READ | FMODE_WRITE; struct block_device *bdev; struct block2mtd_dev *dev; char *name; diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 9b6d6d85c725..65ed2d478fac 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -51,7 +51,7 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) void nvmet_bdev_ns_disable(struct nvmet_ns *ns) { if (ns->bdev) { - blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ); + blkdev_put(ns->bdev, NULL); ns->bdev = NULL; } } diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index f21198bc483e..d2b27b84f854 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -179,7 +179,7 @@ void dasd_destroy_partitions(struct dasd_block *block) mutex_unlock(&bdev->bd_disk->open_mutex); /* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */ - blkdev_put(bdev, FMODE_READ); + blkdev_put(bdev, NULL); } int dasd_gendisk_init(void) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index a5cbbefa78ee..c62f961f46e3 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -108,7 +108,7 @@ static int iblock_configure_device(struct se_device *dev) pr_debug( "IBLOCK: Claiming struct block_device: %s\n", ib_dev->ibd_udev_path); - mode = FMODE_READ|FMODE_EXCL; + mode = FMODE_READ; if (!ib_dev->ibd_readonly) mode |= FMODE_WRITE; else @@ -175,7 +175,7 @@ static int iblock_configure_device(struct se_device *dev) return 0; out_blkdev_put: - blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL); + blkdev_put(ib_dev->ibd_bd, ib_dev); out_free_bioset: bioset_exit(&ib_dev->ibd_bio_set); out: @@ -201,7 +201,7 @@ static void iblock_destroy_device(struct se_device *dev) struct iblock_dev *ib_dev = IBLOCK_DEV(dev); if (ib_dev->ibd_bd != NULL) - blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL); + blkdev_put(ib_dev->ibd_bd, ib_dev); bioset_exit(&ib_dev->ibd_bio_set); } diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index e3494e036c6c..da3b5512d7ae 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -366,8 +366,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd) * Claim exclusive struct block_device access to struct scsi_device * for TYPE_DISK and TYPE_ZBC using supplied udev_path */ - bd = blkdev_get_by_path(dev->udev_path, - FMODE_WRITE|FMODE_READ|FMODE_EXCL, pdv, + bd = blkdev_get_by_path(dev->udev_path, FMODE_WRITE | FMODE_READ, pdv, NULL); if (IS_ERR(bd)) { pr_err("pSCSI: blkdev_get_by_path() failed\n"); @@ -378,7 +377,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd) ret = pscsi_add_device_to_list(dev, sd); if (ret) { - blkdev_put(pdv->pdv_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL); + blkdev_put(pdv->pdv_bd, pdv); scsi_device_put(sd); return ret; } @@ -566,8 +565,7 @@ static void pscsi_destroy_device(struct se_device *dev) */ if ((sd->type == TYPE_DISK || sd->type == TYPE_ZBC) && pdv->pdv_bd) { - blkdev_put(pdv->pdv_bd, - FMODE_WRITE|FMODE_READ|FMODE_EXCL); + blkdev_put(pdv->pdv_bd, pdv); pdv->pdv_bd = NULL; } /* diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 4de4984fa99b..677e9d9e1527 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -257,7 +257,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, return -EINVAL; } - bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, + bdev = blkdev_get_by_path(device_path, FMODE_WRITE, fs_info->bdev_holder, NULL); if (IS_ERR(bdev)) { btrfs_err(fs_info, "target device %s is invalid!", device_path); @@ -315,7 +315,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, device->bdev = bdev; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); - device->mode = FMODE_EXCL; + device->holder = fs_info->bdev_holder; device->dev_stats_valid = 1; set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); device->fs_devices = fs_devices; @@ -334,7 +334,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, return 0; error: - blkdev_put(bdev, FMODE_EXCL); + blkdev_put(bdev, fs_info->bdev_holder); return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2fa36f694daa..d99376a79ef4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2672,7 +2672,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args_v2 *vol_args; struct block_device *bdev = NULL; - fmode_t mode; + void *holder; int ret; bool cancel = false; @@ -2709,7 +2709,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) goto err_drop; /* Exclusive operation is now claimed */ - ret = btrfs_rm_device(fs_info, &args, &bdev, &mode); + ret = btrfs_rm_device(fs_info, &args, &bdev, &holder); btrfs_exclop_finish(fs_info); @@ -2724,7 +2724,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) err_drop: mnt_drop_write_file(file); if (bdev) - blkdev_put(bdev, mode); + blkdev_put(bdev, holder); out: btrfs_put_dev_args_from_path(&args); kfree(vol_args); @@ -2738,7 +2738,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args *vol_args; struct block_device *bdev = NULL; - fmode_t mode; + void *holder; int ret; bool cancel = false; @@ -2765,7 +2765,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, cancel); if (ret == 0) { - ret = btrfs_rm_device(fs_info, &args, &bdev, &mode); + ret = btrfs_rm_device(fs_info, &args, &bdev, &holder); if (!ret) btrfs_info(fs_info, "disk deleted %s", vol_args->name); btrfs_exclop_finish(fs_info); @@ -2773,7 +2773,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) mnt_drop_write_file(file); if (bdev) - blkdev_put(bdev, mode); + blkdev_put(bdev, holder); out: btrfs_put_dev_args_from_path(&args); kfree(vol_args); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 035868cee3dd..7b12e05cdbf0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -507,14 +507,14 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, sync_blockdev(*bdev); ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE); if (ret) { - blkdev_put(*bdev, flags); + blkdev_put(*bdev, holder); goto error; } invalidate_bdev(*bdev); *disk_super = btrfs_read_dev_super(*bdev); if (IS_ERR(*disk_super)) { ret = PTR_ERR(*disk_super); - blkdev_put(*bdev, flags); + blkdev_put(*bdev, holder); goto error; } @@ -642,7 +642,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, device->bdev = bdev; clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); - device->mode = flags; + device->holder = holder; fs_devices->open_devices++; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && @@ -656,7 +656,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, error_free_page: btrfs_release_disk_super(disk_super); - blkdev_put(bdev, flags); + blkdev_put(bdev, holder); return -EINVAL; } @@ -1057,7 +1057,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, continue; if (device->bdev) { - blkdev_put(device->bdev, device->mode); + blkdev_put(device->bdev, device->holder); device->bdev = NULL; fs_devices->open_devices--; } @@ -1103,7 +1103,7 @@ static void btrfs_close_bdev(struct btrfs_device *device) invalidate_bdev(device->bdev); } - blkdev_put(device->bdev, device->mode); + blkdev_put(device->bdev, device->holder); } static void btrfs_close_one_device(struct btrfs_device *device) @@ -1213,8 +1213,6 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, struct btrfs_device *latest_dev = NULL; struct btrfs_device *tmp_device; - flags |= FMODE_EXCL; - list_for_each_entry_safe(device, tmp_device, &fs_devices->devices, dev_list) { int ret; @@ -1400,7 +1398,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags) btrfs_release_disk_super(disk_super); error_bdev_put: - blkdev_put(bdev, flags); + blkdev_put(bdev, NULL); return device; } @@ -2087,7 +2085,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, int btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, - struct block_device **bdev, fmode_t *mode) + struct block_device **bdev, void **holder) { struct btrfs_trans_handle *trans; struct btrfs_device *device; @@ -2226,7 +2224,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, } *bdev = device->bdev; - *mode = device->mode; + *holder = device->holder; synchronize_rcu(); btrfs_free_device(device); @@ -2394,7 +2392,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, else memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE); btrfs_release_disk_super(disk_super); - blkdev_put(bdev, FMODE_READ); + blkdev_put(bdev, NULL); return 0; } @@ -2627,7 +2625,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (sb_rdonly(sb) && !fs_devices->seeding) return -EROFS; - bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, + bdev = blkdev_get_by_path(device_path, FMODE_WRITE, fs_info->bdev_holder, NULL); if (IS_ERR(bdev)) return PTR_ERR(bdev); @@ -2690,7 +2688,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path device->commit_total_bytes = device->total_bytes; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); - device->mode = FMODE_EXCL; + device->holder = fs_info->bdev_holder; device->dev_stats_valid = 1; set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); @@ -2848,7 +2846,7 @@ error_free_zone: error_free_device: btrfs_free_device(device); error: - blkdev_put(bdev, FMODE_EXCL); + blkdev_put(bdev, fs_info->bdev_holder); if (locked) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index eb97a397b3c3..840a8df39907 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -94,8 +94,8 @@ struct btrfs_device { struct btrfs_zoned_device_info *zone_info; - /* the mode sent to blkdev_get */ - fmode_t mode; + /* block device holder for blkdev_get/put */ + void *holder; /* * Device's major-minor number. Must be set even if the device is not @@ -619,7 +619,7 @@ void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args); void btrfs_free_device(struct btrfs_device *device); int btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, - struct block_device **bdev, fmode_t *mode); + struct block_device **bdev, void **holder); void __exit btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); int btrfs_grow_device(struct btrfs_trans_handle *trans, diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 6c263e9cd38b..54dba967a2d4 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -19,6 +19,7 @@ #include static struct kmem_cache *erofs_inode_cachep __read_mostly; +struct file_system_type erofs_fs_type; void _erofs_err(struct super_block *sb, const char *function, const char *fmt, ...) @@ -253,8 +254,8 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, return PTR_ERR(fscache); dif->fscache = fscache; } else if (!sbi->devs->flatdev) { - bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL, - sb->s_type, NULL); + bdev = blkdev_get_by_path(dif->path, FMODE_READ, sb->s_type, + NULL); if (IS_ERR(bdev)) return PTR_ERR(bdev); dif->bdev = bdev; @@ -877,7 +878,7 @@ static int erofs_release_device_info(int id, void *ptr, void *data) fs_put_dax(dif->dax_dev, NULL); if (dif->bdev) - blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL); + blkdev_put(dif->bdev, &erofs_fs_type); erofs_fscache_unregister_cookie(dif->fscache); dif->fscache = NULL; kfree(dif->path); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9070ea9154d7..92dd699139a3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1112,7 +1112,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) { struct block_device *bdev; - bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb, + bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, sb, &ext4_holder_ops); if (IS_ERR(bdev)) goto fail; @@ -1128,17 +1128,12 @@ fail: /* * Release the journal device */ -static void ext4_blkdev_put(struct block_device *bdev) -{ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); -} - static void ext4_blkdev_remove(struct ext4_sb_info *sbi) { struct block_device *bdev; bdev = sbi->s_journal_bdev; if (bdev) { - ext4_blkdev_put(bdev); + blkdev_put(bdev, sbi->s_es); sbi->s_journal_bdev = NULL; } } @@ -5915,7 +5910,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, out_journal: jbd2_journal_destroy(journal); out_bdev: - ext4_blkdev_put(bdev); + blkdev_put(bdev, sb); return NULL; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7c34ab082f13..a5adb1d316e3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1538,7 +1538,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) int i; for (i = 0; i < sbi->s_ndevs; i++) { - blkdev_put(FDEV(i).bdev, FMODE_EXCL); + blkdev_put(FDEV(i).bdev, sbi->sb->s_type); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); #endif diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 46d393c8088a..82f70d46f4e5 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1100,7 +1100,7 @@ int lmLogOpen(struct super_block *sb) * file systems to log may have n-to-1 relationship; */ - bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ | FMODE_WRITE, log, NULL); if (IS_ERR(bdev)) { rc = PTR_ERR(bdev); @@ -1141,7 +1141,7 @@ journal_found: lbmLogShutdown(log); close: /* close external log device */ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, log); free: /* free log descriptor */ mutex_unlock(&jfs_log_mutex); @@ -1485,7 +1485,7 @@ int lmLogClose(struct super_block *sb) bdev = log->bdev; rc = lmLogShutdown(log); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, log); kfree(log); diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 38b066ca699e..9be7f958f60e 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -35,7 +35,7 @@ bl_free_device(struct pnfs_block_dev *dev) } if (dev->bdev) - blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE); + blkdev_put(dev->bdev, NULL); } } @@ -374,7 +374,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, return 0; out_blkdev_put: - blkdev_put(d->bdev, FMODE_READ | FMODE_WRITE); + blkdev_put(d->bdev, NULL); return error; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 91bfbd973d1d..61d5e79a5e81 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1278,7 +1278,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, { struct nilfs_super_data sd; struct super_block *s; - fmode_t mode = FMODE_READ | FMODE_EXCL; + fmode_t mode = FMODE_READ; struct dentry *root_dentry; int err, s_new = false; @@ -1357,7 +1357,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } if (!s_new) - blkdev_put(sd.bdev, mode); + blkdev_put(sd.bdev, fs_type); return root_dentry; @@ -1366,7 +1366,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, failed: if (!s_new) - blkdev_put(sd.bdev, mode); + blkdev_put(sd.bdev, fs_type); return ERR_PTR(err); } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 6b13b8c3f2b8..c6ae9aee01ed 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1503,7 +1503,7 @@ static void o2hb_region_release(struct config_item *item) } if (reg->hr_bdev) - blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(reg->hr_bdev, NULL); kfree(reg->hr_slots); @@ -1893,7 +1893,7 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, out3: if (ret < 0) { - blkdev_put(reg->hr_bdev, FMODE_READ | FMODE_WRITE); + blkdev_put(reg->hr_bdev, NULL); reg->hr_bdev = NULL; } out2: diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 5e4db9a0c8e5..905297ea5545 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2589,7 +2589,7 @@ static void release_journal_dev(struct super_block *super, struct reiserfs_journal *journal) { if (journal->j_dev_bd != NULL) { - blkdev_put(journal->j_dev_bd, journal->j_dev_mode); + blkdev_put(journal->j_dev_bd, journal); journal->j_dev_bd = NULL; } } @@ -2598,9 +2598,10 @@ static int journal_init_dev(struct super_block *super, struct reiserfs_journal *journal, const char *jdev_name) { + fmode_t blkdev_mode = FMODE_READ; + void *holder = journal; int result; dev_t jdev; - fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; result = 0; @@ -2608,16 +2609,15 @@ static int journal_init_dev(struct super_block *super, jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; - if (bdev_read_only(super->s_bdev)) - blkdev_mode = FMODE_READ; + if (!bdev_read_only(super->s_bdev)) + blkdev_mode |= FMODE_WRITE; /* there is no "jdev" option and journal is on separate device */ if ((!jdev_name || !jdev_name[0])) { if (jdev == super->s_dev) - blkdev_mode &= ~FMODE_EXCL; - journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, - journal, NULL); - journal->j_dev_mode = blkdev_mode; + holder = NULL; + journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, holder, + NULL); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; @@ -2631,8 +2631,7 @@ static int journal_init_dev(struct super_block *super, return 0; } - journal->j_dev_mode = blkdev_mode; - journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal, + journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, holder, NULL); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 1bccf6a2e908..55e85256aae8 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -300,7 +300,6 @@ struct reiserfs_journal { struct reiserfs_journal_cnode *j_first; struct block_device *j_dev_bd; - fmode_t j_dev_mode; /* first block on s_dev of reserved area journal */ int j_1st_reserved_block; diff --git a/fs/super.c b/fs/super.c index f127589700ab..8563794a8bc4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1255,7 +1255,7 @@ int get_tree_bdev(struct fs_context *fc, { struct block_device *bdev; struct super_block *s; - fmode_t mode = FMODE_READ | FMODE_EXCL; + fmode_t mode = FMODE_READ; int error = 0; if (!(fc->sb_flags & SB_RDONLY)) @@ -1279,7 +1279,7 @@ int get_tree_bdev(struct fs_context *fc, if (bdev->bd_fsfreeze_count > 0) { mutex_unlock(&bdev->bd_fsfreeze_mutex); warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); - blkdev_put(bdev, mode); + blkdev_put(bdev, fc->fs_type); return -EBUSY; } @@ -1288,7 +1288,7 @@ int get_tree_bdev(struct fs_context *fc, s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc); mutex_unlock(&bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) { - blkdev_put(bdev, mode); + blkdev_put(bdev, fc->fs_type); return PTR_ERR(s); } @@ -1297,7 +1297,7 @@ int get_tree_bdev(struct fs_context *fc, if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { warnf(fc, "%pg: Can't mount, would change RO state", bdev); deactivate_locked_super(s); - blkdev_put(bdev, mode); + blkdev_put(bdev, fc->fs_type); return -EBUSY; } @@ -1309,7 +1309,7 @@ int get_tree_bdev(struct fs_context *fc, * holding an active reference. */ up_write(&s->s_umount); - blkdev_put(bdev, mode); + blkdev_put(bdev, fc->fs_type); down_write(&s->s_umount); } else { s->s_mode = mode; @@ -1344,7 +1344,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, { struct block_device *bdev; struct super_block *s; - fmode_t mode = FMODE_READ | FMODE_EXCL; + fmode_t mode = FMODE_READ; int error = 0; if (!(flags & SB_RDONLY)) @@ -1386,7 +1386,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, * holding an active reference. */ up_write(&s->s_umount); - blkdev_put(bdev, mode); + blkdev_put(bdev, fs_type); down_write(&s->s_umount); } else { s->s_mode = mode; @@ -1409,7 +1409,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - blkdev_put(bdev, mode); + blkdev_put(bdev, fs_type); error: return ERR_PTR(error); } @@ -1418,13 +1418,11 @@ EXPORT_SYMBOL(mount_bdev); void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; - fmode_t mode = sb->s_mode; bdev->bd_super = NULL; generic_shutdown_super(sb); sync_blockdev(bdev); - WARN_ON_ONCE(!(mode & FMODE_EXCL)); - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, sb->s_type); } EXPORT_SYMBOL(kill_block_super); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 1b4bd5c88f4a..3b7cf8268057 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -396,8 +396,8 @@ xfs_blkdev_get( { int error = 0; - *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - mp, &xfs_holder_ops); + *bdevp = blkdev_get_by_path(name, FMODE_READ | FMODE_WRITE, mp, + &xfs_holder_ops); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); xfs_warn(mp, "Invalid device [%s], error=%d", name, error); @@ -408,10 +408,11 @@ xfs_blkdev_get( STATIC void xfs_blkdev_put( + struct xfs_mount *mp, struct block_device *bdev) { if (bdev) - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, mp); } STATIC void @@ -422,13 +423,13 @@ xfs_close_devices( struct block_device *logdev = mp->m_logdev_targp->bt_bdev; xfs_free_buftarg(mp->m_logdev_targp); - xfs_blkdev_put(logdev); + xfs_blkdev_put(mp, logdev); } if (mp->m_rtdev_targp) { struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; xfs_free_buftarg(mp->m_rtdev_targp); - xfs_blkdev_put(rtdev); + xfs_blkdev_put(mp, rtdev); } xfs_free_buftarg(mp->m_ddev_targp); } @@ -503,10 +504,10 @@ xfs_open_devices( out_free_ddev_targ: xfs_free_buftarg(mp->m_ddev_targp); out_close_rtdev: - xfs_blkdev_put(rtdev); + xfs_blkdev_put(mp, rtdev); out_close_logdev: if (logdev && logdev != ddev) - xfs_blkdev_put(logdev); + xfs_blkdev_put(mp, logdev); return error; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25bdd0cc74dc..d5b99796f12c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1480,7 +1480,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -void blkdev_put(struct block_device *bdev, fmode_t mode); +void blkdev_put(struct block_device *bdev, void *holder); /* just for blk-cgroup, don't use elsewhere */ struct block_device *blkdev_get_no_open(dev_t dev); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 7ae95ec72f99..f62e89d0d906 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -688,22 +688,18 @@ static int load_image_and_restore(bool snapshot_test) { int error; unsigned int flags; - fmode_t mode = FMODE_READ; - - if (snapshot_test) - mode |= FMODE_EXCL; pm_pr_dbg("Loading hibernation image.\n"); lock_device_hotplug(); error = create_basic_memory_bitmaps(); if (error) { - swsusp_close(mode); + swsusp_close(snapshot_test); goto Unlock; } error = swsusp_read(&flags); - swsusp_close(mode); + swsusp_close(snapshot_test); if (!error) error = hibernation_restore(flags & SF_PLATFORM_MODE); @@ -956,7 +952,7 @@ static int software_resume(void) /* The snapshot device should not be opened while we're running */ if (!hibernate_acquire()) { error = -EBUSY; - swsusp_close(FMODE_READ | FMODE_EXCL); + swsusp_close(false); goto Unlock; } @@ -991,7 +987,7 @@ static int software_resume(void) pm_pr_dbg("Hibernation image not present or could not be loaded.\n"); return error; Close_Finish: - swsusp_close(FMODE_READ | FMODE_EXCL); + swsusp_close(false); goto Finish; } diff --git a/kernel/power/power.h b/kernel/power/power.h index 978189fcafd1..a8e0c44b804e 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -177,7 +177,7 @@ int swsusp_check(bool snapshot_test); extern void swsusp_free(void); extern int swsusp_read(unsigned int *flags_p); extern int swsusp_write(unsigned int flags); -extern void swsusp_close(fmode_t); +void swsusp_close(bool snapshot_test); #ifdef CONFIG_SUSPEND extern int swsusp_unmark(void); #endif diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b03ff1a33c7f..cc9259307c94 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -363,7 +363,7 @@ static int swsusp_swap_check(void) res = set_blocksize(hib_resume_bdev, PAGE_SIZE); if (res < 0) - blkdev_put(hib_resume_bdev, FMODE_WRITE); + blkdev_put(hib_resume_bdev, NULL); return res; } @@ -443,7 +443,7 @@ static int get_swap_writer(struct swap_map_handle *handle) err_rel: release_swap_writer(handle); err_close: - swsusp_close(FMODE_WRITE); + swsusp_close(false); return ret; } @@ -508,7 +508,7 @@ static int swap_writer_finish(struct swap_map_handle *handle, if (error) free_all_swap_pages(root_swap); release_swap_writer(handle); - swsusp_close(FMODE_WRITE); + swsusp_close(false); return error; } @@ -1518,14 +1518,11 @@ static void *swsusp_holder; int swsusp_check(bool snapshot_test) { + void *holder = snapshot_test ? &swsusp_holder : NULL; int error; - fmode_t mode = FMODE_READ; - if (snapshot_test) - mode |= FMODE_EXCL; - - hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, - mode, &swsusp_holder, NULL); + hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, FMODE_READ, + holder, NULL); if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); @@ -1552,7 +1549,7 @@ int swsusp_check(bool snapshot_test) put: if (error) - blkdev_put(hib_resume_bdev, mode); + blkdev_put(hib_resume_bdev, holder); else pr_debug("Image signature found, resuming\n"); } else { @@ -1569,14 +1566,14 @@ put: * swsusp_close - close swap device. */ -void swsusp_close(fmode_t mode) +void swsusp_close(bool snapshot_test) { if (IS_ERR(hib_resume_bdev)) { pr_debug("Image device not initialised\n"); return; } - blkdev_put(hib_resume_bdev, mode); + blkdev_put(hib_resume_bdev, snapshot_test ? &swsusp_holder : NULL); } /** diff --git a/mm/swapfile.c b/mm/swapfile.c index cfbcf7d5705f..16554256be65 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2539,7 +2539,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) struct block_device *bdev = I_BDEV(inode); set_blocksize(bdev, old_block_size); - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(bdev, p); } inode_lock(inode); @@ -2770,8 +2770,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) if (S_ISBLK(inode->i_mode)) { p->bdev = blkdev_get_by_dev(inode->i_rdev, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, p, - NULL); + FMODE_READ | FMODE_WRITE, p, NULL); if (IS_ERR(p->bdev)) { error = PTR_ERR(p->bdev); p->bdev = NULL; @@ -3222,7 +3221,7 @@ bad_swap: p->cluster_next_cpu = NULL; if (inode && S_ISBLK(inode->i_mode) && p->bdev) { set_blocksize(p->bdev, p->old_block_size); - blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(p->bdev, p); } inode = NULL; destroy_swap_extents(p); -- cgit v1.2.3 From c9e4bf607d8c431452ef362c00e62ce999bbae93 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 11 Jul 2023 13:48:12 +0200 Subject: PM: hibernate: Fix writing maj:min to /sys/power/resume resume_store() first calls lookup_bdev() and after tries to handle maj:min, but it does not reset the error before, hence if you will write maj:min you will get ENOENT: # echo 259:2 >| /sys/power/resume bash: echo: write error: No such file or directory This also should fix hiberation via systemd, since it uses this way. Fixes: 1e8c813b083c4 ("PM: hibernate: don't use early_lookup_bdev in resume_store") Signed-off-by: Azat Khuzhin Reviewed-by: Christoph Hellwig [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index f62e89d0d906..e1b4bfa938dd 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1179,6 +1179,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, unsigned maj, min, offset; char *p, dummy; + error = 0; if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 || sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) { -- cgit v1.2.3