From 9566256518de0520c964bdf23140eac324b136af Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:00:42 -0700 Subject: block: Remove request_queue.nr_queues Commit 897bb0c7f1ea ("blk-mq: Use proper cpumask iterator"; v4.6) removed the last use of request_queue.nr_queues from outside blk_mq_init_allocate_queue(). Remove this member variable to make struct request_queue smaller. This patch does not change any functionality. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3ea78b0c91c..d4051acb92a1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -411,7 +411,6 @@ struct request_queue { /* sw queues */ struct blk_mq_ctx __percpu *queue_ctx; - unsigned int nr_queues; unsigned int queue_depth; -- cgit v1.2.3 From 98aaaec4a150c39219a8aaa68c3adc6eed443ea8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Mar 2019 17:45:18 +0100 Subject: compat_ioctl: reimplement SG_IO handling There are two code locations that implement the SG_IO ioctl: the old sg.c driver, and the generic scsi_ioctl helper that is in turn used by multiple drivers. To eradicate the old compat_ioctl conversion handler for the SG_IO command, I implement a readable pair of put_sg_io_hdr() /get_sg_io_hdr() helper functions that can be used for both compat and native mode, and then I call this from both drivers. For the iovec handling, there is already a compat_import_iovec() function that can simply be called in place of import_iovec(). To avoid having to pass the compat/native state through multiple indirections, I mark the SG_IO command itself as compatible in fs/compat_ioctl.c and use in_compat_syscall() to figure out where we are called from. As a side-effect of this, the sg.c driver now also accepts the 32-bit sg_io_hdr format in compat mode using the read/write interface, not just ioctl. This should improve compatiblity with old 32-bit binaries, but it would break if any application intentionally passes the 64-bit data structure in compat mode here. Steffen Maier helped debug an issue in an earlier version of this patch. Cc: Steffen Maier Cc: linux-scsi@vger.kernel.org Cc: Doug Gilbert Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Signed-off-by: Arnd Bergmann --- block/scsi_ioctl.c | 132 +++++++++++++++++++++++++++++++++++++++++-- drivers/scsi/sg.c | 19 ++++--- fs/compat_ioctl.c | 148 +------------------------------------------------ include/linux/blkdev.h | 2 + lib/iov_iter.c | 1 + 5 files changed, 143 insertions(+), 159 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index f5e0ad65e86a..650bade5ea5a 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2001 Jens Axboe */ +#include #include #include #include @@ -327,7 +328,14 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, struct iov_iter i; struct iovec *iov = NULL; - ret = import_iovec(rq_data_dir(rq), +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = compat_import_iovec(rq_data_dir(rq), + hdr->dxferp, hdr->iovec_count, + 0, &iov, &i); + else +#endif + ret = import_iovec(rq_data_dir(rq), hdr->dxferp, hdr->iovec_count, 0, &iov, &i); if (ret < 0) @@ -542,6 +550,122 @@ static inline int blk_send_start_stop(struct request_queue *q, return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data); } +#ifdef CONFIG_COMPAT +struct compat_sg_io_hdr { + compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ + compat_int_t dxfer_direction; /* [i] data transfer direction */ + unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */ + unsigned char mx_sb_len; /* [i] max length to write to sbp */ + unsigned short iovec_count; /* [i] 0 implies no scatter gather */ + compat_uint_t dxfer_len; /* [i] byte count of data transfer */ + compat_uint_t dxferp; /* [i], [*io] points to data transfer memory + or scatter gather list */ + compat_uptr_t cmdp; /* [i], [*i] points to command to perform */ + compat_uptr_t sbp; /* [i], [*o] points to sense_buffer memory */ + compat_uint_t timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */ + compat_uint_t flags; /* [i] 0 -> default, see SG_FLAG... */ + compat_int_t pack_id; /* [i->o] unused internally (normally) */ + compat_uptr_t usr_ptr; /* [i->o] unused internally */ + unsigned char status; /* [o] scsi status */ + unsigned char masked_status; /* [o] shifted, masked scsi status */ + unsigned char msg_status; /* [o] messaging level data (optional) */ + unsigned char sb_len_wr; /* [o] byte count actually written to sbp */ + unsigned short host_status; /* [o] errors from host adapter */ + unsigned short driver_status; /* [o] errors from software driver */ + compat_int_t resid; /* [o] dxfer_len - actual_transferred */ + compat_uint_t duration; /* [o] time taken by cmd (unit: millisec) */ + compat_uint_t info; /* [o] auxiliary information */ +}; +#endif + +int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp) +{ +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) { + struct compat_sg_io_hdr hdr32 = { + .interface_id = hdr->interface_id, + .dxfer_direction = hdr->dxfer_direction, + .cmd_len = hdr->cmd_len, + .mx_sb_len = hdr->mx_sb_len, + .iovec_count = hdr->iovec_count, + .dxfer_len = hdr->dxfer_len, + .dxferp = (uintptr_t)hdr->dxferp, + .cmdp = (uintptr_t)hdr->cmdp, + .sbp = (uintptr_t)hdr->sbp, + .timeout = hdr->timeout, + .flags = hdr->flags, + .pack_id = hdr->pack_id, + .usr_ptr = (uintptr_t)hdr->usr_ptr, + .status = hdr->status, + .masked_status = hdr->masked_status, + .msg_status = hdr->msg_status, + .sb_len_wr = hdr->sb_len_wr, + .host_status = hdr->host_status, + .driver_status = hdr->driver_status, + .resid = hdr->resid, + .duration = hdr->duration, + .info = hdr->info, + }; + + if (copy_to_user(argp, &hdr32, sizeof(hdr32))) + return -EFAULT; + + return 0; + } +#endif + + if (copy_to_user(argp, hdr, sizeof(*hdr))) + return -EFAULT; + + return 0; +} +EXPORT_SYMBOL(put_sg_io_hdr); + +int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp) +{ +#ifdef CONFIG_COMPAT + struct compat_sg_io_hdr hdr32; + + if (in_compat_syscall()) { + if (copy_from_user(&hdr32, argp, sizeof(hdr32))) + return -EFAULT; + + *hdr = (struct sg_io_hdr) { + .interface_id = hdr32.interface_id, + .dxfer_direction = hdr32.dxfer_direction, + .cmd_len = hdr32.cmd_len, + .mx_sb_len = hdr32.mx_sb_len, + .iovec_count = hdr32.iovec_count, + .dxfer_len = hdr32.dxfer_len, + .dxferp = compat_ptr(hdr32.dxferp), + .cmdp = compat_ptr(hdr32.cmdp), + .sbp = compat_ptr(hdr32.sbp), + .timeout = hdr32.timeout, + .flags = hdr32.flags, + .pack_id = hdr32.pack_id, + .usr_ptr = compat_ptr(hdr32.usr_ptr), + .status = hdr32.status, + .masked_status = hdr32.masked_status, + .msg_status = hdr32.msg_status, + .sb_len_wr = hdr32.sb_len_wr, + .host_status = hdr32.host_status, + .driver_status = hdr32.driver_status, + .resid = hdr32.resid, + .duration = hdr32.duration, + .info = hdr32.info, + }; + + return 0; + } +#endif + + if (copy_from_user(hdr, argp, sizeof(*hdr))) + return -EFAULT; + + return 0; +} +EXPORT_SYMBOL(get_sg_io_hdr); + int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mode, unsigned int cmd, void __user *arg) { @@ -581,14 +705,14 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod case SG_IO: { struct sg_io_hdr hdr; - err = -EFAULT; - if (copy_from_user(&hdr, arg, sizeof(hdr))) + err = get_sg_io_hdr(&hdr, arg); + if (err) break; err = sg_io(q, bd_disk, &hdr, mode); if (err == -EFAULT) break; - if (copy_to_user(arg, &hdr, sizeof(hdr))) + if (put_sg_io_hdr(&hdr, arg)) err = -EFAULT; break; } diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index cce757506383..8ae096af2667 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -447,8 +447,7 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos) retval = -ENOMEM; goto free_old_hdr; } - retval =__copy_from_user - (new_hdr, buf, SZ_SG_IO_HDR); + retval = get_sg_io_hdr(new_hdr, buf); req_pack_id = new_hdr->pack_id; kfree(new_hdr); if (retval) { @@ -589,10 +588,7 @@ sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp) } if (hp->masked_status || hp->host_status || hp->driver_status) hp->info |= SG_INFO_CHECK; - if (copy_to_user(buf, hp, SZ_SG_IO_HDR)) { - err = -EFAULT; - goto err_out; - } + err = put_sg_io_hdr(hp, buf); err_out: err2 = sg_finish_rem_req(srp); sg_remove_request(sfp, srp); @@ -735,7 +731,7 @@ sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf, } srp->sg_io_owned = sg_io_owned; hp = &srp->header; - if (__copy_from_user(hp, buf, SZ_SG_IO_HDR)) { + if (get_sg_io_hdr(hp, buf)) { sg_remove_request(sfp, srp); return -EFAULT; } @@ -1797,7 +1793,14 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) struct iovec *iov = NULL; struct iov_iter i; - res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i); +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + res = compat_import_iovec(rw, hp->dxferp, iov_count, + 0, &iov, &i); + else +#endif + res = import_iovec(rw, hp->dxferp, iov_count, + 0, &iov, &i); if (res < 0) return res; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 10ba2d9e20bc..f279e77df256 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -64,151 +64,6 @@ static int do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } #ifdef CONFIG_BLOCK -typedef struct sg_io_hdr32 { - compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ - compat_int_t dxfer_direction; /* [i] data transfer direction */ - unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */ - unsigned char mx_sb_len; /* [i] max length to write to sbp */ - unsigned short iovec_count; /* [i] 0 implies no scatter gather */ - compat_uint_t dxfer_len; /* [i] byte count of data transfer */ - compat_uint_t dxferp; /* [i], [*io] points to data transfer memory - or scatter gather list */ - compat_uptr_t cmdp; /* [i], [*i] points to command to perform */ - compat_uptr_t sbp; /* [i], [*o] points to sense_buffer memory */ - compat_uint_t timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */ - compat_uint_t flags; /* [i] 0 -> default, see SG_FLAG... */ - compat_int_t pack_id; /* [i->o] unused internally (normally) */ - compat_uptr_t usr_ptr; /* [i->o] unused internally */ - unsigned char status; /* [o] scsi status */ - unsigned char masked_status; /* [o] shifted, masked scsi status */ - unsigned char msg_status; /* [o] messaging level data (optional) */ - unsigned char sb_len_wr; /* [o] byte count actually written to sbp */ - unsigned short host_status; /* [o] errors from host adapter */ - unsigned short driver_status; /* [o] errors from software driver */ - compat_int_t resid; /* [o] dxfer_len - actual_transferred */ - compat_uint_t duration; /* [o] time taken by cmd (unit: millisec) */ - compat_uint_t info; /* [o] auxiliary information */ -} sg_io_hdr32_t; /* 64 bytes long (on sparc32) */ - -typedef struct sg_iovec32 { - compat_uint_t iov_base; - compat_uint_t iov_len; -} sg_iovec32_t; - -static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iovec_count) -{ - sg_iovec_t __user *iov = (sg_iovec_t __user *) (sgio + 1); - sg_iovec32_t __user *iov32 = dxferp; - int i; - - for (i = 0; i < iovec_count; i++) { - u32 base, len; - - if (get_user(base, &iov32[i].iov_base) || - get_user(len, &iov32[i].iov_len) || - put_user(compat_ptr(base), &iov[i].iov_base) || - put_user(len, &iov[i].iov_len)) - return -EFAULT; - } - - if (put_user(iov, &sgio->dxferp)) - return -EFAULT; - return 0; -} - -static int sg_ioctl_trans(struct file *file, unsigned int cmd, - sg_io_hdr32_t __user *sgio32) -{ - sg_io_hdr_t __user *sgio; - u16 iovec_count; - u32 data; - void __user *dxferp; - int err; - int interface_id; - - if (get_user(interface_id, &sgio32->interface_id)) - return -EFAULT; - if (interface_id != 'S') - return do_ioctl(file, cmd, (unsigned long)sgio32); - - if (get_user(iovec_count, &sgio32->iovec_count)) - return -EFAULT; - - { - void __user *top = compat_alloc_user_space(0); - void __user *new = compat_alloc_user_space(sizeof(sg_io_hdr_t) + - (iovec_count * sizeof(sg_iovec_t))); - if (new > top) - return -EINVAL; - - sgio = new; - } - - /* Ok, now construct. */ - if (copy_in_user(&sgio->interface_id, &sgio32->interface_id, - (2 * sizeof(int)) + - (2 * sizeof(unsigned char)) + - (1 * sizeof(unsigned short)) + - (1 * sizeof(unsigned int)))) - return -EFAULT; - - if (get_user(data, &sgio32->dxferp)) - return -EFAULT; - dxferp = compat_ptr(data); - if (iovec_count) { - if (sg_build_iovec(sgio, dxferp, iovec_count)) - return -EFAULT; - } else { - if (put_user(dxferp, &sgio->dxferp)) - return -EFAULT; - } - - { - unsigned char __user *cmdp; - unsigned char __user *sbp; - - if (get_user(data, &sgio32->cmdp)) - return -EFAULT; - cmdp = compat_ptr(data); - - if (get_user(data, &sgio32->sbp)) - return -EFAULT; - sbp = compat_ptr(data); - - if (put_user(cmdp, &sgio->cmdp) || - put_user(sbp, &sgio->sbp)) - return -EFAULT; - } - - if (copy_in_user(&sgio->timeout, &sgio32->timeout, - 3 * sizeof(int))) - return -EFAULT; - - if (get_user(data, &sgio32->usr_ptr)) - return -EFAULT; - if (put_user(compat_ptr(data), &sgio->usr_ptr)) - return -EFAULT; - - err = do_ioctl(file, cmd, (unsigned long) sgio); - - if (err >= 0) { - void __user *datap; - - if (copy_in_user(&sgio32->pack_id, &sgio->pack_id, - sizeof(int)) || - get_user(datap, &sgio->usr_ptr) || - put_user((u32)(unsigned long)datap, - &sgio32->usr_ptr) || - copy_in_user(&sgio32->status, &sgio->status, - (4 * sizeof(unsigned char)) + - (2 * sizeof(unsigned short)) + - (3 * sizeof(int)))) - err = -EFAULT; - } - - return err; -} - struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ char req_state; char orphan; @@ -358,6 +213,7 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI) #endif #ifdef CONFIG_BLOCK /* SG stuff */ +COMPATIBLE_IOCTL(SG_IO) COMPATIBLE_IOCTL(SG_SET_TIMEOUT) COMPATIBLE_IOCTL(SG_GET_TIMEOUT) COMPATIBLE_IOCTL(SG_EMULATED_HOST) @@ -435,8 +291,6 @@ static long do_ioctl_trans(unsigned int cmd, case PPPIOCSACTIVE32: return ppp_sock_fprog_ioctl_trans(file, cmd, argp); #ifdef CONFIG_BLOCK - case SG_IO: - return sg_ioctl_trans(file, cmd, argp); case SG_GET_REQUEST_TABLE: return sg_grt_trans(file, cmd, argp); #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3ea78b0c91c..2c8cd22b176b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -870,6 +870,8 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, unsigned int, void __user *); extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); +extern int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp); +extern int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp); extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 639d5e7014c1..ffb52f2c0ef4 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1678,6 +1678,7 @@ ssize_t compat_import_iovec(int type, *iov = p == *iov ? NULL : p; return n; } +EXPORT_SYMBOL(compat_import_iovec); #endif int import_single_range(int rw, void __user *buf, size_t len, -- cgit v1.2.3 From 6c1b1da58f8c7a697a88ae35afeba196fc7b701e Mon Sep 17 00:00:00 2001 From: Ajay Joshi Date: Sun, 27 Oct 2019 23:05:45 +0900 Subject: block: add zone open, close and finish operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zoned block devices (ZBC and ZAC devices) allow an explicit control over the condition (state) of zones. The operations allowed are: * Open a zone: Transition to open condition to indicate that a zone will actively be written * Close a zone: Transition to closed condition to release the drive resources used for writing to a zone * Finish a zone: Transition an open or closed zone to the full condition to prevent write operations To enable this control for in-kernel zoned block device users, define the new request operations REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH as well as the generic function blkdev_zone_mgmt() for submitting these operations on a range of zones. This results in blkdev_reset_zones() removal and replacement with this new zone magement function. Users of blkdev_reset_zones() (f2fs and dm-zoned) are updated accordingly. Contains contributions from Matias Bjorling, Hans Holmberg, Dmitry Fomichev, Keith Busch, Damien Le Moal and Christoph Hellwig. Reviewed-by: Javier González Reviewed-by: Christoph Hellwig Signed-off-by: Ajay Joshi Signed-off-by: Matias Bjorling Signed-off-by: Hans Holmberg Signed-off-by: Dmitry Fomichev Signed-off-by: Keith Busch Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/blk-core.c | 12 +++++++++--- block/blk-zoned.c | 35 +++++++++++++++++++++-------------- drivers/md/dm-zoned-metadata.c | 6 +++--- fs/f2fs/segment.c | 3 ++- include/linux/blk_types.h | 25 +++++++++++++++++++++++++ include/linux/blkdev.h | 5 +++-- 6 files changed, 63 insertions(+), 23 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-core.c b/block/blk-core.c index ae506ac2dd48..f0d82227a2fc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -132,6 +132,9 @@ static const char *const blk_op_name[] = { REQ_OP_NAME(SECURE_ERASE), REQ_OP_NAME(ZONE_RESET), REQ_OP_NAME(ZONE_RESET_ALL), + REQ_OP_NAME(ZONE_OPEN), + REQ_OP_NAME(ZONE_CLOSE), + REQ_OP_NAME(ZONE_FINISH), REQ_OP_NAME(WRITE_SAME), REQ_OP_NAME(WRITE_ZEROES), REQ_OP_NAME(SCSI_IN), @@ -849,10 +852,10 @@ static inline int blk_partition_remap(struct bio *bio) goto out; /* - * Zone reset does not include bi_size so bio_sectors() is always 0. - * Include a test for the reset op code and perform the remap if needed. + * Zone management bios do not have a sector count but they do have + * a start sector filled out and need to be remapped. */ - if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) { + if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio))) { if (bio_check_eod(bio, part_nr_sects_read(p))) goto out; bio->bi_iter.bi_sector += p->start_sect; @@ -936,6 +939,9 @@ generic_make_request_checks(struct bio *bio) goto not_supported; break; case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: if (!blk_queue_is_zoned(q)) goto not_supported; break; diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 14785011e798..dab34dc48fb6 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -221,23 +221,27 @@ static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev, } /** - * blkdev_reset_zones - Reset zones write pointer + * blkdev_zone_mgmt - Execute a zone management operation on a range of zones * @bdev: Target block device - * @sector: Start sector of the first zone to reset - * @nr_sectors: Number of sectors, at least the length of one zone + * @op: Operation to be performed on the zones + * @sector: Start sector of the first zone to operate on + * @nr_sectors: Number of sectors, should be at least the length of one zone and + * must be zone size aligned. * @gfp_mask: Memory allocation flags (for bio_alloc) * * Description: - * Reset the write pointer of the zones contained in the range + * Perform the specified operation on the range of zones specified by * @sector..@sector+@nr_sectors. Specifying the entire disk sector range * is valid, but the specified range should not contain conventional zones. + * The operation to execute on each zone can be a zone reset, open, close + * or finish request. */ -int blkdev_reset_zones(struct block_device *bdev, - sector_t sector, sector_t nr_sectors, - gfp_t gfp_mask) +int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, + sector_t sector, sector_t nr_sectors, + gfp_t gfp_mask) { struct request_queue *q = bdev_get_queue(bdev); - sector_t zone_sectors; + sector_t zone_sectors = blk_queue_zone_sectors(q); sector_t end_sector = sector + nr_sectors; struct bio *bio = NULL; int ret; @@ -248,12 +252,14 @@ int blkdev_reset_zones(struct block_device *bdev, if (bdev_read_only(bdev)) return -EPERM; + if (!op_is_zone_mgmt(op)) + return -EOPNOTSUPP; + if (!nr_sectors || end_sector > bdev->bd_part->nr_sects) /* Out of range */ return -EINVAL; /* Check alignment (handle eventual smaller last zone) */ - zone_sectors = blk_queue_zone_sectors(q); if (sector & (zone_sectors - 1)) return -EINVAL; @@ -269,12 +275,13 @@ int blkdev_reset_zones(struct block_device *bdev, * Special case for the zone reset operation that reset all * zones, this is useful for applications like mkfs. */ - if (blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) { + if (op == REQ_OP_ZONE_RESET && + blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) { bio->bi_opf = REQ_OP_ZONE_RESET_ALL; break; } - bio->bi_opf = REQ_OP_ZONE_RESET; + bio->bi_opf = op; bio->bi_iter.bi_sector = sector; sector += zone_sectors; @@ -287,7 +294,7 @@ int blkdev_reset_zones(struct block_device *bdev, return ret; } -EXPORT_SYMBOL_GPL(blkdev_reset_zones); +EXPORT_SYMBOL_GPL(blkdev_zone_mgmt); /* * BLKREPORTZONE ioctl processing. @@ -379,8 +386,8 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range))) return -EFAULT; - return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors, - GFP_KERNEL); + return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, + zrange.sector, zrange.nr_sectors, GFP_KERNEL); } static inline unsigned long *blk_alloc_zone_bitmap(int node, diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 595a73110e17..feb4718ce6a6 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1312,9 +1312,9 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone) if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) { struct dmz_dev *dev = zmd->dev; - ret = blkdev_reset_zones(dev->bdev, - dmz_start_sect(zmd, zone), - dev->zone_nr_sectors, GFP_NOIO); + ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET, + dmz_start_sect(zmd, zone), + dev->zone_nr_sectors, GFP_NOIO); if (ret) { dmz_dev_err(dev, "Reset zone %u failed %d", dmz_id(zmd, zone), ret); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 808709581481..2c997f94a3b2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1771,7 +1771,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, return -EIO; } trace_f2fs_issue_reset_zone(bdev, blkstart); - return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS); + return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, + sector, nr_sects, GFP_NOFS); } /* For conventional zones, use regular discard if supported */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1e7eeec16458..23a2fd534817 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -290,6 +290,12 @@ enum req_opf { REQ_OP_ZONE_RESET_ALL = 8, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 9, + /* Open a zone */ + REQ_OP_ZONE_OPEN = 10, + /* Close a zone */ + REQ_OP_ZONE_CLOSE = 11, + /* Transition a zone to full */ + REQ_OP_ZONE_FINISH = 12, /* SCSI passthrough using struct scsi_request */ REQ_OP_SCSI_IN = 32, @@ -417,6 +423,25 @@ static inline bool op_is_discard(unsigned int op) return (op & REQ_OP_MASK) == REQ_OP_DISCARD; } +/* + * Check if a bio or request operation is a zone management operation, with + * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case + * due to its different handling in the block layer and device response in + * case of command failure. + */ +static inline bool op_is_zone_mgmt(enum req_opf op) +{ + switch (op & REQ_OP_MASK) { + case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: + return true; + default: + return false; + } +} + static inline int op_stat_group(unsigned int op) { if (op_is_discard(op)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d4051acb92a1..9cfafff86f66 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -360,8 +360,9 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev); extern int blkdev_report_zones(struct block_device *bdev, sector_t sector, struct blk_zone *zones, unsigned int *nr_zones); -extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors, - sector_t nr_sectors, gfp_t gfp_mask); +extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, + sector_t sectors, sector_t nr_sectors, + gfp_t gfp_mask); extern int blk_revalidate_disk_zones(struct gendisk *disk); extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, -- cgit v1.2.3 From e876df1fe0ad1b191284ee6ed2db7960bd322d00 Mon Sep 17 00:00:00 2001 From: Ajay Joshi Date: Sun, 27 Oct 2019 23:05:46 +0900 Subject: block: add zone open, close and finish ioctl support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce three new ioctl commands BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE to allow applications to control the condition of zones on a zoned block device through the execution of the REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH operations. Contains contributions from Matias Bjorling, Hans Holmberg, Dmitry Fomichev, Keith Busch, Damien Le Moal and Christoph Hellwig. Reviewed-by: Javier González Reviewed-by: Christoph Hellwig Signed-off-by: Ajay Joshi Signed-off-by: Matias Bjorling Signed-off-by: Hans Holmberg Signed-off-by: Dmitry Fomichev Signed-off-by: Keith Busch Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/blk-zoned.c | 28 +++++++++++++++++++++++----- block/ioctl.c | 5 ++++- include/linux/blkdev.h | 10 +++++----- include/uapi/linux/blkzoned.h | 17 ++++++++++++++--- 4 files changed, 46 insertions(+), 14 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index dab34dc48fb6..481eaf7d04d4 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -357,15 +357,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, } /* - * BLKRESETZONE ioctl processing. + * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing. * Called from blkdev_ioctl. */ -int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) +int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct request_queue *q; struct blk_zone_range zrange; + enum req_opf op; if (!argp) return -EINVAL; @@ -386,8 +387,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range))) return -EFAULT; - return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, - zrange.sector, zrange.nr_sectors, GFP_KERNEL); + switch (cmd) { + case BLKRESETZONE: + op = REQ_OP_ZONE_RESET; + break; + case BLKOPENZONE: + op = REQ_OP_ZONE_OPEN; + break; + case BLKCLOSEZONE: + op = REQ_OP_ZONE_CLOSE; + break; + case BLKFINISHZONE: + op = REQ_OP_ZONE_FINISH; + break; + default: + return -ENOTTY; + } + + return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors, + GFP_KERNEL); } static inline unsigned long *blk_alloc_zone_bitmap(int node, diff --git a/block/ioctl.c b/block/ioctl.c index 15a0eb80ada9..8756efb1419e 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKREPORTZONE: return blkdev_report_zones_ioctl(bdev, mode, cmd, arg); case BLKRESETZONE: - return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg); + case BLKOPENZONE: + case BLKCLOSEZONE: + case BLKFINISHZONE: + return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); case BLKGETZONESZ: return put_uint(arg, bdev_zone_sectors(bdev)); case BLKGETNRZONES: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9cfafff86f66..6a4f7abbdcf7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -367,8 +367,8 @@ extern int blk_revalidate_disk_zones(struct gendisk *disk); extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); -extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg); +extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg); #else /* CONFIG_BLK_DEV_ZONED */ @@ -389,9 +389,9 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev, return -ENOTTY; } -static inline int blkdev_reset_zones_ioctl(struct block_device *bdev, - fmode_t mode, unsigned int cmd, - unsigned long arg) +static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev, + fmode_t mode, unsigned int cmd, + unsigned long arg) { return -ENOTTY; } diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index 498eec813494..0cdef67135f0 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -120,9 +120,11 @@ struct blk_zone_report { }; /** - * struct blk_zone_range - BLKRESETZONE ioctl request - * @sector: starting sector of the first zone to issue reset write pointer - * @nr_sectors: Total number of sectors of 1 or more zones to reset + * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/ + * BLKCLOSEZONE/BLKFINISHZONE ioctl + * requests + * @sector: Starting sector of the first zone to operate on. + * @nr_sectors: Total number of sectors of all zones to operate on. */ struct blk_zone_range { __u64 sector; @@ -139,10 +141,19 @@ struct blk_zone_range { * sector range. The sector range must be zone aligned. * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors. * @BLKGETNRZONES: Get the total number of zones of the device. + * @BLKOPENZONE: Open the zones in the specified sector range. + * The 512 B sector range must be zone aligned. + * @BLKCLOSEZONE: Close the zones in the specified sector range. + * The 512 B sector range must be zone aligned. + * @BLKFINISHZONE: Mark the zones as full in the specified sector range. + * The 512 B sector range must be zone aligned. */ #define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report) #define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range) #define BLKGETZONESZ _IOR(0x12, 132, __u32) #define BLKGETNRZONES _IOR(0x12, 133, __u32) +#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range) +#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range) +#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range) #endif /* _UAPI_BLKZONED_H */ -- cgit v1.2.3 From d41003513e61dd9d4974cb441d30b63650b85654 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Nov 2019 11:39:30 +0900 Subject: block: rework zone reporting Avoid the need to allocate a potentially large array of struct blk_zone in the block layer by switching the ->report_zones method interface to a callback model. Now the caller simply supplies a callback that is executed on each reported zone, and private data for it. Signed-off-by: Christoph Hellwig Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-zoned.c | 253 ++++++++++++++++------------------------- drivers/block/null_blk.h | 2 +- drivers/block/null_blk_zoned.c | 31 +++-- drivers/md/dm-flakey.c | 18 +-- drivers/md/dm-linear.c | 20 ++-- drivers/md/dm-zoned-metadata.c | 131 ++++++++------------- drivers/md/dm.c | 121 +++++++++----------- drivers/scsi/sd.h | 4 +- drivers/scsi/sd_zbc.c | 106 ++++++++--------- fs/f2fs/super.c | 51 +++------ include/linux/blkdev.h | 15 ++- include/linux/device-mapper.h | 24 +++- 12 files changed, 329 insertions(+), 447 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index ae665e490858..6fad6f3f6980 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -101,44 +101,35 @@ EXPORT_SYMBOL_GPL(blkdev_nr_zones); * blkdev_report_zones - Get zones information * @bdev: Target block device * @sector: Sector from which to report zones - * @zones: Array of zone structures where to return the zones information - * @nr_zones: Number of zone structures in the zone array + * @nr_zones: Maximum number of zones to report + * @cb: Callback function called for each reported zone + * @data: Private data for the callback * * Description: - * Get zone information starting from the zone containing @sector. - * The number of zone information reported may be less than the number - * requested by @nr_zones. The number of zones actually reported is - * returned in @nr_zones. - * The caller must use memalloc_noXX_save/restore() calls to control - * memory allocations done within this function (zone array and command - * buffer allocation by the device driver). + * Get zone information starting from the zone containing @sector for at most + * @nr_zones, and call @cb for each zone reported by the device. + * To report all zones in a device starting from @sector, the BLK_ALL_ZONES + * constant can be passed to @nr_zones. + * Returns the number of zones reported by the device, or a negative errno + * value in case of failure. + * + * Note: The caller must use memalloc_noXX_save/restore() calls to control + * memory allocations done within this function. */ int blkdev_report_zones(struct block_device *bdev, sector_t sector, - struct blk_zone *zones, unsigned int *nr_zones) + unsigned int nr_zones, report_zones_cb cb, void *data) { - struct request_queue *q = bdev_get_queue(bdev); struct gendisk *disk = bdev->bd_disk; sector_t capacity = get_capacity(disk); - if (!blk_queue_is_zoned(q)) - return -EOPNOTSUPP; - - /* - * A block device that advertized itself as zoned must have a - * report_zones method. If it does not have one defined, the device - * driver has a bug. So warn about that. - */ - if (WARN_ON_ONCE(!disk->fops->report_zones)) + if (!blk_queue_is_zoned(bdev_get_queue(bdev)) || + WARN_ON_ONCE(!disk->fops->report_zones)) return -EOPNOTSUPP; - if (!*nr_zones || sector >= capacity) { - *nr_zones = 0; + if (!nr_zones || sector >= capacity) return 0; - } - *nr_zones = min(*nr_zones, __blkdev_nr_zones(q, capacity - sector)); - - return disk->fops->report_zones(disk, sector, zones, nr_zones); + return disk->fops->report_zones(disk, sector, nr_zones, cb, data); } EXPORT_SYMBOL_GPL(blkdev_report_zones); @@ -232,6 +223,20 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, } EXPORT_SYMBOL_GPL(blkdev_zone_mgmt); +struct zone_report_args { + struct blk_zone __user *zones; +}; + +static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx, + void *data) +{ + struct zone_report_args *args = data; + + if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone))) + return -EFAULT; + return 0; +} + /* * BLKREPORTZONE ioctl processing. * Called from blkdev_ioctl. @@ -240,9 +245,9 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; + struct zone_report_args args; struct request_queue *q; struct blk_zone_report rep; - struct blk_zone *zones; int ret; if (!argp) @@ -264,32 +269,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, if (!rep.nr_zones) return -EINVAL; - rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones); - - zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone), - GFP_KERNEL | __GFP_ZERO); - if (!zones) - return -ENOMEM; - - ret = blkdev_report_zones(bdev, rep.sector, zones, &rep.nr_zones); - if (ret) - goto out; - - if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) { - ret = -EFAULT; - goto out; - } - - if (rep.nr_zones) { - if (copy_to_user(argp + sizeof(struct blk_zone_report), zones, - sizeof(struct blk_zone) * rep.nr_zones)) - ret = -EFAULT; - } + args.zones = argp + sizeof(struct blk_zone_report); + ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones, + blkdev_copy_zone_to_user, &args); + if (ret < 0) + return ret; - out: - kvfree(zones); - - return ret; + rep.nr_zones = ret; + if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) + return -EFAULT; + return 0; } /* @@ -351,31 +340,6 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node, GFP_NOIO, node); } -/* - * Allocate an array of struct blk_zone to get nr_zones zone information. - * The allocated array may be smaller than nr_zones. - */ -static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones) -{ - struct blk_zone *zones; - size_t nrz = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES); - - /* - * GFP_KERNEL here is meaningless as the caller task context has - * the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones() - * with memalloc_noio_save(). - */ - zones = kvcalloc(nrz, sizeof(struct blk_zone), GFP_KERNEL); - if (!zones) { - *nr_zones = 0; - return NULL; - } - - *nr_zones = nrz; - - return zones; -} - void blk_queue_free_zone_bitmaps(struct request_queue *q) { kfree(q->seq_zones_bitmap); @@ -384,12 +348,21 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q) q->seq_zones_wlock = NULL; } +struct blk_revalidate_zone_args { + struct gendisk *disk; + unsigned long *seq_zones_bitmap; + unsigned long *seq_zones_wlock; + sector_t sector; +}; + /* * Helper function to check the validity of zones of a zoned block device. */ -static bool blk_zone_valid(struct gendisk *disk, struct blk_zone *zone, - sector_t *sector) +static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, + void *data) { + struct blk_revalidate_zone_args *args = data; + struct gendisk *disk = args->disk; struct request_queue *q = disk->queue; sector_t zone_sectors = blk_queue_zone_sectors(q); sector_t capacity = get_capacity(disk); @@ -409,14 +382,14 @@ static bool blk_zone_valid(struct gendisk *disk, struct blk_zone *zone, zone->len > zone_sectors) { pr_warn("%s: Invalid zoned device with larger last zone size\n", disk->disk_name); - return false; + return -ENODEV; } /* Check for holes in the zone report */ - if (zone->start != *sector) { + if (zone->start != args->sector) { pr_warn("%s: Zone gap at sectors %llu..%llu\n", - disk->disk_name, *sector, zone->start); - return false; + disk->disk_name, args->sector, zone->start); + return -ENODEV; } /* Check zone type */ @@ -428,12 +401,38 @@ static bool blk_zone_valid(struct gendisk *disk, struct blk_zone *zone, default: pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n", disk->disk_name, (int)zone->type, zone->start); - return false; + return -ENODEV; } - *sector += zone->len; + if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) + set_bit(idx, args->seq_zones_bitmap); - return true; + args->sector += zone->len; + return 0; +} + +static int blk_update_zone_info(struct gendisk *disk, unsigned int nr_zones, + struct blk_revalidate_zone_args *args) +{ + /* + * Ensure that all memory allocations in this context are done as + * if GFP_NOIO was specified. + */ + unsigned int noio_flag = memalloc_noio_save(); + struct request_queue *q = disk->queue; + int ret; + + args->seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones); + if (!args->seq_zones_wlock) + return -ENOMEM; + args->seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones); + if (!args->seq_zones_bitmap) + return -ENOMEM; + + ret = disk->fops->report_zones(disk, 0, nr_zones, + blk_revalidate_zone_cb, args); + memalloc_noio_restore(noio_flag); + return ret; } /** @@ -449,11 +448,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk) { struct request_queue *q = disk->queue; unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk)); - unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL; - unsigned int i, rep_nr_zones = 0, z = 0, nrz; - struct blk_zone *zones = NULL; - unsigned int noio_flag; - sector_t sector = 0; + struct blk_revalidate_zone_args args = { .disk = disk }; int ret = 0; if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) @@ -468,82 +463,28 @@ int blk_revalidate_disk_zones(struct gendisk *disk) return 0; } - /* - * Ensure that all memory allocations in this context are done as - * if GFP_NOIO was specified. - */ - noio_flag = memalloc_noio_save(); - - if (!nr_zones) - goto update; - - /* Allocate bitmaps */ - ret = -ENOMEM; - seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones); - if (!seq_zones_wlock) - goto out; - seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones); - if (!seq_zones_bitmap) - goto out; - - /* - * Get zone information to check the zones and initialize - * seq_zones_bitmap. - */ - rep_nr_zones = nr_zones; - zones = blk_alloc_zones(&rep_nr_zones); - if (!zones) - goto out; - - while (z < nr_zones) { - nrz = min(nr_zones - z, rep_nr_zones); - ret = disk->fops->report_zones(disk, sector, zones, &nrz); - if (ret) - goto out; - if (!nrz) - break; - for (i = 0; i < nrz; i++) { - if (!blk_zone_valid(disk, &zones[i], §or)) { - ret = -ENODEV; - goto out; - } - if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL) - set_bit(z, seq_zones_bitmap); - z++; - } - } - - if (WARN_ON(z != nr_zones)) { - ret = -EIO; - goto out; - } + if (nr_zones) + ret = blk_update_zone_info(disk, nr_zones, &args); -update: /* * Install the new bitmaps, making sure the queue is stopped and * all I/Os are completed (i.e. a scheduler is not referencing the * bitmaps). */ blk_mq_freeze_queue(q); - q->nr_zones = nr_zones; - swap(q->seq_zones_wlock, seq_zones_wlock); - swap(q->seq_zones_bitmap, seq_zones_bitmap); - blk_mq_unfreeze_queue(q); - -out: - memalloc_noio_restore(noio_flag); - - kvfree(zones); - kfree(seq_zones_wlock); - kfree(seq_zones_bitmap); - - if (ret) { + if (ret >= 0) { + q->nr_zones = nr_zones; + swap(q->seq_zones_wlock, args.seq_zones_wlock); + swap(q->seq_zones_bitmap, args.seq_zones_bitmap); + ret = 0; + } else { pr_warn("%s: failed to revalidate zones\n", disk->disk_name); - blk_mq_freeze_queue(q); blk_queue_free_zone_bitmaps(q); - blk_mq_unfreeze_queue(q); } + blk_mq_unfreeze_queue(q); + kfree(args.seq_zones_wlock); + kfree(args.seq_zones_bitmap); return ret; } EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index 9bf56fbab091..bc837862b767 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -92,7 +92,7 @@ struct nullb { int null_zone_init(struct nullb_device *dev); void null_zone_exit(struct nullb_device *dev); int null_report_zones(struct gendisk *disk, sector_t sector, - struct blk_zone *zones, unsigned int *nr_zones); + unsigned int nr_zones, report_zones_cb cb, void *data); blk_status_t null_handle_zoned(struct nullb_cmd *cmd, enum req_opf op, sector_t sector, sector_t nr_sectors); diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index 00696f16664b..d4d88b581822 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -67,21 +67,34 @@ void null_zone_exit(struct nullb_device *dev) } int null_report_zones(struct gendisk *disk, sector_t sector, - struct blk_zone *zones, unsigned int *nr_zones) + unsigned int nr_zones, report_zones_cb cb, void *data) { struct nullb *nullb = disk->private_data; struct nullb_device *dev = nullb->dev; - unsigned int zno, nrz = 0; + unsigned int first_zone, i; + struct blk_zone zone; + int error; - zno = null_zone_no(dev, sector); - if (zno < dev->nr_zones) { - nrz = min_t(unsigned int, *nr_zones, dev->nr_zones - zno); - memcpy(zones, &dev->zones[zno], nrz * sizeof(struct blk_zone)); - } + first_zone = null_zone_no(dev, sector); + if (first_zone >= dev->nr_zones) + return 0; - *nr_zones = nrz; + nr_zones = min(nr_zones, dev->nr_zones - first_zone); + for (i = 0; i < nr_zones; i++) { + /* + * Stacked DM target drivers will remap the zone information by + * modifying the zone information passed to the report callback. + * So use a local copy to avoid corruption of the device zone + * array. + */ + memcpy(&zone, &dev->zones[first_zone + i], + sizeof(struct blk_zone)); + error = cb(&zone, i, data); + if (error) + return error; + } - return 0; + return nr_zones; } size_t null_zone_valid_read_len(struct nullb *nullb, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 76587e9af0ef..a2cc9e45cbba 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -459,21 +459,15 @@ static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev } #ifdef CONFIG_BLK_DEV_ZONED -static int flakey_report_zones(struct dm_target *ti, sector_t sector, - struct blk_zone *zones, unsigned int *nr_zones) +static int flakey_report_zones(struct dm_target *ti, + struct dm_report_zones_args *args, unsigned int nr_zones) { struct flakey_c *fc = ti->private; - int ret; + sector_t sector = flakey_map_sector(ti, args->next_sector); - /* Do report and remap it */ - ret = blkdev_report_zones(fc->dev->bdev, flakey_map_sector(ti, sector), - zones, nr_zones); - if (ret != 0) - return ret; - - if (*nr_zones) - dm_remap_zone_report(ti, fc->start, zones, nr_zones); - return 0; + args->start = fc->start; + return blkdev_report_zones(fc->dev->bdev, sector, nr_zones, + dm_report_zones_cb, args); } #endif diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 97acafd48c85..8d07fdf63a47 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -136,21 +136,15 @@ static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev } #ifdef CONFIG_BLK_DEV_ZONED -static int linear_report_zones(struct dm_target *ti, sector_t sector, - struct blk_zone *zones, unsigned int *nr_zones) +static int linear_report_zones(struct dm_target *ti, + struct dm_report_zones_args *args, unsigned int nr_zones) { - struct linear_c *lc = (struct linear_c *) ti->private; - int ret; - - /* Do report and remap it */ - ret = blkdev_report_zones(lc->dev->bdev, linear_map_sector(ti, sector), - zones, nr_zones); - if (ret != 0) - return ret; + struct linear_c *lc = ti->private; + sector_t sector = linear_map_sector(ti, args->next_sector); - if (*nr_zones) - dm_remap_zone_report(ti, lc->start, zones, nr_zones); - return 0; + args->start = lc->start; + return blkdev_report_zones(lc->dev->bdev, sector, nr_zones, + dm_report_zones_cb, args); } #endif diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index feb4718ce6a6..069e4675da6b 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1080,9 +1080,10 @@ static int dmz_load_sb(struct dmz_metadata *zmd) /* * Initialize a zone descriptor. */ -static int dmz_init_zone(struct dmz_metadata *zmd, struct dm_zone *zone, - struct blk_zone *blkz) +static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data) { + struct dmz_metadata *zmd = data; + struct dm_zone *zone = &zmd->zones[idx]; struct dmz_dev *dev = zmd->dev; /* Ignore the eventual last runt (smaller) zone */ @@ -1096,26 +1097,29 @@ static int dmz_init_zone(struct dmz_metadata *zmd, struct dm_zone *zone, atomic_set(&zone->refcount, 0); zone->chunk = DMZ_MAP_UNMAPPED; - if (blkz->type == BLK_ZONE_TYPE_CONVENTIONAL) { + switch (blkz->type) { + case BLK_ZONE_TYPE_CONVENTIONAL: set_bit(DMZ_RND, &zone->flags); zmd->nr_rnd_zones++; - } else if (blkz->type == BLK_ZONE_TYPE_SEQWRITE_REQ || - blkz->type == BLK_ZONE_TYPE_SEQWRITE_PREF) { + break; + case BLK_ZONE_TYPE_SEQWRITE_REQ: + case BLK_ZONE_TYPE_SEQWRITE_PREF: set_bit(DMZ_SEQ, &zone->flags); - } else + break; + default: return -ENXIO; - - if (blkz->cond == BLK_ZONE_COND_OFFLINE) - set_bit(DMZ_OFFLINE, &zone->flags); - else if (blkz->cond == BLK_ZONE_COND_READONLY) - set_bit(DMZ_READ_ONLY, &zone->flags); + } if (dmz_is_rnd(zone)) zone->wp_block = 0; else zone->wp_block = dmz_sect2blk(blkz->wp - blkz->start); - if (!dmz_is_offline(zone) && !dmz_is_readonly(zone)) { + if (blkz->cond == BLK_ZONE_COND_OFFLINE) + set_bit(DMZ_OFFLINE, &zone->flags); + else if (blkz->cond == BLK_ZONE_COND_READONLY) + set_bit(DMZ_READ_ONLY, &zone->flags); + else { zmd->nr_useable_zones++; if (dmz_is_rnd(zone)) { zmd->nr_rnd_zones++; @@ -1138,12 +1142,6 @@ static void dmz_drop_zones(struct dmz_metadata *zmd) zmd->zones = NULL; } -/* - * The size of a zone report in number of zones. - * This results in 4096*64B=256KB report zones commands. - */ -#define DMZ_REPORT_NR_ZONES 4096 - /* * Allocate and initialize zone descriptors using the zone * information from disk. @@ -1151,11 +1149,7 @@ static void dmz_drop_zones(struct dmz_metadata *zmd) static int dmz_init_zones(struct dmz_metadata *zmd) { struct dmz_dev *dev = zmd->dev; - struct dm_zone *zone; - struct blk_zone *blkz; - unsigned int nr_blkz; - sector_t sector = 0; - int i, ret = 0; + int ret; /* Init */ zmd->zone_bitmap_size = dev->zone_nr_blocks >> 3; @@ -1169,54 +1163,38 @@ static int dmz_init_zones(struct dmz_metadata *zmd) dmz_dev_info(dev, "Using %zu B for zone information", sizeof(struct dm_zone) * dev->nr_zones); - /* Get zone information */ - nr_blkz = DMZ_REPORT_NR_ZONES; - blkz = kcalloc(nr_blkz, sizeof(struct blk_zone), GFP_KERNEL); - if (!blkz) { - ret = -ENOMEM; - goto out; - } - /* - * Get zone information and initialize zone descriptors. - * At the same time, determine where the super block - * should be: first block of the first randomly writable - * zone. + * Get zone information and initialize zone descriptors. At the same + * time, determine where the super block should be: first block of the + * first randomly writable zone. */ - zone = zmd->zones; - while (sector < dev->capacity) { - /* Get zone information */ - nr_blkz = DMZ_REPORT_NR_ZONES; - ret = blkdev_report_zones(dev->bdev, sector, blkz, &nr_blkz); - if (ret) { - dmz_dev_err(dev, "Report zones failed %d", ret); - goto out; - } + ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES, dmz_init_zone, + zmd); + if (ret < 0) { + dmz_drop_zones(zmd); + return ret; + } - if (!nr_blkz) - break; + return 0; +} - /* Process report */ - for (i = 0; i < nr_blkz; i++) { - ret = dmz_init_zone(zmd, zone, &blkz[i]); - if (ret) - goto out; - sector += dev->zone_nr_sectors; - zone++; - } - } +static int dmz_update_zone_cb(struct blk_zone *blkz, unsigned int idx, + void *data) +{ + struct dm_zone *zone = data; - /* The entire zone configuration of the disk should now be known */ - if (sector < dev->capacity) { - dmz_dev_err(dev, "Failed to get correct zone information"); - ret = -ENXIO; - } -out: - kfree(blkz); - if (ret) - dmz_drop_zones(zmd); + clear_bit(DMZ_OFFLINE, &zone->flags); + clear_bit(DMZ_READ_ONLY, &zone->flags); + if (blkz->cond == BLK_ZONE_COND_OFFLINE) + set_bit(DMZ_OFFLINE, &zone->flags); + else if (blkz->cond == BLK_ZONE_COND_READONLY) + set_bit(DMZ_READ_ONLY, &zone->flags); - return ret; + if (dmz_is_seq(zone)) + zone->wp_block = dmz_sect2blk(blkz->wp - blkz->start); + else + zone->wp_block = 0; + return 0; } /* @@ -1224,9 +1202,7 @@ out: */ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) { - unsigned int nr_blkz = 1; unsigned int noio_flag; - struct blk_zone blkz; int ret; /* @@ -1236,29 +1212,18 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) * GFP_NOIO was specified. */ noio_flag = memalloc_noio_save(); - ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone), - &blkz, &nr_blkz); + ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone), 1, + dmz_update_zone_cb, zone); memalloc_noio_restore(noio_flag); - if (!nr_blkz) + + if (ret == 0) ret = -EIO; - if (ret) { + if (ret < 0) { dmz_dev_err(zmd->dev, "Get zone %u report failed", dmz_id(zmd, zone)); return ret; } - clear_bit(DMZ_OFFLINE, &zone->flags); - clear_bit(DMZ_READ_ONLY, &zone->flags); - if (blkz.cond == BLK_ZONE_COND_OFFLINE) - set_bit(DMZ_OFFLINE, &zone->flags); - else if (blkz.cond == BLK_ZONE_COND_READONLY) - set_bit(DMZ_READ_ONLY, &zone->flags); - - if (dmz_is_seq(zone)) - zone->wp_block = dmz_sect2blk(blkz.wp - blkz.start); - else - zone->wp_block = 0; - return 0; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 76f4cfdd6b41..e8f9661a10a1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -440,14 +440,48 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) return dm_get_geometry(md, geo); } +#ifdef CONFIG_BLK_DEV_ZONED +int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data) +{ + struct dm_report_zones_args *args = data; + sector_t sector_diff = args->tgt->begin - args->start; + + /* + * Ignore zones beyond the target range. + */ + if (zone->start >= args->start + args->tgt->len) + return 0; + + /* + * Remap the start sector and write pointer position of the zone + * to match its position in the target range. + */ + zone->start += sector_diff; + if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) { + if (zone->cond == BLK_ZONE_COND_FULL) + zone->wp = zone->start + zone->len; + else if (zone->cond == BLK_ZONE_COND_EMPTY) + zone->wp = zone->start; + else + zone->wp += sector_diff; + } + + args->next_sector = zone->start + zone->len; + return args->orig_cb(zone, args->zone_idx++, args->orig_data); +} +EXPORT_SYMBOL_GPL(dm_report_zones_cb); + static int dm_blk_report_zones(struct gendisk *disk, sector_t sector, - struct blk_zone *zones, unsigned int *nr_zones) + unsigned int nr_zones, report_zones_cb cb, void *data) { -#ifdef CONFIG_BLK_DEV_ZONED struct mapped_device *md = disk->private_data; - struct dm_target *tgt; struct dm_table *map; int srcu_idx, ret; + struct dm_report_zones_args args = { + .next_sector = sector, + .orig_data = data, + .orig_cb = cb, + }; if (dm_suspended_md(md)) return -EAGAIN; @@ -456,32 +490,30 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector, if (!map) return -EIO; - tgt = dm_table_find_target(map, sector); - if (!tgt) { - ret = -EIO; - goto out; - } + do { + struct dm_target *tgt; - /* - * If we are executing this, we already know that the block device - * is a zoned device and so each target should have support for that - * type of drive. A missing report_zones method means that the target - * driver has a problem. - */ - if (WARN_ON(!tgt->type->report_zones)) { - ret = -EIO; - goto out; - } + tgt = dm_table_find_target(map, args.next_sector); + if (WARN_ON_ONCE(!tgt->type->report_zones)) { + ret = -EIO; + goto out; + } - ret = tgt->type->report_zones(tgt, sector, zones, nr_zones); + args.tgt = tgt; + ret = tgt->type->report_zones(tgt, &args, nr_zones); + if (ret < 0) + goto out; + } while (args.zone_idx < nr_zones && + args.next_sector < get_capacity(disk)); + ret = args.zone_idx; out: dm_put_live_table(md, srcu_idx); return ret; -#else - return -ENOTSUPP; -#endif } +#else +#define dm_blk_report_zones NULL +#endif /* CONFIG_BLK_DEV_ZONED */ static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx, struct block_device **bdev) @@ -1207,51 +1239,6 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) } EXPORT_SYMBOL_GPL(dm_accept_partial_bio); -/* - * The zone descriptors obtained with a zone report indicate - * zone positions within the underlying device of the target. The zone - * descriptors must be remapped to match their position within the dm device. - */ -void dm_remap_zone_report(struct dm_target *ti, sector_t start, - struct blk_zone *zones, unsigned int *nr_zones) -{ -#ifdef CONFIG_BLK_DEV_ZONED - struct blk_zone *zone; - unsigned int nrz = *nr_zones; - int i; - - /* - * Remap the start sector and write pointer position of the zones in - * the array. Since we may have obtained from the target underlying - * device more zones that the target size, also adjust the number - * of zones. - */ - for (i = 0; i < nrz; i++) { - zone = zones + i; - if (zone->start >= start + ti->len) { - memset(zone, 0, sizeof(struct blk_zone) * (nrz - i)); - break; - } - - zone->start = zone->start + ti->begin - start; - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) - continue; - - if (zone->cond == BLK_ZONE_COND_FULL) - zone->wp = zone->start + zone->len; - else if (zone->cond == BLK_ZONE_COND_EMPTY) - zone->wp = zone->start; - else - zone->wp = zone->wp + ti->begin - start; - } - - *nr_zones = i; -#else /* !CONFIG_BLK_DEV_ZONED */ - *nr_zones = 0; -#endif -} -EXPORT_SYMBOL_GPL(dm_remap_zone_report); - static blk_qc_t __map_bio(struct dm_target_io *tio) { int r; diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index bf2102a749bc..42fd3f00e4a5 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -213,8 +213,8 @@ blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, unsigned char op, bool all); extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, struct scsi_sense_hdr *sshdr); -extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, - struct blk_zone *zones, unsigned int *nr_zones); +int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, + unsigned int nr_zones, report_zones_cb cb, void *data); #else /* CONFIG_BLK_DEV_ZONED */ diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 663608d1003b..23281825ec38 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -19,34 +19,27 @@ #include "sd.h" -/** - * sd_zbc_parse_report - Convert a zone descriptor to a struct blk_zone, - * @sdkp: The disk the report originated from - * @buf: Address of the report zone descriptor - * @zone: the destination zone structure - * - * All LBA sized values are converted to 512B sectors unit. - */ -static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf, - struct blk_zone *zone) +static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf, + unsigned int idx, report_zones_cb cb, void *data) { struct scsi_device *sdp = sdkp->device; + struct blk_zone zone = { 0 }; - memset(zone, 0, sizeof(struct blk_zone)); - - zone->type = buf[0] & 0x0f; - zone->cond = (buf[1] >> 4) & 0xf; + zone.type = buf[0] & 0x0f; + zone.cond = (buf[1] >> 4) & 0xf; if (buf[1] & 0x01) - zone->reset = 1; + zone.reset = 1; if (buf[1] & 0x02) - zone->non_seq = 1; - - zone->len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8])); - zone->start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16])); - zone->wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24])); - if (zone->type != ZBC_ZONE_TYPE_CONV && - zone->cond == ZBC_ZONE_COND_FULL) - zone->wp = zone->start + zone->len; + zone.non_seq = 1; + + zone.len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8])); + zone.start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16])); + zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24])); + if (zone.type != ZBC_ZONE_TYPE_CONV && + zone.cond == ZBC_ZONE_COND_FULL) + zone.wp = zone.start + zone.len; + + return cb(&zone, idx, data); } /** @@ -154,60 +147,61 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, } /** - * sd_zbc_report_zones - Disk report zones operation. - * @disk: The target disk - * @sector: Start 512B sector of the report - * @zones: Array of zone descriptors - * @nr_zones: Number of descriptors in the array - * - * Execute a report zones command on the target disk. + * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors. + * @sdkp: The target disk */ +static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp) +{ + return logical_to_sectors(sdkp->device, sdkp->zone_blocks); +} + int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, - struct blk_zone *zones, unsigned int *nr_zones) + unsigned int nr_zones, report_zones_cb cb, void *data) { struct scsi_disk *sdkp = scsi_disk(disk); - unsigned int i, nrz = *nr_zones; + unsigned int nr, i; unsigned char *buf; - size_t buflen = 0, offset = 0; - int ret = 0; + size_t offset, buflen = 0; + int zone_idx = 0; + int ret; if (!sd_is_zoned(sdkp)) /* Not a zoned device */ return -EOPNOTSUPP; - buf = sd_zbc_alloc_report_buffer(sdkp, nrz, &buflen); + buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen); if (!buf) return -ENOMEM; - ret = sd_zbc_do_report_zones(sdkp, buf, buflen, - sectors_to_logical(sdkp->device, sector), true); - if (ret) - goto out; + while (zone_idx < nr_zones && sector < get_capacity(disk)) { + ret = sd_zbc_do_report_zones(sdkp, buf, buflen, + sectors_to_logical(sdkp->device, sector), true); + if (ret) + goto out; + + offset = 0; + nr = min(nr_zones, get_unaligned_be32(&buf[0]) / 64); + if (!nr) + break; + + for (i = 0; i < nr && zone_idx < nr_zones; i++) { + offset += 64; + ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx, + cb, data); + if (ret) + goto out; + zone_idx++; + } - nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64); - for (i = 0; i < nrz; i++) { - offset += 64; - sd_zbc_parse_report(sdkp, buf + offset, zones); - zones++; + sector += sd_zbc_zone_sectors(sdkp) * i; } - *nr_zones = nrz; - + ret = zone_idx; out: kvfree(buf); - return ret; } -/** - * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors. - * @sdkp: The target disk - */ -static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp) -{ - return logical_to_sectors(sdkp->device, sdkp->zone_blocks); -} - /** * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations * can be RESET WRITE POINTER, OPEN, CLOSE or FINISH. diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1443cee15863..95761740cf1f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2857,15 +2857,21 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) } #ifdef CONFIG_BLK_DEV_ZONED +static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx, + void *data) +{ + struct f2fs_dev_info *dev = data; + + if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) + set_bit(idx, dev->blkz_seq); + return 0; +} + static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) { struct block_device *bdev = FDEV(devi).bdev; sector_t nr_sectors = bdev->bd_part->nr_sects; - sector_t sector = 0; - struct blk_zone *zones; - unsigned int i, nr_zones; - unsigned int n = 0; - int err = -EIO; + int ret; if (!f2fs_sb_has_blkzoned(sbi)) return 0; @@ -2890,38 +2896,13 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) if (!FDEV(devi).blkz_seq) return -ENOMEM; -#define F2FS_REPORT_NR_ZONES 4096 - - zones = f2fs_kzalloc(sbi, - array_size(F2FS_REPORT_NR_ZONES, - sizeof(struct blk_zone)), - GFP_KERNEL); - if (!zones) - return -ENOMEM; - /* Get block zones type */ - while (zones && sector < nr_sectors) { - - nr_zones = F2FS_REPORT_NR_ZONES; - err = blkdev_report_zones(bdev, sector, zones, &nr_zones); - if (err) - break; - if (!nr_zones) { - err = -EIO; - break; - } - - for (i = 0; i < nr_zones; i++) { - if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL) - set_bit(n, FDEV(devi).blkz_seq); - sector += zones[i].len; - n++; - } - } - - kvfree(zones); + ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb, + &FDEV(devi)); + if (ret < 0) + return ret; - return err; + return 0; } #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6a4f7abbdcf7..397bb9bc230b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -349,17 +349,16 @@ struct queue_limits { enum blk_zoned_model zoned; }; +typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, + void *data); + #ifdef CONFIG_BLK_DEV_ZONED -/* - * Maximum number of zones to report with a single report zones command. - */ -#define BLK_ZONED_REPORT_MAX_ZONES 8192U +#define BLK_ALL_ZONES ((unsigned int)-1) +int blkdev_report_zones(struct block_device *bdev, sector_t sector, + unsigned int nr_zones, report_zones_cb cb, void *data); extern unsigned int blkdev_nr_zones(struct block_device *bdev); -extern int blkdev_report_zones(struct block_device *bdev, - sector_t sector, struct blk_zone *zones, - unsigned int *nr_zones); extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); @@ -1709,7 +1708,7 @@ struct block_device_operations { /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, - struct blk_zone *zones, unsigned int *nr_zones); + unsigned int nr_zones, report_zones_cb cb, void *data); struct module *owner; const struct pr_ops *pr_ops; }; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 399ad8632356..a164cc81b710 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -17,6 +17,7 @@ struct dm_dev; struct dm_target; struct dm_table; +struct dm_report_zones_args; struct mapped_device; struct bio_vec; @@ -93,9 +94,9 @@ typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv, typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev); -typedef int (*dm_report_zones_fn) (struct dm_target *ti, sector_t sector, - struct blk_zone *zones, - unsigned int *nr_zones); +typedef int (*dm_report_zones_fn) (struct dm_target *ti, + struct dm_report_zones_args *args, + unsigned int nr_zones); /* * These iteration functions are typically used to check (and combine) @@ -422,10 +423,23 @@ struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); -void dm_remap_zone_report(struct dm_target *ti, sector_t start, - struct blk_zone *zones, unsigned int *nr_zones); union map_info *dm_get_rq_mapinfo(struct request *rq); +#ifdef CONFIG_BLK_DEV_ZONED +struct dm_report_zones_args { + struct dm_target *tgt; + sector_t next_sector; + + void *orig_data; + report_zones_cb orig_cb; + unsigned int zone_idx; + + /* must be filled by ->report_zones before calling dm_report_zones_cb */ + sector_t start; +}; +int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data); +#endif /* CONFIG_BLK_DEV_ZONED */ + /* * Device mapper functions to parse and create devices specified by the * parameter "dm-mod.create=" -- cgit v1.2.3 From 9b38bb4b1e6de47b379afaad2c707df639bb4dc7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Dec 2019 10:39:04 +0100 Subject: block: simplify blkdev_nr_zones Simplify the arguments to blkdev_nr_zones by passing a gendisk instead of the block_device and capacity. This also removes the need for __blkdev_nr_zones as all callers are outside the fast path and can deal with the additional branch. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-zoned.c | 26 ++++++++------------------ block/ioctl.c | 2 +- drivers/md/dm-zoned-target.c | 2 +- include/linux/blkdev.h | 5 ++--- 4 files changed, 12 insertions(+), 23 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 618786f8275c..65a9bdc9fe27 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -70,30 +70,20 @@ void __blk_req_zone_write_unlock(struct request *rq) } EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock); -static inline unsigned int __blkdev_nr_zones(struct request_queue *q, - sector_t nr_sectors) -{ - sector_t zone_sectors = blk_queue_zone_sectors(q); - - return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors); -} - /** * blkdev_nr_zones - Get number of zones - * @bdev: Target block device + * @disk: Target gendisk * - * Description: - * Return the total number of zones of a zoned block device. - * For a regular block device, the number of zones is always 0. + * Return the total number of zones of a zoned block device. For a block + * device without zone capabilities, the number of zones is always 0. */ -unsigned int blkdev_nr_zones(struct block_device *bdev) +unsigned int blkdev_nr_zones(struct gendisk *disk) { - struct request_queue *q = bdev_get_queue(bdev); + sector_t zone_sectors = blk_queue_zone_sectors(disk->queue); - if (!blk_queue_is_zoned(q)) + if (!blk_queue_is_zoned(disk->queue)) return 0; - - return __blkdev_nr_zones(q, get_capacity(bdev->bd_disk)); + return (get_capacity(disk) + zone_sectors - 1) >> ilog2(zone_sectors); } EXPORT_SYMBOL_GPL(blkdev_nr_zones); @@ -447,7 +437,7 @@ static int blk_update_zone_info(struct gendisk *disk, unsigned int nr_zones, int blk_revalidate_disk_zones(struct gendisk *disk) { struct request_queue *q = disk->queue; - unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk)); + unsigned int nr_zones = blkdev_nr_zones(disk); struct blk_revalidate_zone_args args = { .disk = disk }; int ret = 0; diff --git a/block/ioctl.c b/block/ioctl.c index 7ac8a66c9787..5de98b97af2a 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -512,7 +512,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKGETZONESZ: return put_uint(arg, bdev_zone_sectors(bdev)); case BLKGETNRZONES: - return put_uint(arg, blkdev_nr_zones(bdev)); + return put_uint(arg, blkdev_nr_zones(bdev->bd_disk)); case HDIO_GETGEO: return blkdev_getgeo(bdev, argp); case BLKRAGET: diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 4574e0dedbd6..70a1063161c0 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -727,7 +727,7 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path) dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors); dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks); - dev->nr_zones = blkdev_nr_zones(dev->bdev); + dev->nr_zones = blkdev_nr_zones(dev->bdev->bd_disk); dmz->dev = dev; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6012e2592628..c5852de402b6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,8 +357,7 @@ typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); - -extern unsigned int blkdev_nr_zones(struct block_device *bdev); +unsigned int blkdev_nr_zones(struct gendisk *disk); extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); @@ -371,7 +370,7 @@ extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, #else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int blkdev_nr_zones(struct block_device *bdev) +static inline unsigned int blkdev_nr_zones(struct gendisk *disk) { return 0; } -- cgit v1.2.3 From f216fdd77b5654f8c4f6fac6020d6aabc58878ef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Dec 2019 10:39:05 +0100 Subject: block: replace seq_zones_bitmap with conv_zones_bitmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Invert the meaning of seq_zones_bitmap by keeping a bitmap of conventional zones. This allows not having a bitmap for devices that do not have conventional zones. Reviewed-by: Javier González Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-zoned.c | 18 +++++++++--------- include/linux/blkdev.h | 14 ++++++++------ 2 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 65a9bdc9fe27..9c3931051f4f 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -332,15 +332,15 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node, void blk_queue_free_zone_bitmaps(struct request_queue *q) { - kfree(q->seq_zones_bitmap); - q->seq_zones_bitmap = NULL; + kfree(q->conv_zones_bitmap); + q->conv_zones_bitmap = NULL; kfree(q->seq_zones_wlock); q->seq_zones_wlock = NULL; } struct blk_revalidate_zone_args { struct gendisk *disk; - unsigned long *seq_zones_bitmap; + unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; sector_t sector; }; @@ -394,8 +394,8 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, return -ENODEV; } - if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) - set_bit(idx, args->seq_zones_bitmap); + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + set_bit(idx, args->conv_zones_bitmap); args->sector += zone->len; return 0; @@ -415,8 +415,8 @@ static int blk_update_zone_info(struct gendisk *disk, unsigned int nr_zones, args->seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones); if (!args->seq_zones_wlock) return -ENOMEM; - args->seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones); - if (!args->seq_zones_bitmap) + args->conv_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones); + if (!args->conv_zones_bitmap) return -ENOMEM; ret = disk->fops->report_zones(disk, 0, nr_zones, @@ -465,7 +465,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk) if (ret >= 0) { q->nr_zones = nr_zones; swap(q->seq_zones_wlock, args.seq_zones_wlock); - swap(q->seq_zones_bitmap, args.seq_zones_bitmap); + swap(q->conv_zones_bitmap, args.conv_zones_bitmap); ret = 0; } else { pr_warn("%s: failed to revalidate zones\n", disk->disk_name); @@ -474,7 +474,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk) blk_mq_unfreeze_queue(q); kfree(args.seq_zones_wlock); - kfree(args.seq_zones_bitmap); + kfree(args.conv_zones_bitmap); return ret; } EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c5852de402b6..503c4d4c5884 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -503,9 +503,9 @@ struct request_queue { /* * Zoned block device information for request dispatch control. * nr_zones is the total number of zones of the device. This is always - * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones - * bits which indicates if a zone is conventional (bit clear) or - * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones + * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones + * bits which indicates if a zone is conventional (bit set) or + * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones * bits which indicates if a zone is write locked, that is, if a write * request targeting the zone was dispatched. All three fields are * initialized by the low level device driver (e.g. scsi/sd.c). @@ -518,7 +518,7 @@ struct request_queue { * blk_mq_unfreeze_queue(). */ unsigned int nr_zones; - unsigned long *seq_zones_bitmap; + unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; #endif /* CONFIG_BLK_DEV_ZONED */ @@ -723,9 +723,11 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q, static inline bool blk_queue_zone_is_seq(struct request_queue *q, sector_t sector) { - if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap) + if (!blk_queue_is_zoned(q)) return false; - return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap); + if (!q->conv_zones_bitmap) + return true; + return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap); } #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) -- cgit v1.2.3 From ae58954d8734c44298f55ed71e683ea944994fab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Dec 2019 10:39:07 +0100 Subject: block: don't handle bio based drivers in blk_revalidate_disk_zones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bio based drivers only need to update q->nr_zones. Do that manually instead of overloading blk_revalidate_disk_zones to keep that function simpler for the next round of changes that will rely even more on the request based functionality. Reviewed-by: Javier González Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-zoned.c | 16 +++++----------- drivers/block/null_blk_main.c | 12 +++++++++--- drivers/md/dm-table.c | 12 +++++++----- include/linux/blkdev.h | 5 ----- 4 files changed, 21 insertions(+), 24 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 0131f9e14bd1..51d427659ce7 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -419,8 +419,9 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, * * Helper function for low-level device drivers to (re) allocate and initialize * a disk request queue zone bitmaps. This functions should normally be called - * within the disk ->revalidate method. For BIO based queues, no zone bitmap - * is allocated. + * within the disk ->revalidate method for blk-mq based drivers. For BIO based + * drivers only q->nr_zones needs to be updated so that the sysfs exposed value + * is correct. */ int blk_revalidate_disk_zones(struct gendisk *disk) { @@ -433,15 +434,8 @@ int blk_revalidate_disk_zones(struct gendisk *disk) if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) return -EIO; - - /* - * BIO based queues do not use a scheduler so only q->nr_zones - * needs to be updated so that the sysfs exposed value is correct. - */ - if (!queue_is_mq(q)) { - q->nr_zones = args.nr_zones; - return 0; - } + if (WARN_ON_ONCE(!queue_is_mq(q))) + return -EIO; /* * Ensure that all memory allocations in this context are done as diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index dd6026289fbf..068cd0ae6e2c 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1576,11 +1576,17 @@ static int null_gendisk_register(struct nullb *nullb) disk->queue = nullb->q; strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); +#ifdef CONFIG_BLK_DEV_ZONED if (nullb->dev->zoned) { - ret = blk_revalidate_disk_zones(disk); - if (ret) - return ret; + if (queue_is_mq(nullb->q)) { + ret = blk_revalidate_disk_zones(disk); + if (ret) + return ret; + } else { + nullb->q->nr_zones = blkdev_nr_zones(disk); + } } +#endif add_disk(disk); return 0; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 2ae0c1913766..0a2cc197f62b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1954,12 +1954,14 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, /* * For a zoned target, the number of zones should be updated for the * correct value to be exposed in sysfs queue/nr_zones. For a BIO based - * target, this is all that is needed. For a request based target, the - * queue zone bitmaps must also be updated. - * Use blk_revalidate_disk_zones() to handle this. + * target, this is all that is needed. */ - if (blk_queue_is_zoned(q)) - blk_revalidate_disk_zones(t->md->disk); +#ifdef CONFIG_BLK_DEV_ZONED + if (blk_queue_is_zoned(q)) { + WARN_ON_ONCE(queue_is_mq(q)); + q->nr_zones = blkdev_nr_zones(t->md->disk); + } +#endif /* Allow reads to exceed readahead limits */ q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 503c4d4c5884..47eb22a3b7f9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -375,11 +375,6 @@ static inline unsigned int blkdev_nr_zones(struct gendisk *disk) return 0; } -static inline int blk_revalidate_disk_zones(struct gendisk *disk) -{ - return 0; -} - static inline int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) -- cgit v1.2.3