From d05d199883b09cd34937ebb045adbed9098e9780 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 Jun 2018 15:51:00 -0700 Subject: drbd: Do not redefine __must_hold() Since __must_hold() is defined in , do not redefine it in DRBD. Compile-tested only. Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Cc: Philipp Reisner Cc: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index bc4ed2ed40a2..e35a234b0a8f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -55,12 +55,10 @@ # define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) # define __protected_read_by(x) __attribute__((require_context(x,1,999,"read"))) # define __protected_write_by(x) __attribute__((require_context(x,1,999,"write"))) -# define __must_hold(x) __attribute__((context(x,1,1), require_context(x,1,999,"call"))) #else # define __protected_by(x) # define __protected_read_by(x) # define __protected_write_by(x) -# define __must_hold(x) #endif /* shared module parameters, defined in drbd_main.c */ -- cgit v1.2.3 From b64a71a0130dd2a88b0fc36a3c0a4882f47813e8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Jul 2018 08:42:34 +0100 Subject: block/floppy: remove redundant variable dflags Variable dflags is being assigned but is never used hence it is redundant and can be removed. Cleans up clang warning: warning: variable 'dflags' set but not used [-Wunused-but-set-variable] Signed-off-by: Colin Ian King Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8871b5044d9e..48f622728ce6 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1461,7 +1461,6 @@ static void setup_rw_floppy(void) int i; int r; int flags; - int dflags; unsigned long ready_date; void (*function)(void); @@ -1485,8 +1484,6 @@ static void setup_rw_floppy(void) if (fd_wait_for_completion(ready_date, function)) return; } - dflags = DRS->flags; - if ((flags & FD_RAW_READ) || (flags & FD_RAW_WRITE)) setup_DMA(); -- cgit v1.2.3 From f4354a94e2097fe87a14d47ff502754bb547029a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Jul 2018 08:47:06 +0100 Subject: loop: remove redundant pointer inode Pointer inode is being assigned but is never used hence it is redundant and can be removed. Cleans up clang warning: warning: variable 'inode' set but not used [-Wunused-but-set-variable] Signed-off-by: Colin Ian King Signed-off-by: Jens Axboe --- drivers/block/loop.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4cb1d1be3cfb..bae472646e4a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -690,7 +690,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { struct file *file, *old_file; - struct inode *inode; int error; error = -ENXIO; @@ -711,7 +710,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (error) goto out_putf; - inode = file->f_mapping->host; old_file = lo->lo_backing_file; error = -EINVAL; -- cgit v1.2.3 From 5efac89c849849ad3a959224eb711f9c311e5bde Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Jul 2018 09:14:19 +0100 Subject: paride: remove redundant variable n Variable n is being assigned but is never used hence it is redundant and can be removed. Also put spacing between variables in declaration to clean up checkpatch warnings. Cleans up clang warning: warning: variable 'n' set but not used [-Wunused-but-set-variable] Signed-off-by: Colin Ian King Signed-off-by: Jens Axboe --- drivers/block/paride/bpck.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/paride/bpck.c b/drivers/block/paride/bpck.c index 4f27e7392e38..f5f63ca2889d 100644 --- a/drivers/block/paride/bpck.c +++ b/drivers/block/paride/bpck.c @@ -347,7 +347,7 @@ static int bpck_test_proto( PIA *pi, char * scratch, int verbose ) static void bpck_read_eeprom ( PIA *pi, char * buf ) -{ int i,j,k,n,p,v,f, om, od; +{ int i, j, k, p, v, f, om, od; bpck_force_spp(pi); @@ -356,7 +356,6 @@ static void bpck_read_eeprom ( PIA *pi, char * buf ) bpck_connect(pi); - n = 0; WR(4,0); for (i=0;i<64;i++) { WR(6,8); -- cgit v1.2.3 From d769a992966b0a188096fda24fc08fc769ec6547 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 2 Jul 2018 12:49:02 -0500 Subject: drbd: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 2 was used in this case: -Wimplicit-fallthrough=2 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index be9450f5ad1c..a36a30795c43 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2790,6 +2790,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet then we would do something smarter here than reading the block... */ peer_req->flags |= EE_RS_THIN_REQ; + /* fall through */ case P_RS_DATA_REQUEST: peer_req->w.cb = w_e_end_rsdata_req; fault_type = DRBD_FAULT_RS_RD; @@ -2968,6 +2969,7 @@ static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold /* Else fall through to one of the other strategies... */ drbd_warn(device, "Discard younger/older primary did not find a decision\n" "Using discard-least-changes instead\n"); + /* fall through */ case ASB_DISCARD_ZERO_CHG: if (ch_peer == 0 && ch_self == 0) { rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) @@ -2979,6 +2981,7 @@ static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold } if (after_sb_0p == ASB_DISCARD_ZERO_CHG) break; + /* else: fall through */ case ASB_DISCARD_LEAST_CHG: if (ch_self < ch_peer) rv = -1; -- cgit v1.2.3 From d893ff86034f7107f89d8b740c2b5902a21a49db Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 2 Jul 2018 12:52:06 -0500 Subject: block/loop: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jens Axboe --- drivers/block/loop.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index bae472646e4a..ea9debf59b22 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1609,6 +1609,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, case LOOP_GET_STATUS64: case LOOP_SET_STATUS64: arg = (unsigned long) compat_ptr(arg); + /* fall through */ case LOOP_SET_FD: case LOOP_CHANGE_FD: case LOOP_SET_BLOCK_SIZE: -- cgit v1.2.3 From 00a8cdb84fcb64c7f9f1061298ff676a96dfaf41 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 6 Jul 2018 03:07:13 +0800 Subject: null_blk: remove NULLB_DEV_FL_CONFIGURED on turning off nullb device Currently mbps knob could only be set once before switching power knob to on, after power knob has been set at least once, there is no way to set mbps knob again due to -EBUSY. As nullb is mainly used for testing, in order to make it flexible, this removes the flag NULLB_DEV_FL_CONFIGURED so that mbps knob can be reset when power knob is off, e.g. echo 0 > /config/nullb/a/power echo 40 > /config/nullb/a/mbps echo 1 > /config/nullb/a/power So does other knobs under /config/nullb/a. Signed-off-by: Liu Bo Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 042c778e5a4e..8abfb1059909 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -390,6 +390,7 @@ static ssize_t nullb_device_power_store(struct config_item *item, null_del_dev(dev->nullb); mutex_unlock(&lock); clear_bit(NULLB_DEV_FL_UP, &dev->flags); + clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); } return count; -- cgit v1.2.3 From 6dad38d38f20c0c8a84b5ae4f23c62b2c8758ec5 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Fri, 6 Jul 2018 19:38:38 +0200 Subject: null_blk: move shared definitions to header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the null_blk device driver, such that it can prepare for zoned block interface support. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 76 +-------------------------------------------- drivers/block/null_blk.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 75 deletions(-) create mode 100644 drivers/block/null_blk.h (limited to 'drivers/block') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 8abfb1059909..47dd08499145 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -7,14 +7,8 @@ #include #include #include -#include #include -#include -#include -#include -#include -#include -#include +#include "null_blk.h" #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) @@ -35,28 +29,6 @@ static inline u64 mb_per_tick(int mbps) return (1 << 20) / TICKS_PER_SEC * ((u64) mbps); } -struct nullb_cmd { - struct list_head list; - struct llist_node ll_list; - struct __call_single_data csd; - struct request *rq; - struct bio *bio; - unsigned int tag; - blk_status_t error; - struct nullb_queue *nq; - struct hrtimer timer; -}; - -struct nullb_queue { - unsigned long *tag_map; - wait_queue_head_t wait; - unsigned int queue_depth; - struct nullb_device *dev; - unsigned int requeue_selection; - - struct nullb_cmd *cmds; -}; - /* * Status flags for nullb_device. * @@ -92,52 +64,6 @@ struct nullb_page { #define NULLB_PAGE_LOCK (MAP_SZ - 1) #define NULLB_PAGE_FREE (MAP_SZ - 2) -struct nullb_device { - struct nullb *nullb; - struct config_item item; - struct radix_tree_root data; /* data stored in the disk */ - struct radix_tree_root cache; /* disk cache data */ - unsigned long flags; /* device flags */ - unsigned int curr_cache; - struct badblocks badblocks; - - unsigned long size; /* device size in MB */ - unsigned long completion_nsec; /* time in ns to complete a request */ - unsigned long cache_size; /* disk cache size in MB */ - unsigned int submit_queues; /* number of submission queues */ - unsigned int home_node; /* home node for the device */ - unsigned int queue_mode; /* block interface */ - unsigned int blocksize; /* block size */ - unsigned int irqmode; /* IRQ completion handler */ - unsigned int hw_queue_depth; /* queue depth */ - unsigned int index; /* index of the disk, only valid with a disk */ - unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */ - bool blocking; /* blocking blk-mq device */ - bool use_per_node_hctx; /* use per-node allocation for hardware context */ - bool power; /* power on/off the device */ - bool memory_backed; /* if data is stored in memory */ - bool discard; /* if support discard */ -}; - -struct nullb { - struct nullb_device *dev; - struct list_head list; - unsigned int index; - struct request_queue *q; - struct gendisk *disk; - struct blk_mq_tag_set *tag_set; - struct blk_mq_tag_set __tag_set; - unsigned int queue_depth; - atomic_long_t cur_bytes; - struct hrtimer bw_timer; - unsigned long cache_flush_pos; - spinlock_t lock; - - struct nullb_queue *queues; - unsigned int nr_queues; - char disk_name[DISK_NAME_LEN]; -}; - static LIST_HEAD(nullb_list); static struct mutex lock; static int null_major; diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h new file mode 100644 index 000000000000..d82c5501806d --- /dev/null +++ b/drivers/block/null_blk.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BLK_NULL_BLK_H +#define __BLK_NULL_BLK_H + +#include +#include +#include +#include +#include +#include +#include + +struct nullb_cmd { + struct list_head list; + struct llist_node ll_list; + struct __call_single_data csd; + struct request *rq; + struct bio *bio; + unsigned int tag; + blk_status_t error; + struct nullb_queue *nq; + struct hrtimer timer; +}; + +struct nullb_queue { + unsigned long *tag_map; + wait_queue_head_t wait; + unsigned int queue_depth; + struct nullb_device *dev; + unsigned int requeue_selection; + + struct nullb_cmd *cmds; +}; + +struct nullb_device { + struct nullb *nullb; + struct config_item item; + struct radix_tree_root data; /* data stored in the disk */ + struct radix_tree_root cache; /* disk cache data */ + unsigned long flags; /* device flags */ + unsigned int curr_cache; + struct badblocks badblocks; + + unsigned long size; /* device size in MB */ + unsigned long completion_nsec; /* time in ns to complete a request */ + unsigned long cache_size; /* disk cache size in MB */ + unsigned int submit_queues; /* number of submission queues */ + unsigned int home_node; /* home node for the device */ + unsigned int queue_mode; /* block interface */ + unsigned int blocksize; /* block size */ + unsigned int irqmode; /* IRQ completion handler */ + unsigned int hw_queue_depth; /* queue depth */ + unsigned int index; /* index of the disk, only valid with a disk */ + unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */ + bool blocking; /* blocking blk-mq device */ + bool use_per_node_hctx; /* use per-node allocation for hardware context */ + bool power; /* power on/off the device */ + bool memory_backed; /* if data is stored in memory */ + bool discard; /* if support discard */ +}; + +struct nullb { + struct nullb_device *dev; + struct list_head list; + unsigned int index; + struct request_queue *q; + struct gendisk *disk; + struct blk_mq_tag_set *tag_set; + struct blk_mq_tag_set __tag_set; + unsigned int queue_depth; + atomic_long_t cur_bytes; + struct hrtimer bw_timer; + unsigned long cache_flush_pos; + spinlock_t lock; + + struct nullb_queue *queues; + unsigned int nr_queues; + char disk_name[DISK_NAME_LEN]; +}; +#endif /* __NULL_BLK_H */ -- cgit v1.2.3 From ca4b2a011948fae4e4d31490107db4926385a983 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Fri, 6 Jul 2018 19:38:39 +0200 Subject: null_blk: add zone support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds support for exposing a null_blk device through the zone device interface. The interface is managed with the parameters zoned and zone_size. If zoned is set, the null_blk instance registers as a zoned block device. The zone_size parameter defines how big each zone will be. Signed-off-by: Matias Bjørling Signed-off-by: Bart Van Assche Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- Documentation/block/null_blk.txt | 7 ++ drivers/block/Makefile | 5 +- drivers/block/null_blk.c | 48 ++++++++++++- drivers/block/null_blk.h | 28 ++++++++ drivers/block/null_blk_zoned.c | 149 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 234 insertions(+), 3 deletions(-) create mode 100644 drivers/block/null_blk_zoned.c (limited to 'drivers/block') diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt index 07f147381f32..ea2dafe49ae8 100644 --- a/Documentation/block/null_blk.txt +++ b/Documentation/block/null_blk.txt @@ -85,3 +85,10 @@ shared_tags=[0/1]: Default: 0 0: Tag set is not shared. 1: Tag set shared between devices for blk-mq. Only makes sense with nr_devices > 1, otherwise there's no tag set to share. + +zoned=[0/1]: Default: 0 + 0: Block device is exposed as a random-access block device. + 1: Block device is exposed as a host-managed zoned block device. + +zone_size=[MB]: Default: 256 + Per zone size when exposed as a zoned block device. Must be a power of two. diff --git a/drivers/block/Makefile b/drivers/block/Makefile index dc061158b403..a0d88aa0c05d 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -36,8 +36,11 @@ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ -obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o obj-$(CONFIG_ZRAM) += zram/ +obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk_mod.o +null_blk_mod-objs := null_blk.o +null_blk_mod-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o + skd-y := skd_main.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 47dd08499145..86cafa6d3b41 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -180,6 +180,14 @@ static bool g_use_per_node_hctx; module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444); MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); +static bool g_zoned; +module_param_named(zoned, g_zoned, bool, S_IRUGO); +MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false"); + +static unsigned long g_zone_size = 256; +module_param_named(zone_size, g_zone_size, ulong, S_IRUGO); +MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256"); + static struct nullb_device *null_alloc_dev(void); static void null_free_dev(struct nullb_device *dev); static void null_del_dev(struct nullb *nullb); @@ -283,6 +291,8 @@ NULLB_DEVICE_ATTR(memory_backed, bool); NULLB_DEVICE_ATTR(discard, bool); NULLB_DEVICE_ATTR(mbps, uint); NULLB_DEVICE_ATTR(cache_size, ulong); +NULLB_DEVICE_ATTR(zoned, bool); +NULLB_DEVICE_ATTR(zone_size, ulong); static ssize_t nullb_device_power_show(struct config_item *item, char *page) { @@ -395,6 +405,8 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_mbps, &nullb_device_attr_cache_size, &nullb_device_attr_badblocks, + &nullb_device_attr_zoned, + &nullb_device_attr_zone_size, NULL, }; @@ -447,7 +459,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { - return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks\n"); + return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -506,6 +518,8 @@ static struct nullb_device *null_alloc_dev(void) dev->hw_queue_depth = g_hw_queue_depth; dev->blocking = g_blocking; dev->use_per_node_hctx = g_use_per_node_hctx; + dev->zoned = g_zoned; + dev->zone_size = g_zone_size; return dev; } @@ -514,6 +528,7 @@ static void null_free_dev(struct nullb_device *dev) if (!dev) return; + null_zone_exit(dev); badblocks_exit(&dev->badblocks); kfree(dev); } @@ -1146,6 +1161,11 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) struct nullb *nullb = dev->nullb; int err = 0; + if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) { + cmd->error = null_zone_report(nullb, cmd); + goto out; + } + if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) { struct request *rq = cmd->rq; @@ -1210,6 +1230,13 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) } } cmd->error = errno_to_blk_status(err); + + if (!cmd->error && dev->zoned) { + if (req_op(cmd->rq) == REQ_OP_WRITE) + null_zone_write(cmd); + else if (req_op(cmd->rq) == REQ_OP_ZONE_RESET) + null_zone_reset(cmd); + } out: /* Complete IO by inline, softirq or timer */ switch (dev->irqmode) { @@ -1737,6 +1764,15 @@ static int null_add_dev(struct nullb_device *dev) blk_queue_flush_queueable(nullb->q, true); } + if (dev->zoned) { + rv = null_zone_init(dev); + if (rv) + goto out_cleanup_blk_queue; + + blk_queue_chunk_sectors(nullb->q, dev->zone_size_sects); + nullb->q->limits.zoned = BLK_ZONED_HM; + } + nullb->q->queuedata = nullb; blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q); @@ -1755,13 +1791,16 @@ static int null_add_dev(struct nullb_device *dev) rv = null_gendisk_register(nullb); if (rv) - goto out_cleanup_blk_queue; + goto out_cleanup_zone; mutex_lock(&lock); list_add_tail(&nullb->list, &nullb_list); mutex_unlock(&lock); return 0; +out_cleanup_zone: + if (dev->zoned) + null_zone_exit(dev); out_cleanup_blk_queue: blk_cleanup_queue(nullb->q); out_cleanup_tags: @@ -1788,6 +1827,11 @@ static int __init null_init(void) g_bs = PAGE_SIZE; } + if (!is_power_of_2(g_zone_size)) { + pr_err("null_blk: zone_size must be power-of-two\n"); + return -EINVAL; + } + if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) { if (g_submit_queues != nr_online_nodes) { pr_warn("null_blk: submit_queues param is set to %u.\n", diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index d82c5501806d..d81781f22dba 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -41,9 +41,14 @@ struct nullb_device { unsigned int curr_cache; struct badblocks badblocks; + unsigned int nr_zones; + struct blk_zone *zones; + sector_t zone_size_sects; + unsigned long size; /* device size in MB */ unsigned long completion_nsec; /* time in ns to complete a request */ unsigned long cache_size; /* disk cache size in MB */ + unsigned long zone_size; /* zone size in MB if device is zoned */ unsigned int submit_queues; /* number of submission queues */ unsigned int home_node; /* home node for the device */ unsigned int queue_mode; /* block interface */ @@ -57,6 +62,7 @@ struct nullb_device { bool power; /* power on/off the device */ bool memory_backed; /* if data is stored in memory */ bool discard; /* if support discard */ + bool zoned; /* if device is zoned */ }; struct nullb { @@ -77,4 +83,26 @@ struct nullb { unsigned int nr_queues; char disk_name[DISK_NAME_LEN]; }; + +#ifdef CONFIG_BLK_DEV_ZONED +int null_zone_init(struct nullb_device *dev); +void null_zone_exit(struct nullb_device *dev); +blk_status_t null_zone_report(struct nullb *nullb, + struct nullb_cmd *cmd); +void null_zone_write(struct nullb_cmd *cmd); +void null_zone_reset(struct nullb_cmd *cmd); +#else +static inline int null_zone_init(struct nullb_device *dev) +{ + return -EINVAL; +} +static inline void null_zone_exit(struct nullb_device *dev) {} +static inline blk_status_t null_zone_report(struct nullb *nullb, + struct nullb_cmd *cmd) +{ + return BLK_STS_NOTSUPP; +} +static inline void null_zone_write(struct nullb_cmd *cmd) {} +static inline void null_zone_reset(struct nullb_cmd *cmd) {} +#endif /* CONFIG_BLK_DEV_ZONED */ #endif /* __NULL_BLK_H */ diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c new file mode 100644 index 000000000000..a979ca00d7be --- /dev/null +++ b/drivers/block/null_blk_zoned.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "null_blk.h" + +/* zone_size in MBs to sectors. */ +#define ZONE_SIZE_SHIFT 11 + +static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) +{ + return sect >> ilog2(dev->zone_size_sects); +} + +int null_zone_init(struct nullb_device *dev) +{ + sector_t dev_size = (sector_t)dev->size * 1024 * 1024; + sector_t sector = 0; + unsigned int i; + + if (!is_power_of_2(dev->zone_size)) { + pr_err("null_blk: zone_size must be power-of-two\n"); + return -EINVAL; + } + + dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT; + dev->nr_zones = dev_size >> + (SECTOR_SHIFT + ilog2(dev->zone_size_sects)); + dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone), + GFP_KERNEL | __GFP_ZERO); + if (!dev->zones) + return -ENOMEM; + + for (i = 0; i < dev->nr_zones; i++) { + struct blk_zone *zone = &dev->zones[i]; + + zone->start = zone->wp = sector; + zone->len = dev->zone_size_sects; + zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ; + zone->cond = BLK_ZONE_COND_EMPTY; + + sector += dev->zone_size_sects; + } + + return 0; +} + +void null_zone_exit(struct nullb_device *dev) +{ + kvfree(dev->zones); +} + +static void null_zone_fill_rq(struct nullb_device *dev, struct request *rq, + unsigned int zno, unsigned int nr_zones) +{ + struct blk_zone_report_hdr *hdr = NULL; + struct bio_vec bvec; + struct bvec_iter iter; + void *addr; + unsigned int zones_to_cpy; + + bio_for_each_segment(bvec, rq->bio, iter) { + addr = kmap_atomic(bvec.bv_page); + + zones_to_cpy = bvec.bv_len / sizeof(struct blk_zone); + + if (!hdr) { + hdr = (struct blk_zone_report_hdr *)addr; + hdr->nr_zones = nr_zones; + zones_to_cpy--; + addr += sizeof(struct blk_zone_report_hdr); + } + + zones_to_cpy = min_t(unsigned int, zones_to_cpy, nr_zones); + + memcpy(addr, &dev->zones[zno], + zones_to_cpy * sizeof(struct blk_zone)); + + kunmap_atomic(addr); + + nr_zones -= zones_to_cpy; + zno += zones_to_cpy; + + if (!nr_zones) + break; + } +} + +blk_status_t null_zone_report(struct nullb *nullb, + struct nullb_cmd *cmd) +{ + struct nullb_device *dev = nullb->dev; + struct request *rq = cmd->rq; + unsigned int zno = null_zone_no(dev, blk_rq_pos(rq)); + unsigned int nr_zones = dev->nr_zones - zno; + unsigned int max_zones = (blk_rq_bytes(rq) / + sizeof(struct blk_zone)) - 1; + + nr_zones = min_t(unsigned int, nr_zones, max_zones); + + null_zone_fill_rq(nullb->dev, rq, zno, nr_zones); + + return BLK_STS_OK; +} + +void null_zone_write(struct nullb_cmd *cmd) +{ + struct nullb_device *dev = cmd->nq->dev; + struct request *rq = cmd->rq; + sector_t sector = blk_rq_pos(rq); + unsigned int rq_sectors = blk_rq_sectors(rq); + unsigned int zno = null_zone_no(dev, sector); + struct blk_zone *zone = &dev->zones[zno]; + + switch (zone->cond) { + case BLK_ZONE_COND_FULL: + /* Cannot write to a full zone */ + cmd->error = BLK_STS_IOERR; + break; + case BLK_ZONE_COND_EMPTY: + case BLK_ZONE_COND_IMP_OPEN: + /* Writes must be at the write pointer position */ + if (blk_rq_pos(rq) != zone->wp) { + cmd->error = BLK_STS_IOERR; + break; + } + + if (zone->cond == BLK_ZONE_COND_EMPTY) + zone->cond = BLK_ZONE_COND_IMP_OPEN; + + zone->wp += rq_sectors; + if (zone->wp == zone->start + zone->len) + zone->cond = BLK_ZONE_COND_FULL; + break; + default: + /* Invalid zone condition */ + cmd->error = BLK_STS_IOERR; + break; + } +} + +void null_zone_reset(struct nullb_cmd *cmd) +{ + struct nullb_device *dev = cmd->nq->dev; + struct request *rq = cmd->rq; + unsigned int zno = null_zone_no(dev, blk_rq_pos(rq)); + struct blk_zone *zone = &dev->zones[zno]; + + zone->cond = BLK_ZONE_COND_EMPTY; + zone->wp = zone->start; +} -- cgit v1.2.3 From 3993e501bf853cce85c5114a704b86b8f486790c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 6 Jul 2018 20:49:19 -0700 Subject: block/DAC960.c: fix defined but not used build warnings Fix build warnings in DAC960.c when CONFIG_PROC_FS is not enabled by marking the unused functions as __maybe_unused. ../drivers/block/DAC960.c:6429:12: warning: 'dac960_proc_show' defined but not used [-Wunused-function] ../drivers/block/DAC960.c:6449:12: warning: 'dac960_initial_status_proc_show' defined but not used [-Wunused-function] ../drivers/block/DAC960.c:6456:12: warning: 'dac960_current_status_proc_show' defined but not used [-Wunused-function] Signed-off-by: Randy Dunlap Cc: Jens Axboe Cc: linux-block@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/block/DAC960.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index f6518067aa7d..f99e5c883368 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -21,6 +21,7 @@ #define DAC960_DriverDate "21 Aug 2007" +#include #include #include #include @@ -6426,7 +6427,7 @@ static bool DAC960_V2_ExecuteUserCommand(DAC960_Controller_T *Controller, return true; } -static int dac960_proc_show(struct seq_file *m, void *v) +static int __maybe_unused dac960_proc_show(struct seq_file *m, void *v) { unsigned char *StatusMessage = "OK\n"; int ControllerNumber; @@ -6446,14 +6447,16 @@ static int dac960_proc_show(struct seq_file *m, void *v) return 0; } -static int dac960_initial_status_proc_show(struct seq_file *m, void *v) +static int __maybe_unused dac960_initial_status_proc_show(struct seq_file *m, + void *v) { DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private; seq_printf(m, "%.*s", Controller->InitialStatusLength, Controller->CombinedStatusBuffer); return 0; } -static int dac960_current_status_proc_show(struct seq_file *m, void *v) +static int __maybe_unused dac960_current_status_proc_show(struct seq_file *m, + void *v) { DAC960_Controller_T *Controller = (DAC960_Controller_T *) m->private; unsigned char *StatusMessage = -- cgit v1.2.3 From ea870bb2ae6cbc1a5ba1f3ec8c8fca921a51880b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 12 Jul 2018 22:29:16 +0200 Subject: block: skd: Use %pad printk format for dma_addr_t values Use the existing %pad printk format to print dma_addr_t values. This avoids the following warnings when compiling on the parisc64 platform: drivers/block/skd_main.c: In function 'skd_preop_sg_list': drivers/block/skd_main.c:660:4: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 6 has type 'dma_addr_t {aka unsigned int}' [-Wformat=] Reviewed-by: Bart Van Assche Signed-off-by: Helge Deller Signed-off-by: Jens Axboe --- drivers/block/skd_main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index bc7aea6d7b7c..87b9e7fbf062 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -657,8 +657,8 @@ static bool skd_preop_sg_list(struct skd_device *skdev, if (unlikely(skdev->dbg_level > 1)) { dev_dbg(&skdev->pdev->dev, - "skreq=%x sksg_list=%p sksg_dma=%llx\n", - skreq->id, skreq->sksg_list, skreq->sksg_dma_address); + "skreq=%x sksg_list=%p sksg_dma=%pad\n", + skreq->id, skreq->sksg_list, &skreq->sksg_dma_address); for (i = 0; i < n_sg; i++) { struct fit_sg_descriptor *sgd = &skreq->sksg_list[i]; @@ -1190,8 +1190,8 @@ static void skd_send_fitmsg(struct skd_device *skdev, { u64 qcmd; - dev_dbg(&skdev->pdev->dev, "dma address 0x%llx, busy=%d\n", - skmsg->mb_dma_address, skd_in_flight(skdev)); + dev_dbg(&skdev->pdev->dev, "dma address %pad, busy=%d\n", + &skmsg->mb_dma_address, skd_in_flight(skdev)); dev_dbg(&skdev->pdev->dev, "msg_buf %p\n", skmsg->msg_buf); qcmd = skmsg->mb_dma_address; @@ -1250,9 +1250,9 @@ static void skd_send_special_fitmsg(struct skd_device *skdev, } dev_dbg(&skdev->pdev->dev, - "skspcl=%p id=%04x sksg_list=%p sksg_dma=%llx\n", + "skspcl=%p id=%04x sksg_list=%p sksg_dma=%pad\n", skspcl, skspcl->req.id, skspcl->req.sksg_list, - skspcl->req.sksg_dma_address); + &skspcl->req.sksg_dma_address); for (i = 0; i < skspcl->req.n_sg; i++) { struct fit_sg_descriptor *sgd = &skspcl->req.sksg_list[i]; @@ -2685,8 +2685,8 @@ static int skd_cons_skmsg(struct skd_device *skdev) WARN(((uintptr_t)skmsg->msg_buf | skmsg->mb_dma_address) & (FIT_QCMD_ALIGN - 1), - "not aligned: msg_buf %p mb_dma_address %#llx\n", - skmsg->msg_buf, skmsg->mb_dma_address); + "not aligned: msg_buf %p mb_dma_address %pad\n", + skmsg->msg_buf, &skmsg->mb_dma_address); memset(skmsg->msg_buf, 0, SKD_N_FITMSG_BYTES); } -- cgit v1.2.3 From ada94973f15f175283fd3b8f9bfcf9de6f2cc818 Mon Sep 17 00:00:00 2001 From: RAGHU Halharvi Date: Tue, 17 Jul 2018 22:32:12 +0530 Subject: pktcdvd: remove assignment in if condition * Remove checkpatch errors caused due to assignment operation in if condition Signed-off-by: RAGHU Halharvi Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 69 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 23 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index b3f83cd96f33..a4b4d524c3af 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -798,7 +798,8 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd, cgc.cmd[4] = (write_speed >> 8) & 0xff; cgc.cmd[5] = write_speed & 0xff; - if ((ret = pkt_generic_packet(pd, &cgc))) + ret = pkt_generic_packet(pd, &cgc); + if (ret) pkt_dump_sense(pd, &cgc); return ret; @@ -1562,7 +1563,8 @@ static int pkt_get_disc_info(struct pktcdvd_device *pd, disc_information *di) cgc.cmd[8] = cgc.buflen = 2; cgc.quiet = 1; - if ((ret = pkt_generic_packet(pd, &cgc))) + ret = pkt_generic_packet(pd, &cgc); + if (ret) return ret; /* not all drives have the same disc_info length, so requeue @@ -1591,7 +1593,8 @@ static int pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type, cgc.cmd[8] = 8; cgc.quiet = 1; - if ((ret = pkt_generic_packet(pd, &cgc))) + ret = pkt_generic_packet(pd, &cgc); + if (ret) return ret; cgc.buflen = be16_to_cpu(ti->track_information_length) + @@ -1612,17 +1615,20 @@ static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd, __u32 last_track; int ret = -1; - if ((ret = pkt_get_disc_info(pd, &di))) + ret = pkt_get_disc_info(pd, &di); + if (ret) return ret; last_track = (di.last_track_msb << 8) | di.last_track_lsb; - if ((ret = pkt_get_track_info(pd, last_track, 1, &ti))) + ret = pkt_get_track_info(pd, last_track, 1, &ti); + if (ret) return ret; /* if this track is blank, try the previous. */ if (ti.blank) { last_track--; - if ((ret = pkt_get_track_info(pd, last_track, 1, &ti))) + ret = pkt_get_track_info(pd, last_track, 1, &ti); + if (ret) return ret; } @@ -1657,7 +1663,8 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) memset(buffer, 0, sizeof(buffer)); init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ); cgc.sense = &sense; - if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) { + ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0); + if (ret) { pkt_dump_sense(pd, &cgc); return ret; } @@ -1672,7 +1679,8 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) */ init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ); cgc.sense = &sense; - if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) { + ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0); + if (ret) { pkt_dump_sense(pd, &cgc); return ret; } @@ -1714,7 +1722,8 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) wp->packet_size = cpu_to_be32(pd->settings.size >> 2); cgc.buflen = cgc.cmd[8] = size; - if ((ret = pkt_mode_select(pd, &cgc))) { + ret = pkt_mode_select(pd, &cgc); + if (ret) { pkt_dump_sense(pd, &cgc); return ret; } @@ -1819,7 +1828,8 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd) memset(&di, 0, sizeof(disc_information)); memset(&ti, 0, sizeof(track_information)); - if ((ret = pkt_get_disc_info(pd, &di))) { + ret = pkt_get_disc_info(pd, &di); + if (ret) { pkt_err(pd, "failed get_disc\n"); return ret; } @@ -1830,7 +1840,8 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd) pd->type = di.erasable ? PACKET_CDRW : PACKET_CDR; track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */ - if ((ret = pkt_get_track_info(pd, track, 1, &ti))) { + ret = pkt_get_track_info(pd, track, 1, &ti); + if (ret) { pkt_err(pd, "failed get_track\n"); return ret; } @@ -1918,7 +1929,8 @@ static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd, */ cgc.quiet = 1; - if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WCACHING_PAGE, 0))) + ret = pkt_mode_sense(pd, &cgc, GPMODE_WCACHING_PAGE, 0); + if (ret) return ret; buf[pd->mode_offset + 10] |= (!!set << 2); @@ -2093,7 +2105,8 @@ static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd) cgc.timeout = 60*HZ; cgc.cmd[0] = GPCMD_SEND_OPC; cgc.cmd[1] = 1; - if ((ret = pkt_generic_packet(pd, &cgc))) + ret = pkt_generic_packet(pd, &cgc); + if (ret) pkt_dump_sense(pd, &cgc); return ret; } @@ -2103,19 +2116,22 @@ static int pkt_open_write(struct pktcdvd_device *pd) int ret; unsigned int write_speed, media_write_speed, read_speed; - if ((ret = pkt_probe_settings(pd))) { + ret = pkt_probe_settings(pd); + if (ret) { pkt_dbg(2, pd, "failed probe\n"); return ret; } - if ((ret = pkt_set_write_settings(pd))) { + ret = pkt_set_write_settings(pd); + if (ret) { pkt_dbg(1, pd, "failed saving write settings\n"); return -EIO; } pkt_write_caching(pd, USE_WCACHING); - if ((ret = pkt_get_max_speed(pd, &write_speed))) + ret = pkt_get_max_speed(pd, &write_speed); + if (ret) write_speed = 16 * 177; switch (pd->mmc3_profile) { case 0x13: /* DVD-RW */ @@ -2124,7 +2140,8 @@ static int pkt_open_write(struct pktcdvd_device *pd) pkt_dbg(1, pd, "write speed %ukB/s\n", write_speed); break; default: - if ((ret = pkt_media_speed(pd, &media_write_speed))) + ret = pkt_media_speed(pd, &media_write_speed); + if (ret) media_write_speed = 16; write_speed = min(write_speed, media_write_speed * 177); pkt_dbg(1, pd, "write speed %ux\n", write_speed / 176); @@ -2132,14 +2149,16 @@ static int pkt_open_write(struct pktcdvd_device *pd) } read_speed = write_speed; - if ((ret = pkt_set_speed(pd, write_speed, read_speed))) { + ret = pkt_set_speed(pd, write_speed, read_speed); + if (ret) { pkt_dbg(1, pd, "couldn't set write speed\n"); return -EIO; } pd->write_speed = write_speed; pd->read_speed = read_speed; - if ((ret = pkt_perform_opc(pd))) { + ret = pkt_perform_opc(pd); + if (ret) { pkt_dbg(1, pd, "Optimum Power Calibration failed\n"); } @@ -2161,10 +2180,12 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) * so bdget() can't fail. */ bdget(pd->bdev->bd_dev); - if ((ret = blkdev_get(pd->bdev, FMODE_READ | FMODE_EXCL, pd))) + ret = blkdev_get(pd->bdev, FMODE_READ | FMODE_EXCL, pd); + if (ret) goto out; - if ((ret = pkt_get_last_written(pd, &lba))) { + ret = pkt_get_last_written(pd, &lba); + if (ret) { pkt_err(pd, "pkt_get_last_written failed\n"); goto out_putdev; } @@ -2175,7 +2196,8 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) q = bdev_get_queue(pd->bdev); if (write) { - if ((ret = pkt_open_write(pd))) + ret = pkt_open_write(pd); + if (ret) goto out_putdev; /* * Some CDRW drives can not handle writes larger than one packet, @@ -2190,7 +2212,8 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) clear_bit(PACKET_WRITABLE, &pd->flags); } - if ((ret = pkt_set_segment_merging(pd, q))) + ret = pkt_set_segment_merging(pd, q); + if (ret) goto out_putdev; if (write) { -- cgit v1.2.3 From 3f289dcb4b265416a57ca79cf4a324060bb09060 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 Jul 2018 04:47:36 -0700 Subject: block: make bdev_ops->rw_page() take a REQ_OP instead of bool c11f0c0b5bb9 ("block/mm: make bdev_ops->rw_page() take a bool for read/write") replaced @op with boolean @is_write, which limited the amount of information going into ->rw_page() and more importantly page_endio(), which removed the need to expose block internals to mm. Unfortunately, we want to track discards separately and @is_write isn't enough information. This patch updates bdev_ops->rw_page() to take REQ_OP instead but leaves page_endio() to take bool @is_write. This allows the block part of operations to have enough information while not leaking it to mm. Signed-off-by: Tejun Heo Cc: Mike Christie Cc: Minchan Kim Cc: Dan Williams Signed-off-by: Jens Axboe --- drivers/block/brd.c | 14 +++++++------- drivers/block/zram/zram_drv.c | 16 ++++++++-------- drivers/nvdimm/btt.c | 12 ++++++------ drivers/nvdimm/pmem.c | 13 ++++++------- fs/block_dev.c | 6 ++++-- fs/mpage.c | 4 ++-- include/linux/blkdev.h | 2 +- 7 files changed, 34 insertions(+), 33 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index bb976598ee43..df8103dd40ac 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -254,20 +254,20 @@ static void copy_from_brd(void *dst, struct brd_device *brd, * Process a single bvec of a bio. */ static int brd_do_bvec(struct brd_device *brd, struct page *page, - unsigned int len, unsigned int off, bool is_write, + unsigned int len, unsigned int off, unsigned int op, sector_t sector) { void *mem; int err = 0; - if (is_write) { + if (op_is_write(op)) { err = copy_to_brd_setup(brd, sector, len); if (err) goto out; } mem = kmap_atomic(page); - if (!is_write) { + if (!op_is_write(op)) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); } else { @@ -296,7 +296,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) int err; err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, - op_is_write(bio_op(bio)), sector); + bio_op(bio), sector); if (err) goto io_error; sector += len >> SECTOR_SHIFT; @@ -310,15 +310,15 @@ io_error: } static int brd_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, bool is_write) + struct page *page, unsigned int op) { struct brd_device *brd = bdev->bd_disk->private_data; int err; if (PageTransHuge(page)) return -ENOTSUPP; - err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector); - page_endio(page, is_write, err); + err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector); + page_endio(page, op_is_write(op), err); return err; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 7436b2d27fa3..78c29044684a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1274,17 +1274,17 @@ static void zram_bio_discard(struct zram *zram, u32 index, * Returns 1 if IO request was successfully submitted. */ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, bool is_write, struct bio *bio) + int offset, unsigned int op, struct bio *bio) { unsigned long start_time = jiffies; - int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ; + int rw_acct = op_is_write(op) ? REQ_OP_WRITE : REQ_OP_READ; struct request_queue *q = zram->disk->queue; int ret; generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT, &zram->disk->part0); - if (!is_write) { + if (!op_is_write(op)) { atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset, bio); flush_dcache_page(bvec->bv_page); @@ -1300,7 +1300,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, zram_slot_unlock(zram, index); if (unlikely(ret < 0)) { - if (!is_write) + if (!op_is_write(op)) atomic64_inc(&zram->stats.failed_reads); else atomic64_inc(&zram->stats.failed_writes); @@ -1338,7 +1338,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, unwritten); if (zram_bvec_rw(zram, &bv, index, offset, - op_is_write(bio_op(bio)), bio) < 0) + bio_op(bio), bio) < 0) goto out; bv.bv_offset += bv.bv_len; @@ -1390,7 +1390,7 @@ static void zram_slot_free_notify(struct block_device *bdev, } static int zram_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, bool is_write) + struct page *page, unsigned int op) { int offset, ret; u32 index; @@ -1414,7 +1414,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_len = PAGE_SIZE; bv.bv_offset = 0; - ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL); + ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL); out: /* * If I/O fails, just return error(ie, non-zero) without @@ -1429,7 +1429,7 @@ out: switch (ret) { case 0: - page_endio(page, is_write, 0); + page_endio(page, op_is_write(op), 0); break; case 1: ret = 0; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 85de8053aa34..0360c015f658 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1423,11 +1423,11 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, struct page *page, unsigned int len, unsigned int off, - bool is_write, sector_t sector) + unsigned int op, sector_t sector) { int ret; - if (!is_write) { + if (!op_is_write(op)) { ret = btt_read_pg(btt, bip, page, off, sector, len); flush_dcache_page(page); } else { @@ -1464,7 +1464,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) } err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, - op_is_write(bio_op(bio)), iter.bi_sector); + bio_op(bio), iter.bi_sector); if (err) { dev_err(&btt->nd_btt->dev, "io error in %s sector %lld, len %d,\n", @@ -1483,16 +1483,16 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) } static int btt_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, bool is_write) + struct page *page, unsigned int op) { struct btt *btt = bdev->bd_disk->private_data; int rc; unsigned int len; len = hpage_nr_pages(page) * PAGE_SIZE; - rc = btt_do_bvec(btt, NULL, page, len, 0, is_write, sector); + rc = btt_do_bvec(btt, NULL, page, len, 0, op, sector); if (rc == 0) - page_endio(page, is_write, 0); + page_endio(page, op_is_write(op), 0); return rc; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 8b1fd7f1a224..dd17acd8fe68 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -120,7 +120,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, } static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, - unsigned int len, unsigned int off, bool is_write, + unsigned int len, unsigned int off, unsigned int op, sector_t sector) { blk_status_t rc = BLK_STS_OK; @@ -131,7 +131,7 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) bad_pmem = true; - if (!is_write) { + if (!op_is_write(op)) { if (unlikely(bad_pmem)) rc = BLK_STS_IOERR; else { @@ -180,8 +180,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, - bvec.bv_offset, op_is_write(bio_op(bio)), - iter.bi_sector); + bvec.bv_offset, bio_op(bio), iter.bi_sector); if (rc) { bio->bi_status = rc; break; @@ -198,13 +197,13 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) } static int pmem_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, bool is_write) + struct page *page, unsigned int op) { struct pmem_device *pmem = bdev->bd_queue->queuedata; blk_status_t rc; rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE, - 0, is_write, sector); + 0, op, sector); /* * The ->rw_page interface is subtle and tricky. The core @@ -213,7 +212,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, * caused by double completion. */ if (rc == 0) - page_endio(page, is_write, 0); + page_endio(page, op_is_write(op), 0); return blk_status_to_errno(rc); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 0dd87aaeb39a..496fb51a1e1a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -665,7 +665,8 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, result = blk_queue_enter(bdev->bd_queue, 0); if (result) return result; - result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false); + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, + REQ_OP_READ); blk_queue_exit(bdev->bd_queue); return result; } @@ -703,7 +704,8 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, return result; set_page_writeback(page); - result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true); + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, + REQ_OP_WRITE); if (result) { end_page_writeback(page); } else { diff --git a/fs/mpage.c b/fs/mpage.c index b7e7f570733a..b73638db9866 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -51,8 +51,8 @@ static void mpage_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - page_endio(page, op_is_write(bio_op(bio)), - blk_status_to_errno(bio->bi_status)); + page_endio(page, bio_op(bio), + blk_status_to_errno(bio->bi_status)); } bio_put(bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1939ed95f936..331a6cb8805f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1943,7 +1943,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req, struct block_device_operations { int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); - int (*rw_page)(struct block_device *, sector_t, struct page *, bool); + int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); unsigned int (*check_events) (struct gendisk *disk, -- cgit v1.2.3 From 59767fbd49d794b4499d30b314df6c0d4aca584b Mon Sep 17 00:00:00 2001 From: Michael Callahan Date: Wed, 18 Jul 2018 04:47:37 -0700 Subject: block: Add part_stat_read_accum to read across field entries. Add a part_stat_read_accum macro to genhd.h to read and sum across field entries. For example to sum up the number read and write sectors completed. In addition to being ar reasonable cleanup by itself this will make it easier to add new stat fields in the future. tj: Refreshed on top of v4.17. Signed-off-by: Michael Callahan Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 3 +-- drivers/block/drbd/drbd_worker.c | 4 +--- drivers/md/md.c | 3 +-- include/linux/genhd.h | 4 ++++ 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a36a30795c43..75f6b47169e6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2674,8 +2674,7 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) if (c_min_rate == 0) return false; - curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + - (int)part_stat_read(&disk->part0, sectors[1]) - + curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - atomic_read(&device->rs_sect_ev); if (atomic_read(&device->ap_actlog_cnt) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 5e793dd7adfb..b8f77e83d456 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1690,9 +1690,7 @@ void drbd_rs_controller_reset(struct drbd_device *device) atomic_set(&device->rs_sect_in, 0); atomic_set(&device->rs_sect_ev, 0); device->rs_in_flight = 0; - device->rs_last_events = - (int)part_stat_read(&disk->part0, sectors[0]) + - (int)part_stat_read(&disk->part0, sectors[1]); + device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors); /* Updating the RCU protected object in place is necessary since this function gets called from atomic context. diff --git a/drivers/md/md.c b/drivers/md/md.c index 994aed2f9dff..dabe36723d60 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8046,8 +8046,7 @@ static int is_mddev_idle(struct mddev *mddev, int init) rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; - curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + - (int)part_stat_read(&disk->part0, sectors[1]) - + curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats * as sync_io is counted when a request starts, and diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6cb8a5789668..19f36fa10995 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -353,6 +353,10 @@ static inline void free_part_stats(struct hd_struct *part) #endif /* CONFIG_SMP */ +#define part_stat_read_accum(part, field) \ + (part_stat_read(part, field[0]) + \ + part_stat_read(part, field[1])) + #define part_stat_add(cpu, part, field, addnd) do { \ __part_stat_add((cpu), (part), field, addnd); \ if ((part)->partno) \ -- cgit v1.2.3 From ddcf35d397976421a4ec1d0d00fbcc027a8cb034 Mon Sep 17 00:00:00 2001 From: Michael Callahan Date: Wed, 18 Jul 2018 04:47:39 -0700 Subject: block: Add and use op_stat_group() for indexing disk_stat fields. Add and use a new op_stat_group() function for indexing partition stat fields rather than indexing them by rq_data_dir() or bio_data_dir(). This function works similarly to op_is_sync() in that it takes the request::cmd_flags or bio::bi_opf flags and determines which stats should et updated. In addition, the second parameter to generic_start_io_acct() and generic_end_io_acct() is now a REQ_OP rather than simply a read or write bit and it uses op_stat_group() on the parameter to determine the stat group. Note that the partition in_flight counts are not part of the per-cpu statistics and as such are not indexed via this function. It's now indexed by op_is_write(). tj: Refreshed on top of v4.17. Updated to pass around REQ_OP. Signed-off-by: Michael Callahan Signed-off-by: Tejun Heo Cc: Minchan Kim Cc: Dan Williams Cc: Joshua Morris Cc: Philipp Reisner Cc: Matias Bjorling Cc: Kent Overstreet Cc: Alasdair Kergon Signed-off-by: Jens Axboe --- block/bio.c | 16 +++++++++------- block/blk-core.c | 12 ++++++------ drivers/block/drbd/drbd_req.c | 4 ++-- drivers/block/rsxx/dev.c | 6 +++--- drivers/block/zram/zram_drv.c | 5 ++--- drivers/lightnvm/pblk-cache.c | 5 +++-- drivers/lightnvm/pblk-read.c | 5 +++-- drivers/md/bcache/request.c | 13 +++++-------- drivers/md/dm.c | 6 ++++-- drivers/md/md.c | 5 +++-- drivers/nvdimm/nd.h | 7 +++---- include/linux/bio.h | 4 ++-- include/linux/blk_types.h | 5 +++++ 13 files changed, 50 insertions(+), 43 deletions(-) (limited to 'drivers/block') diff --git a/block/bio.c b/block/bio.c index f3536bfc8298..8ecc95615941 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1728,29 +1728,31 @@ void bio_check_pages_dirty(struct bio *bio) } EXPORT_SYMBOL_GPL(bio_check_pages_dirty); -void generic_start_io_acct(struct request_queue *q, int rw, +void generic_start_io_acct(struct request_queue *q, int op, unsigned long sectors, struct hd_struct *part) { + const int sgrp = op_stat_group(op); int cpu = part_stat_lock(); part_round_stats(q, cpu, part); - part_stat_inc(cpu, part, ios[rw]); - part_stat_add(cpu, part, sectors[rw], sectors); - part_inc_in_flight(q, part, rw); + part_stat_inc(cpu, part, ios[sgrp]); + part_stat_add(cpu, part, sectors[sgrp], sectors); + part_inc_in_flight(q, part, op_is_write(op)); part_stat_unlock(); } EXPORT_SYMBOL(generic_start_io_acct); -void generic_end_io_acct(struct request_queue *q, int rw, +void generic_end_io_acct(struct request_queue *q, int req_op, struct hd_struct *part, unsigned long start_time) { unsigned long duration = jiffies - start_time; + const int sgrp = op_stat_group(req_op); int cpu = part_stat_lock(); - part_stat_add(cpu, part, ticks[rw], duration); + part_stat_add(cpu, part, ticks[sgrp], duration); part_round_stats(q, cpu, part); - part_dec_in_flight(q, part, rw); + part_dec_in_flight(q, part, op_is_write(req_op)); part_stat_unlock(); } diff --git a/block/blk-core.c b/block/blk-core.c index c4b57d8806fe..03a4ea93a5f3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2702,13 +2702,13 @@ EXPORT_SYMBOL_GPL(blk_rq_err_bytes); void blk_account_io_completion(struct request *req, unsigned int bytes) { if (blk_do_io_stat(req)) { - const int rw = rq_data_dir(req); + const int sgrp = op_stat_group(req_op(req)); struct hd_struct *part; int cpu; cpu = part_stat_lock(); part = req->part; - part_stat_add(cpu, part, sectors[rw], bytes >> 9); + part_stat_add(cpu, part, sectors[sgrp], bytes >> 9); part_stat_unlock(); } } @@ -2722,7 +2722,7 @@ void blk_account_io_done(struct request *req, u64 now) */ if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { unsigned long duration; - const int rw = rq_data_dir(req); + const int sgrp = op_stat_group(req_op(req)); struct hd_struct *part; int cpu; @@ -2730,10 +2730,10 @@ void blk_account_io_done(struct request *req, u64 now) cpu = part_stat_lock(); part = req->part; - part_stat_inc(cpu, part, ios[rw]); - part_stat_add(cpu, part, ticks[rw], duration); + part_stat_inc(cpu, part, ios[sgrp]); + part_stat_add(cpu, part, ticks[sgrp], duration); part_round_stats(req->q, cpu, part); - part_dec_in_flight(req->q, part, rw); + part_dec_in_flight(req->q, part, rq_data_dir(req)); hd_struct_put(part); part_stat_unlock(); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d146fedc38bb..19cac36e9737 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -38,7 +38,7 @@ static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request { struct request_queue *q = device->rq_queue; - generic_start_io_acct(q, bio_data_dir(req->master_bio), + generic_start_io_acct(q, bio_op(req->master_bio), req->i.size >> 9, &device->vdisk->part0); } @@ -47,7 +47,7 @@ static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *r { struct request_queue *q = device->rq_queue; - generic_end_io_acct(q, bio_data_dir(req->master_bio), + generic_end_io_acct(q, bio_op(req->master_bio), &device->vdisk->part0, req->start_jif); } diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index dddb3f2490b6..1a92f9e65937 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -112,7 +112,7 @@ static const struct block_device_operations rsxx_fops = { static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio) { - generic_start_io_acct(card->queue, bio_data_dir(bio), bio_sectors(bio), + generic_start_io_acct(card->queue, bio_op(bio), bio_sectors(bio), &card->gendisk->part0); } @@ -120,8 +120,8 @@ static void disk_stats_complete(struct rsxx_cardinfo *card, struct bio *bio, unsigned long start_time) { - generic_end_io_acct(card->queue, bio_data_dir(bio), - &card->gendisk->part0, start_time); + generic_end_io_acct(card->queue, bio_op(bio), + &card->gendisk->part0, start_time); } static void bio_dma_done_cb(struct rsxx_cardinfo *card, diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 78c29044684a..2907a8156aaf 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1277,11 +1277,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, unsigned int op, struct bio *bio) { unsigned long start_time = jiffies; - int rw_acct = op_is_write(op) ? REQ_OP_WRITE : REQ_OP_READ; struct request_queue *q = zram->disk->queue; int ret; - generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT, + generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT, &zram->disk->part0); if (!op_is_write(op)) { @@ -1293,7 +1292,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, ret = zram_bvec_write(zram, bvec, index, offset, bio); } - generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time); + generic_end_io_acct(q, op, &zram->disk->part0, start_time); zram_slot_lock(zram, index); zram_accessed(zram, index); diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c index 77d811962818..f565a56b898a 100644 --- a/drivers/lightnvm/pblk-cache.c +++ b/drivers/lightnvm/pblk-cache.c @@ -27,7 +27,8 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags) int nr_entries = pblk_get_secs(bio); int i, ret; - generic_start_io_acct(q, WRITE, bio_sectors(bio), &pblk->disk->part0); + generic_start_io_acct(q, REQ_OP_WRITE, bio_sectors(bio), + &pblk->disk->part0); /* Update the write buffer head (mem) with the entries that we can * write. The write in itself cannot fail, so there is no need to @@ -75,7 +76,7 @@ retry: pblk_rl_inserted(&pblk->rl, nr_entries); out: - generic_end_io_acct(q, WRITE, &pblk->disk->part0, start_time); + generic_end_io_acct(q, REQ_OP_WRITE, &pblk->disk->part0, start_time); pblk_write_should_kick(pblk); return ret; } diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 26d414ae25b6..5a46d7f9302f 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -199,7 +199,7 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, struct bio *int_bio = rqd->bio; unsigned long start_time = r_ctx->start_time; - generic_end_io_acct(dev->q, READ, &pblk->disk->part0, start_time); + generic_end_io_acct(dev->q, REQ_OP_READ, &pblk->disk->part0, start_time); if (rqd->error) pblk_log_read_err(pblk, rqd); @@ -461,7 +461,8 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) return NVM_IO_ERR; } - generic_start_io_acct(q, READ, bio_sectors(bio), &pblk->disk->part0); + generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio), + &pblk->disk->part0); bitmap_zero(read_bitmap, nr_secs); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index ae67f5fa8047..97707b0c54ce 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -667,8 +667,7 @@ static void backing_request_endio(struct bio *bio) static void bio_complete(struct search *s) { if (s->orig_bio) { - generic_end_io_acct(s->d->disk->queue, - bio_data_dir(s->orig_bio), + generic_end_io_acct(s->d->disk->queue, bio_op(s->orig_bio), &s->d->disk->part0, s->start_time); trace_bcache_request_end(s->d, s->orig_bio); @@ -1062,8 +1061,7 @@ static void detached_dev_end_io(struct bio *bio) bio->bi_end_io = ddip->bi_end_io; bio->bi_private = ddip->bi_private; - generic_end_io_acct(ddip->d->disk->queue, - bio_data_dir(bio), + generic_end_io_acct(ddip->d->disk->queue, bio_op(bio), &ddip->d->disk->part0, ddip->start_time); if (bio->bi_status) { @@ -1120,7 +1118,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, } atomic_set(&dc->backing_idle, 0); - generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); + generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0); bio_set_dev(bio, dc->bdev); bio->bi_iter.bi_sector += dc->sb.data_offset; @@ -1229,7 +1227,6 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, struct search *s; struct closure *cl; struct bcache_device *d = bio->bi_disk->private_data; - int rw = bio_data_dir(bio); if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) { bio->bi_status = BLK_STS_IOERR; @@ -1237,7 +1234,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, return BLK_QC_T_NONE; } - generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); + generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0); s = search_alloc(bio, d); cl = &s->cl; @@ -1254,7 +1251,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, flash_dev_nodata, bcache_wq); return BLK_QC_T_NONE; - } else if (rw) { + } else if (bio_data_dir(bio)) { bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &KEY(d->id, bio->bi_iter.bi_sector, 0), &KEY(d->id, bio_end_sector(bio), 0)); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b0dd7027848b..20f7e4ef5342 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -609,7 +609,8 @@ static void start_io_acct(struct dm_io *io) io->start_time = jiffies; - generic_start_io_acct(md->queue, rw, bio_sectors(bio), &dm_disk(md)->part0); + generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio), + &dm_disk(md)->part0); atomic_set(&dm_disk(md)->part0.in_flight[rw], atomic_inc_return(&md->pending[rw])); @@ -628,7 +629,8 @@ static void end_io_acct(struct dm_io *io) int pending; int rw = bio_data_dir(bio); - generic_end_io_acct(md->queue, rw, &dm_disk(md)->part0, io->start_time); + generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0, + io->start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), diff --git a/drivers/md/md.c b/drivers/md/md.c index dabe36723d60..f6e58dbca0d4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -335,6 +335,7 @@ EXPORT_SYMBOL(md_handle_request); static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) { const int rw = bio_data_dir(bio); + const int sgrp = op_stat_group(bio_op(bio)); struct mddev *mddev = q->queuedata; unsigned int sectors; int cpu; @@ -363,8 +364,8 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) md_handle_request(mddev, bio); cpu = part_stat_lock(); - part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); - part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[sgrp]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[sgrp], sectors); part_stat_unlock(); return BLK_QC_T_NONE; diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 32e0364b48b9..6ee7fd7e4bbd 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -396,16 +396,15 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) return false; *start = jiffies; - generic_start_io_acct(disk->queue, bio_data_dir(bio), - bio_sectors(bio), &disk->part0); + generic_start_io_acct(disk->queue, bio_op(bio), bio_sectors(bio), + &disk->part0); return true; } static inline void nd_iostat_end(struct bio *bio, unsigned long start) { struct gendisk *disk = bio->bi_disk; - generic_end_io_acct(disk->queue, bio_data_dir(bio), &disk->part0, - start); + generic_end_io_acct(disk->queue, bio_op(bio), &disk->part0, start); } static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) diff --git a/include/linux/bio.h b/include/linux/bio.h index a00dfff51aa5..ab221c517f4e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -496,9 +496,9 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -void generic_start_io_acct(struct request_queue *q, int rw, +void generic_start_io_acct(struct request_queue *q, int op, unsigned long sectors, struct hd_struct *part); -void generic_end_io_acct(struct request_queue *q, int rw, +void generic_end_io_acct(struct request_queue *q, int op, struct hd_struct *part, unsigned long start_time); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d2b44de56bc1..2960a96d833c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -401,6 +401,11 @@ static inline bool op_is_sync(unsigned int op) (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH)); } +static inline int op_stat_group(unsigned int op) +{ + return op_is_write(op); +} + typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U #define BLK_QC_T_SHIFT 16 -- cgit v1.2.3 From 76f17d8ba1cbc3d2786955b2f15e071da93527cd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 23 Jul 2018 14:18:33 -0700 Subject: block: Rename the null_blk_mod kernel module back into null_blk Commit ca4b2a011948 ("null_blk: add zone support") breaks several blktests scripts because it renamed the null_blk kernel module into null_blk_mod. Hence rename null_blk_mod back into null_blk. Fixes: ca4b2a011948 ("null_blk: add zone support") Signed-off-by: Bart Van Assche Cc: Matias Bjorling Cc: Christoph Hellwig Cc: Ming Lei Cc: Damien Le Moal Signed-off-by: Jens Axboe --- drivers/block/Makefile | 6 +- drivers/block/null_blk.c | 1926 ----------------------------------------- drivers/block/null_blk_main.c | 1926 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1929 insertions(+), 1929 deletions(-) delete mode 100644 drivers/block/null_blk.c create mode 100644 drivers/block/null_blk_main.c (limited to 'drivers/block') diff --git a/drivers/block/Makefile b/drivers/block/Makefile index a0d88aa0c05d..8566b188368b 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -38,9 +38,9 @@ obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ obj-$(CONFIG_ZRAM) += zram/ -obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk_mod.o -null_blk_mod-objs := null_blk.o -null_blk_mod-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o +obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o +null_blk-objs := null_blk_main.o +null_blk-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o skd-y := skd_main.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c deleted file mode 100644 index 86cafa6d3b41..000000000000 --- a/drivers/block/null_blk.c +++ /dev/null @@ -1,1926 +0,0 @@ -/* - * Add configfs and memory store: Kyungchan Koh and - * Shaohua Li - */ -#include - -#include -#include -#include -#include -#include "null_blk.h" - -#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) -#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) -#define SECTOR_MASK (PAGE_SECTORS - 1) - -#define FREE_BATCH 16 - -#define TICKS_PER_SEC 50ULL -#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC) - -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION -static DECLARE_FAULT_ATTR(null_timeout_attr); -static DECLARE_FAULT_ATTR(null_requeue_attr); -#endif - -static inline u64 mb_per_tick(int mbps) -{ - return (1 << 20) / TICKS_PER_SEC * ((u64) mbps); -} - -/* - * Status flags for nullb_device. - * - * CONFIGURED: Device has been configured and turned on. Cannot reconfigure. - * UP: Device is currently on and visible in userspace. - * THROTTLED: Device is being throttled. - * CACHE: Device is using a write-back cache. - */ -enum nullb_device_flags { - NULLB_DEV_FL_CONFIGURED = 0, - NULLB_DEV_FL_UP = 1, - NULLB_DEV_FL_THROTTLED = 2, - NULLB_DEV_FL_CACHE = 3, -}; - -#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2) -/* - * nullb_page is a page in memory for nullb devices. - * - * @page: The page holding the data. - * @bitmap: The bitmap represents which sector in the page has data. - * Each bit represents one block size. For example, sector 8 - * will use the 7th bit - * The highest 2 bits of bitmap are for special purpose. LOCK means the cache - * page is being flushing to storage. FREE means the cache page is freed and - * should be skipped from flushing to storage. Please see - * null_make_cache_space - */ -struct nullb_page { - struct page *page; - DECLARE_BITMAP(bitmap, MAP_SZ); -}; -#define NULLB_PAGE_LOCK (MAP_SZ - 1) -#define NULLB_PAGE_FREE (MAP_SZ - 2) - -static LIST_HEAD(nullb_list); -static struct mutex lock; -static int null_major; -static DEFINE_IDA(nullb_indexes); -static struct blk_mq_tag_set tag_set; - -enum { - NULL_IRQ_NONE = 0, - NULL_IRQ_SOFTIRQ = 1, - NULL_IRQ_TIMER = 2, -}; - -enum { - NULL_Q_BIO = 0, - NULL_Q_RQ = 1, - NULL_Q_MQ = 2, -}; - -static int g_no_sched; -module_param_named(no_sched, g_no_sched, int, 0444); -MODULE_PARM_DESC(no_sched, "No io scheduler"); - -static int g_submit_queues = 1; -module_param_named(submit_queues, g_submit_queues, int, 0444); -MODULE_PARM_DESC(submit_queues, "Number of submission queues"); - -static int g_home_node = NUMA_NO_NODE; -module_param_named(home_node, g_home_node, int, 0444); -MODULE_PARM_DESC(home_node, "Home node for the device"); - -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION -static char g_timeout_str[80]; -module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444); - -static char g_requeue_str[80]; -module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444); -#endif - -static int g_queue_mode = NULL_Q_MQ; - -static int null_param_store_val(const char *str, int *val, int min, int max) -{ - int ret, new_val; - - ret = kstrtoint(str, 10, &new_val); - if (ret) - return -EINVAL; - - if (new_val < min || new_val > max) - return -EINVAL; - - *val = new_val; - return 0; -} - -static int null_set_queue_mode(const char *str, const struct kernel_param *kp) -{ - return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ); -} - -static const struct kernel_param_ops null_queue_mode_param_ops = { - .set = null_set_queue_mode, - .get = param_get_int, -}; - -device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444); -MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)"); - -static int g_gb = 250; -module_param_named(gb, g_gb, int, 0444); -MODULE_PARM_DESC(gb, "Size in GB"); - -static int g_bs = 512; -module_param_named(bs, g_bs, int, 0444); -MODULE_PARM_DESC(bs, "Block size (in bytes)"); - -static int nr_devices = 1; -module_param(nr_devices, int, 0444); -MODULE_PARM_DESC(nr_devices, "Number of devices to register"); - -static bool g_blocking; -module_param_named(blocking, g_blocking, bool, 0444); -MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); - -static bool shared_tags; -module_param(shared_tags, bool, 0444); -MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq"); - -static int g_irqmode = NULL_IRQ_SOFTIRQ; - -static int null_set_irqmode(const char *str, const struct kernel_param *kp) -{ - return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE, - NULL_IRQ_TIMER); -} - -static const struct kernel_param_ops null_irqmode_param_ops = { - .set = null_set_irqmode, - .get = param_get_int, -}; - -device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444); -MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); - -static unsigned long g_completion_nsec = 10000; -module_param_named(completion_nsec, g_completion_nsec, ulong, 0444); -MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns"); - -static int g_hw_queue_depth = 64; -module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444); -MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); - -static bool g_use_per_node_hctx; -module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444); -MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); - -static bool g_zoned; -module_param_named(zoned, g_zoned, bool, S_IRUGO); -MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false"); - -static unsigned long g_zone_size = 256; -module_param_named(zone_size, g_zone_size, ulong, S_IRUGO); -MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256"); - -static struct nullb_device *null_alloc_dev(void); -static void null_free_dev(struct nullb_device *dev); -static void null_del_dev(struct nullb *nullb); -static int null_add_dev(struct nullb_device *dev); -static void null_free_device_storage(struct nullb_device *dev, bool is_cache); - -static inline struct nullb_device *to_nullb_device(struct config_item *item) -{ - return item ? container_of(item, struct nullb_device, item) : NULL; -} - -static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page) -{ - return snprintf(page, PAGE_SIZE, "%u\n", val); -} - -static inline ssize_t nullb_device_ulong_attr_show(unsigned long val, - char *page) -{ - return snprintf(page, PAGE_SIZE, "%lu\n", val); -} - -static inline ssize_t nullb_device_bool_attr_show(bool val, char *page) -{ - return snprintf(page, PAGE_SIZE, "%u\n", val); -} - -static ssize_t nullb_device_uint_attr_store(unsigned int *val, - const char *page, size_t count) -{ - unsigned int tmp; - int result; - - result = kstrtouint(page, 0, &tmp); - if (result) - return result; - - *val = tmp; - return count; -} - -static ssize_t nullb_device_ulong_attr_store(unsigned long *val, - const char *page, size_t count) -{ - int result; - unsigned long tmp; - - result = kstrtoul(page, 0, &tmp); - if (result) - return result; - - *val = tmp; - return count; -} - -static ssize_t nullb_device_bool_attr_store(bool *val, const char *page, - size_t count) -{ - bool tmp; - int result; - - result = kstrtobool(page, &tmp); - if (result) - return result; - - *val = tmp; - return count; -} - -/* The following macro should only be used with TYPE = {uint, ulong, bool}. */ -#define NULLB_DEVICE_ATTR(NAME, TYPE) \ -static ssize_t \ -nullb_device_##NAME##_show(struct config_item *item, char *page) \ -{ \ - return nullb_device_##TYPE##_attr_show( \ - to_nullb_device(item)->NAME, page); \ -} \ -static ssize_t \ -nullb_device_##NAME##_store(struct config_item *item, const char *page, \ - size_t count) \ -{ \ - if (test_bit(NULLB_DEV_FL_CONFIGURED, &to_nullb_device(item)->flags)) \ - return -EBUSY; \ - return nullb_device_##TYPE##_attr_store( \ - &to_nullb_device(item)->NAME, page, count); \ -} \ -CONFIGFS_ATTR(nullb_device_, NAME); - -NULLB_DEVICE_ATTR(size, ulong); -NULLB_DEVICE_ATTR(completion_nsec, ulong); -NULLB_DEVICE_ATTR(submit_queues, uint); -NULLB_DEVICE_ATTR(home_node, uint); -NULLB_DEVICE_ATTR(queue_mode, uint); -NULLB_DEVICE_ATTR(blocksize, uint); -NULLB_DEVICE_ATTR(irqmode, uint); -NULLB_DEVICE_ATTR(hw_queue_depth, uint); -NULLB_DEVICE_ATTR(index, uint); -NULLB_DEVICE_ATTR(blocking, bool); -NULLB_DEVICE_ATTR(use_per_node_hctx, bool); -NULLB_DEVICE_ATTR(memory_backed, bool); -NULLB_DEVICE_ATTR(discard, bool); -NULLB_DEVICE_ATTR(mbps, uint); -NULLB_DEVICE_ATTR(cache_size, ulong); -NULLB_DEVICE_ATTR(zoned, bool); -NULLB_DEVICE_ATTR(zone_size, ulong); - -static ssize_t nullb_device_power_show(struct config_item *item, char *page) -{ - return nullb_device_bool_attr_show(to_nullb_device(item)->power, page); -} - -static ssize_t nullb_device_power_store(struct config_item *item, - const char *page, size_t count) -{ - struct nullb_device *dev = to_nullb_device(item); - bool newp = false; - ssize_t ret; - - ret = nullb_device_bool_attr_store(&newp, page, count); - if (ret < 0) - return ret; - - if (!dev->power && newp) { - if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags)) - return count; - if (null_add_dev(dev)) { - clear_bit(NULLB_DEV_FL_UP, &dev->flags); - return -ENOMEM; - } - - set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); - dev->power = newp; - } else if (dev->power && !newp) { - mutex_lock(&lock); - dev->power = newp; - null_del_dev(dev->nullb); - mutex_unlock(&lock); - clear_bit(NULLB_DEV_FL_UP, &dev->flags); - clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); - } - - return count; -} - -CONFIGFS_ATTR(nullb_device_, power); - -static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page) -{ - struct nullb_device *t_dev = to_nullb_device(item); - - return badblocks_show(&t_dev->badblocks, page, 0); -} - -static ssize_t nullb_device_badblocks_store(struct config_item *item, - const char *page, size_t count) -{ - struct nullb_device *t_dev = to_nullb_device(item); - char *orig, *buf, *tmp; - u64 start, end; - int ret; - - orig = kstrndup(page, count, GFP_KERNEL); - if (!orig) - return -ENOMEM; - - buf = strstrip(orig); - - ret = -EINVAL; - if (buf[0] != '+' && buf[0] != '-') - goto out; - tmp = strchr(&buf[1], '-'); - if (!tmp) - goto out; - *tmp = '\0'; - ret = kstrtoull(buf + 1, 0, &start); - if (ret) - goto out; - ret = kstrtoull(tmp + 1, 0, &end); - if (ret) - goto out; - ret = -EINVAL; - if (start > end) - goto out; - /* enable badblocks */ - cmpxchg(&t_dev->badblocks.shift, -1, 0); - if (buf[0] == '+') - ret = badblocks_set(&t_dev->badblocks, start, - end - start + 1, 1); - else - ret = badblocks_clear(&t_dev->badblocks, start, - end - start + 1); - if (ret == 0) - ret = count; -out: - kfree(orig); - return ret; -} -CONFIGFS_ATTR(nullb_device_, badblocks); - -static struct configfs_attribute *nullb_device_attrs[] = { - &nullb_device_attr_size, - &nullb_device_attr_completion_nsec, - &nullb_device_attr_submit_queues, - &nullb_device_attr_home_node, - &nullb_device_attr_queue_mode, - &nullb_device_attr_blocksize, - &nullb_device_attr_irqmode, - &nullb_device_attr_hw_queue_depth, - &nullb_device_attr_index, - &nullb_device_attr_blocking, - &nullb_device_attr_use_per_node_hctx, - &nullb_device_attr_power, - &nullb_device_attr_memory_backed, - &nullb_device_attr_discard, - &nullb_device_attr_mbps, - &nullb_device_attr_cache_size, - &nullb_device_attr_badblocks, - &nullb_device_attr_zoned, - &nullb_device_attr_zone_size, - NULL, -}; - -static void nullb_device_release(struct config_item *item) -{ - struct nullb_device *dev = to_nullb_device(item); - - null_free_device_storage(dev, false); - null_free_dev(dev); -} - -static struct configfs_item_operations nullb_device_ops = { - .release = nullb_device_release, -}; - -static const struct config_item_type nullb_device_type = { - .ct_item_ops = &nullb_device_ops, - .ct_attrs = nullb_device_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct -config_item *nullb_group_make_item(struct config_group *group, const char *name) -{ - struct nullb_device *dev; - - dev = null_alloc_dev(); - if (!dev) - return ERR_PTR(-ENOMEM); - - config_item_init_type_name(&dev->item, name, &nullb_device_type); - - return &dev->item; -} - -static void -nullb_group_drop_item(struct config_group *group, struct config_item *item) -{ - struct nullb_device *dev = to_nullb_device(item); - - if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) { - mutex_lock(&lock); - dev->power = false; - null_del_dev(dev->nullb); - mutex_unlock(&lock); - } - - config_item_put(item); -} - -static ssize_t memb_group_features_show(struct config_item *item, char *page) -{ - return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size\n"); -} - -CONFIGFS_ATTR_RO(memb_group_, features); - -static struct configfs_attribute *nullb_group_attrs[] = { - &memb_group_attr_features, - NULL, -}; - -static struct configfs_group_operations nullb_group_ops = { - .make_item = nullb_group_make_item, - .drop_item = nullb_group_drop_item, -}; - -static const struct config_item_type nullb_group_type = { - .ct_group_ops = &nullb_group_ops, - .ct_attrs = nullb_group_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct configfs_subsystem nullb_subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "nullb", - .ci_type = &nullb_group_type, - }, - }, -}; - -static inline int null_cache_active(struct nullb *nullb) -{ - return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); -} - -static struct nullb_device *null_alloc_dev(void) -{ - struct nullb_device *dev; - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) - return NULL; - INIT_RADIX_TREE(&dev->data, GFP_ATOMIC); - INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC); - if (badblocks_init(&dev->badblocks, 0)) { - kfree(dev); - return NULL; - } - - dev->size = g_gb * 1024; - dev->completion_nsec = g_completion_nsec; - dev->submit_queues = g_submit_queues; - dev->home_node = g_home_node; - dev->queue_mode = g_queue_mode; - dev->blocksize = g_bs; - dev->irqmode = g_irqmode; - dev->hw_queue_depth = g_hw_queue_depth; - dev->blocking = g_blocking; - dev->use_per_node_hctx = g_use_per_node_hctx; - dev->zoned = g_zoned; - dev->zone_size = g_zone_size; - return dev; -} - -static void null_free_dev(struct nullb_device *dev) -{ - if (!dev) - return; - - null_zone_exit(dev); - badblocks_exit(&dev->badblocks); - kfree(dev); -} - -static void put_tag(struct nullb_queue *nq, unsigned int tag) -{ - clear_bit_unlock(tag, nq->tag_map); - - if (waitqueue_active(&nq->wait)) - wake_up(&nq->wait); -} - -static unsigned int get_tag(struct nullb_queue *nq) -{ - unsigned int tag; - - do { - tag = find_first_zero_bit(nq->tag_map, nq->queue_depth); - if (tag >= nq->queue_depth) - return -1U; - } while (test_and_set_bit_lock(tag, nq->tag_map)); - - return tag; -} - -static void free_cmd(struct nullb_cmd *cmd) -{ - put_tag(cmd->nq, cmd->tag); -} - -static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer); - -static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) -{ - struct nullb_cmd *cmd; - unsigned int tag; - - tag = get_tag(nq); - if (tag != -1U) { - cmd = &nq->cmds[tag]; - cmd->tag = tag; - cmd->nq = nq; - if (nq->dev->irqmode == NULL_IRQ_TIMER) { - hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - cmd->timer.function = null_cmd_timer_expired; - } - return cmd; - } - - return NULL; -} - -static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) -{ - struct nullb_cmd *cmd; - DEFINE_WAIT(wait); - - cmd = __alloc_cmd(nq); - if (cmd || !can_wait) - return cmd; - - do { - prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); - cmd = __alloc_cmd(nq); - if (cmd) - break; - - io_schedule(); - } while (1); - - finish_wait(&nq->wait, &wait); - return cmd; -} - -static void end_cmd(struct nullb_cmd *cmd) -{ - struct request_queue *q = NULL; - int queue_mode = cmd->nq->dev->queue_mode; - - if (cmd->rq) - q = cmd->rq->q; - - switch (queue_mode) { - case NULL_Q_MQ: - blk_mq_end_request(cmd->rq, cmd->error); - return; - case NULL_Q_RQ: - INIT_LIST_HEAD(&cmd->rq->queuelist); - blk_end_request_all(cmd->rq, cmd->error); - break; - case NULL_Q_BIO: - cmd->bio->bi_status = cmd->error; - bio_endio(cmd->bio); - break; - } - - free_cmd(cmd); - - /* Restart queue if needed, as we are freeing a tag */ - if (queue_mode == NULL_Q_RQ && blk_queue_stopped(q)) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue_async(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } -} - -static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) -{ - end_cmd(container_of(timer, struct nullb_cmd, timer)); - - return HRTIMER_NORESTART; -} - -static void null_cmd_end_timer(struct nullb_cmd *cmd) -{ - ktime_t kt = cmd->nq->dev->completion_nsec; - - hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL); -} - -static void null_softirq_done_fn(struct request *rq) -{ - struct nullb *nullb = rq->q->queuedata; - - if (nullb->dev->queue_mode == NULL_Q_MQ) - end_cmd(blk_mq_rq_to_pdu(rq)); - else - end_cmd(rq->special); -} - -static struct nullb_page *null_alloc_page(gfp_t gfp_flags) -{ - struct nullb_page *t_page; - - t_page = kmalloc(sizeof(struct nullb_page), gfp_flags); - if (!t_page) - goto out; - - t_page->page = alloc_pages(gfp_flags, 0); - if (!t_page->page) - goto out_freepage; - - memset(t_page->bitmap, 0, sizeof(t_page->bitmap)); - return t_page; -out_freepage: - kfree(t_page); -out: - return NULL; -} - -static void null_free_page(struct nullb_page *t_page) -{ - __set_bit(NULLB_PAGE_FREE, t_page->bitmap); - if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap)) - return; - __free_page(t_page->page); - kfree(t_page); -} - -static bool null_page_empty(struct nullb_page *page) -{ - int size = MAP_SZ - 2; - - return find_first_bit(page->bitmap, size) == size; -} - -static void null_free_sector(struct nullb *nullb, sector_t sector, - bool is_cache) -{ - unsigned int sector_bit; - u64 idx; - struct nullb_page *t_page, *ret; - struct radix_tree_root *root; - - root = is_cache ? &nullb->dev->cache : &nullb->dev->data; - idx = sector >> PAGE_SECTORS_SHIFT; - sector_bit = (sector & SECTOR_MASK); - - t_page = radix_tree_lookup(root, idx); - if (t_page) { - __clear_bit(sector_bit, t_page->bitmap); - - if (null_page_empty(t_page)) { - ret = radix_tree_delete_item(root, idx, t_page); - WARN_ON(ret != t_page); - null_free_page(ret); - if (is_cache) - nullb->dev->curr_cache -= PAGE_SIZE; - } - } -} - -static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx, - struct nullb_page *t_page, bool is_cache) -{ - struct radix_tree_root *root; - - root = is_cache ? &nullb->dev->cache : &nullb->dev->data; - - if (radix_tree_insert(root, idx, t_page)) { - null_free_page(t_page); - t_page = radix_tree_lookup(root, idx); - WARN_ON(!t_page || t_page->page->index != idx); - } else if (is_cache) - nullb->dev->curr_cache += PAGE_SIZE; - - return t_page; -} - -static void null_free_device_storage(struct nullb_device *dev, bool is_cache) -{ - unsigned long pos = 0; - int nr_pages; - struct nullb_page *ret, *t_pages[FREE_BATCH]; - struct radix_tree_root *root; - - root = is_cache ? &dev->cache : &dev->data; - - do { - int i; - - nr_pages = radix_tree_gang_lookup(root, - (void **)t_pages, pos, FREE_BATCH); - - for (i = 0; i < nr_pages; i++) { - pos = t_pages[i]->page->index; - ret = radix_tree_delete_item(root, pos, t_pages[i]); - WARN_ON(ret != t_pages[i]); - null_free_page(ret); - } - - pos++; - } while (nr_pages == FREE_BATCH); - - if (is_cache) - dev->curr_cache = 0; -} - -static struct nullb_page *__null_lookup_page(struct nullb *nullb, - sector_t sector, bool for_write, bool is_cache) -{ - unsigned int sector_bit; - u64 idx; - struct nullb_page *t_page; - struct radix_tree_root *root; - - idx = sector >> PAGE_SECTORS_SHIFT; - sector_bit = (sector & SECTOR_MASK); - - root = is_cache ? &nullb->dev->cache : &nullb->dev->data; - t_page = radix_tree_lookup(root, idx); - WARN_ON(t_page && t_page->page->index != idx); - - if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap))) - return t_page; - - return NULL; -} - -static struct nullb_page *null_lookup_page(struct nullb *nullb, - sector_t sector, bool for_write, bool ignore_cache) -{ - struct nullb_page *page = NULL; - - if (!ignore_cache) - page = __null_lookup_page(nullb, sector, for_write, true); - if (page) - return page; - return __null_lookup_page(nullb, sector, for_write, false); -} - -static struct nullb_page *null_insert_page(struct nullb *nullb, - sector_t sector, bool ignore_cache) -{ - u64 idx; - struct nullb_page *t_page; - - t_page = null_lookup_page(nullb, sector, true, ignore_cache); - if (t_page) - return t_page; - - spin_unlock_irq(&nullb->lock); - - t_page = null_alloc_page(GFP_NOIO); - if (!t_page) - goto out_lock; - - if (radix_tree_preload(GFP_NOIO)) - goto out_freepage; - - spin_lock_irq(&nullb->lock); - idx = sector >> PAGE_SECTORS_SHIFT; - t_page->page->index = idx; - t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache); - radix_tree_preload_end(); - - return t_page; -out_freepage: - null_free_page(t_page); -out_lock: - spin_lock_irq(&nullb->lock); - return null_lookup_page(nullb, sector, true, ignore_cache); -} - -static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) -{ - int i; - unsigned int offset; - u64 idx; - struct nullb_page *t_page, *ret; - void *dst, *src; - - idx = c_page->page->index; - - t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true); - - __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap); - if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) { - null_free_page(c_page); - if (t_page && null_page_empty(t_page)) { - ret = radix_tree_delete_item(&nullb->dev->data, - idx, t_page); - null_free_page(t_page); - } - return 0; - } - - if (!t_page) - return -ENOMEM; - - src = kmap_atomic(c_page->page); - dst = kmap_atomic(t_page->page); - - for (i = 0; i < PAGE_SECTORS; - i += (nullb->dev->blocksize >> SECTOR_SHIFT)) { - if (test_bit(i, c_page->bitmap)) { - offset = (i << SECTOR_SHIFT); - memcpy(dst + offset, src + offset, - nullb->dev->blocksize); - __set_bit(i, t_page->bitmap); - } - } - - kunmap_atomic(dst); - kunmap_atomic(src); - - ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page); - null_free_page(ret); - nullb->dev->curr_cache -= PAGE_SIZE; - - return 0; -} - -static int null_make_cache_space(struct nullb *nullb, unsigned long n) -{ - int i, err, nr_pages; - struct nullb_page *c_pages[FREE_BATCH]; - unsigned long flushed = 0, one_round; - -again: - if ((nullb->dev->cache_size * 1024 * 1024) > - nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0) - return 0; - - nr_pages = radix_tree_gang_lookup(&nullb->dev->cache, - (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH); - /* - * nullb_flush_cache_page could unlock before using the c_pages. To - * avoid race, we don't allow page free - */ - for (i = 0; i < nr_pages; i++) { - nullb->cache_flush_pos = c_pages[i]->page->index; - /* - * We found the page which is being flushed to disk by other - * threads - */ - if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap)) - c_pages[i] = NULL; - else - __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap); - } - - one_round = 0; - for (i = 0; i < nr_pages; i++) { - if (c_pages[i] == NULL) - continue; - err = null_flush_cache_page(nullb, c_pages[i]); - if (err) - return err; - one_round++; - } - flushed += one_round << PAGE_SHIFT; - - if (n > flushed) { - if (nr_pages == 0) - nullb->cache_flush_pos = 0; - if (one_round == 0) { - /* give other threads a chance */ - spin_unlock_irq(&nullb->lock); - spin_lock_irq(&nullb->lock); - } - goto again; - } - return 0; -} - -static int copy_to_nullb(struct nullb *nullb, struct page *source, - unsigned int off, sector_t sector, size_t n, bool is_fua) -{ - size_t temp, count = 0; - unsigned int offset; - struct nullb_page *t_page; - void *dst, *src; - - while (count < n) { - temp = min_t(size_t, nullb->dev->blocksize, n - count); - - if (null_cache_active(nullb) && !is_fua) - null_make_cache_space(nullb, PAGE_SIZE); - - offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; - t_page = null_insert_page(nullb, sector, - !null_cache_active(nullb) || is_fua); - if (!t_page) - return -ENOSPC; - - src = kmap_atomic(source); - dst = kmap_atomic(t_page->page); - memcpy(dst + offset, src + off + count, temp); - kunmap_atomic(dst); - kunmap_atomic(src); - - __set_bit(sector & SECTOR_MASK, t_page->bitmap); - - if (is_fua) - null_free_sector(nullb, sector, true); - - count += temp; - sector += temp >> SECTOR_SHIFT; - } - return 0; -} - -static int copy_from_nullb(struct nullb *nullb, struct page *dest, - unsigned int off, sector_t sector, size_t n) -{ - size_t temp, count = 0; - unsigned int offset; - struct nullb_page *t_page; - void *dst, *src; - - while (count < n) { - temp = min_t(size_t, nullb->dev->blocksize, n - count); - - offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; - t_page = null_lookup_page(nullb, sector, false, - !null_cache_active(nullb)); - - dst = kmap_atomic(dest); - if (!t_page) { - memset(dst + off + count, 0, temp); - goto next; - } - src = kmap_atomic(t_page->page); - memcpy(dst + off + count, src + offset, temp); - kunmap_atomic(src); -next: - kunmap_atomic(dst); - - count += temp; - sector += temp >> SECTOR_SHIFT; - } - return 0; -} - -static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) -{ - size_t temp; - - spin_lock_irq(&nullb->lock); - while (n > 0) { - temp = min_t(size_t, n, nullb->dev->blocksize); - null_free_sector(nullb, sector, false); - if (null_cache_active(nullb)) - null_free_sector(nullb, sector, true); - sector += temp >> SECTOR_SHIFT; - n -= temp; - } - spin_unlock_irq(&nullb->lock); -} - -static int null_handle_flush(struct nullb *nullb) -{ - int err; - - if (!null_cache_active(nullb)) - return 0; - - spin_lock_irq(&nullb->lock); - while (true) { - err = null_make_cache_space(nullb, - nullb->dev->cache_size * 1024 * 1024); - if (err || nullb->dev->curr_cache == 0) - break; - } - - WARN_ON(!radix_tree_empty(&nullb->dev->cache)); - spin_unlock_irq(&nullb->lock); - return err; -} - -static int null_transfer(struct nullb *nullb, struct page *page, - unsigned int len, unsigned int off, bool is_write, sector_t sector, - bool is_fua) -{ - int err = 0; - - if (!is_write) { - err = copy_from_nullb(nullb, page, off, sector, len); - flush_dcache_page(page); - } else { - flush_dcache_page(page); - err = copy_to_nullb(nullb, page, off, sector, len, is_fua); - } - - return err; -} - -static int null_handle_rq(struct nullb_cmd *cmd) -{ - struct request *rq = cmd->rq; - struct nullb *nullb = cmd->nq->dev->nullb; - int err; - unsigned int len; - sector_t sector; - struct req_iterator iter; - struct bio_vec bvec; - - sector = blk_rq_pos(rq); - - if (req_op(rq) == REQ_OP_DISCARD) { - null_handle_discard(nullb, sector, blk_rq_bytes(rq)); - return 0; - } - - spin_lock_irq(&nullb->lock); - rq_for_each_segment(bvec, rq, iter) { - len = bvec.bv_len; - err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, - op_is_write(req_op(rq)), sector, - req_op(rq) & REQ_FUA); - if (err) { - spin_unlock_irq(&nullb->lock); - return err; - } - sector += len >> SECTOR_SHIFT; - } - spin_unlock_irq(&nullb->lock); - - return 0; -} - -static int null_handle_bio(struct nullb_cmd *cmd) -{ - struct bio *bio = cmd->bio; - struct nullb *nullb = cmd->nq->dev->nullb; - int err; - unsigned int len; - sector_t sector; - struct bio_vec bvec; - struct bvec_iter iter; - - sector = bio->bi_iter.bi_sector; - - if (bio_op(bio) == REQ_OP_DISCARD) { - null_handle_discard(nullb, sector, - bio_sectors(bio) << SECTOR_SHIFT); - return 0; - } - - spin_lock_irq(&nullb->lock); - bio_for_each_segment(bvec, bio, iter) { - len = bvec.bv_len; - err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, - op_is_write(bio_op(bio)), sector, - bio_op(bio) & REQ_FUA); - if (err) { - spin_unlock_irq(&nullb->lock); - return err; - } - sector += len >> SECTOR_SHIFT; - } - spin_unlock_irq(&nullb->lock); - return 0; -} - -static void null_stop_queue(struct nullb *nullb) -{ - struct request_queue *q = nullb->q; - - if (nullb->dev->queue_mode == NULL_Q_MQ) - blk_mq_stop_hw_queues(q); - else { - spin_lock_irq(q->queue_lock); - blk_stop_queue(q); - spin_unlock_irq(q->queue_lock); - } -} - -static void null_restart_queue_async(struct nullb *nullb) -{ - struct request_queue *q = nullb->q; - unsigned long flags; - - if (nullb->dev->queue_mode == NULL_Q_MQ) - blk_mq_start_stopped_hw_queues(q, true); - else { - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue_async(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } -} - -static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) -{ - struct nullb_device *dev = cmd->nq->dev; - struct nullb *nullb = dev->nullb; - int err = 0; - - if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) { - cmd->error = null_zone_report(nullb, cmd); - goto out; - } - - if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) { - struct request *rq = cmd->rq; - - if (!hrtimer_active(&nullb->bw_timer)) - hrtimer_restart(&nullb->bw_timer); - - if (atomic_long_sub_return(blk_rq_bytes(rq), - &nullb->cur_bytes) < 0) { - null_stop_queue(nullb); - /* race with timer */ - if (atomic_long_read(&nullb->cur_bytes) > 0) - null_restart_queue_async(nullb); - if (dev->queue_mode == NULL_Q_RQ) { - struct request_queue *q = nullb->q; - - spin_lock_irq(q->queue_lock); - rq->rq_flags |= RQF_DONTPREP; - blk_requeue_request(q, rq); - spin_unlock_irq(q->queue_lock); - return BLK_STS_OK; - } else - /* requeue request */ - return BLK_STS_DEV_RESOURCE; - } - } - - if (nullb->dev->badblocks.shift != -1) { - int bad_sectors; - sector_t sector, size, first_bad; - bool is_flush = true; - - if (dev->queue_mode == NULL_Q_BIO && - bio_op(cmd->bio) != REQ_OP_FLUSH) { - is_flush = false; - sector = cmd->bio->bi_iter.bi_sector; - size = bio_sectors(cmd->bio); - } - if (dev->queue_mode != NULL_Q_BIO && - req_op(cmd->rq) != REQ_OP_FLUSH) { - is_flush = false; - sector = blk_rq_pos(cmd->rq); - size = blk_rq_sectors(cmd->rq); - } - if (!is_flush && badblocks_check(&nullb->dev->badblocks, sector, - size, &first_bad, &bad_sectors)) { - cmd->error = BLK_STS_IOERR; - goto out; - } - } - - if (dev->memory_backed) { - if (dev->queue_mode == NULL_Q_BIO) { - if (bio_op(cmd->bio) == REQ_OP_FLUSH) - err = null_handle_flush(nullb); - else - err = null_handle_bio(cmd); - } else { - if (req_op(cmd->rq) == REQ_OP_FLUSH) - err = null_handle_flush(nullb); - else - err = null_handle_rq(cmd); - } - } - cmd->error = errno_to_blk_status(err); - - if (!cmd->error && dev->zoned) { - if (req_op(cmd->rq) == REQ_OP_WRITE) - null_zone_write(cmd); - else if (req_op(cmd->rq) == REQ_OP_ZONE_RESET) - null_zone_reset(cmd); - } -out: - /* Complete IO by inline, softirq or timer */ - switch (dev->irqmode) { - case NULL_IRQ_SOFTIRQ: - switch (dev->queue_mode) { - case NULL_Q_MQ: - blk_mq_complete_request(cmd->rq); - break; - case NULL_Q_RQ: - blk_complete_request(cmd->rq); - break; - case NULL_Q_BIO: - /* - * XXX: no proper submitting cpu information available. - */ - end_cmd(cmd); - break; - } - break; - case NULL_IRQ_NONE: - end_cmd(cmd); - break; - case NULL_IRQ_TIMER: - null_cmd_end_timer(cmd); - break; - } - return BLK_STS_OK; -} - -static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer) -{ - struct nullb *nullb = container_of(timer, struct nullb, bw_timer); - ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL); - unsigned int mbps = nullb->dev->mbps; - - if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps)) - return HRTIMER_NORESTART; - - atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps)); - null_restart_queue_async(nullb); - - hrtimer_forward_now(&nullb->bw_timer, timer_interval); - - return HRTIMER_RESTART; -} - -static void nullb_setup_bwtimer(struct nullb *nullb) -{ - ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL); - - hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - nullb->bw_timer.function = nullb_bwtimer_fn; - atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps)); - hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL); -} - -static struct nullb_queue *nullb_to_queue(struct nullb *nullb) -{ - int index = 0; - - if (nullb->nr_queues != 1) - index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues); - - return &nullb->queues[index]; -} - -static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio) -{ - struct nullb *nullb = q->queuedata; - struct nullb_queue *nq = nullb_to_queue(nullb); - struct nullb_cmd *cmd; - - cmd = alloc_cmd(nq, 1); - cmd->bio = bio; - - null_handle_cmd(cmd); - return BLK_QC_T_NONE; -} - -static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq) -{ - pr_info("null: rq %p timed out\n", rq); - __blk_complete_request(rq); - return BLK_EH_DONE; -} - -static int null_rq_prep_fn(struct request_queue *q, struct request *req) -{ - struct nullb *nullb = q->queuedata; - struct nullb_queue *nq = nullb_to_queue(nullb); - struct nullb_cmd *cmd; - - cmd = alloc_cmd(nq, 0); - if (cmd) { - cmd->rq = req; - req->special = cmd; - return BLKPREP_OK; - } - blk_stop_queue(q); - - return BLKPREP_DEFER; -} - -static bool should_timeout_request(struct request *rq) -{ -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (g_timeout_str[0]) - return should_fail(&null_timeout_attr, 1); -#endif - return false; -} - -static bool should_requeue_request(struct request *rq) -{ -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (g_requeue_str[0]) - return should_fail(&null_requeue_attr, 1); -#endif - return false; -} - -static void null_request_fn(struct request_queue *q) -{ - struct request *rq; - - while ((rq = blk_fetch_request(q)) != NULL) { - struct nullb_cmd *cmd = rq->special; - - /* just ignore the request */ - if (should_timeout_request(rq)) - continue; - if (should_requeue_request(rq)) { - blk_requeue_request(q, rq); - continue; - } - - spin_unlock_irq(q->queue_lock); - null_handle_cmd(cmd); - spin_lock_irq(q->queue_lock); - } -} - -static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) -{ - pr_info("null: rq %p timed out\n", rq); - blk_mq_complete_request(rq); - return BLK_EH_DONE; -} - -static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) -{ - struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); - struct nullb_queue *nq = hctx->driver_data; - - might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); - - if (nq->dev->irqmode == NULL_IRQ_TIMER) { - hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - cmd->timer.function = null_cmd_timer_expired; - } - cmd->rq = bd->rq; - cmd->nq = nq; - - blk_mq_start_request(bd->rq); - - if (should_requeue_request(bd->rq)) { - /* - * Alternate between hitting the core BUSY path, and the - * driver driven requeue path - */ - nq->requeue_selection++; - if (nq->requeue_selection & 1) - return BLK_STS_RESOURCE; - else { - blk_mq_requeue_request(bd->rq, true); - return BLK_STS_OK; - } - } - if (should_timeout_request(bd->rq)) - return BLK_STS_OK; - - return null_handle_cmd(cmd); -} - -static const struct blk_mq_ops null_mq_ops = { - .queue_rq = null_queue_rq, - .complete = null_softirq_done_fn, - .timeout = null_timeout_rq, -}; - -static void cleanup_queue(struct nullb_queue *nq) -{ - kfree(nq->tag_map); - kfree(nq->cmds); -} - -static void cleanup_queues(struct nullb *nullb) -{ - int i; - - for (i = 0; i < nullb->nr_queues; i++) - cleanup_queue(&nullb->queues[i]); - - kfree(nullb->queues); -} - -static void null_del_dev(struct nullb *nullb) -{ - struct nullb_device *dev = nullb->dev; - - ida_simple_remove(&nullb_indexes, nullb->index); - - list_del_init(&nullb->list); - - del_gendisk(nullb->disk); - - if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) { - hrtimer_cancel(&nullb->bw_timer); - atomic_long_set(&nullb->cur_bytes, LONG_MAX); - null_restart_queue_async(nullb); - } - - blk_cleanup_queue(nullb->q); - if (dev->queue_mode == NULL_Q_MQ && - nullb->tag_set == &nullb->__tag_set) - blk_mq_free_tag_set(nullb->tag_set); - put_disk(nullb->disk); - cleanup_queues(nullb); - if (null_cache_active(nullb)) - null_free_device_storage(nullb->dev, true); - kfree(nullb); - dev->nullb = NULL; -} - -static void null_config_discard(struct nullb *nullb) -{ - if (nullb->dev->discard == false) - return; - nullb->q->limits.discard_granularity = nullb->dev->blocksize; - nullb->q->limits.discard_alignment = nullb->dev->blocksize; - blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q); -} - -static int null_open(struct block_device *bdev, fmode_t mode) -{ - return 0; -} - -static void null_release(struct gendisk *disk, fmode_t mode) -{ -} - -static const struct block_device_operations null_fops = { - .owner = THIS_MODULE, - .open = null_open, - .release = null_release, -}; - -static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) -{ - BUG_ON(!nullb); - BUG_ON(!nq); - - init_waitqueue_head(&nq->wait); - nq->queue_depth = nullb->queue_depth; - nq->dev = nullb->dev; -} - -static void null_init_queues(struct nullb *nullb) -{ - struct request_queue *q = nullb->q; - struct blk_mq_hw_ctx *hctx; - struct nullb_queue *nq; - int i; - - queue_for_each_hw_ctx(q, hctx, i) { - if (!hctx->nr_ctx || !hctx->tags) - continue; - nq = &nullb->queues[i]; - hctx->driver_data = nq; - null_init_queue(nullb, nq); - nullb->nr_queues++; - } -} - -static int setup_commands(struct nullb_queue *nq) -{ - struct nullb_cmd *cmd; - int i, tag_size; - - nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL); - if (!nq->cmds) - return -ENOMEM; - - tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG; - nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL); - if (!nq->tag_map) { - kfree(nq->cmds); - return -ENOMEM; - } - - for (i = 0; i < nq->queue_depth; i++) { - cmd = &nq->cmds[i]; - INIT_LIST_HEAD(&cmd->list); - cmd->ll_list.next = NULL; - cmd->tag = -1U; - } - - return 0; -} - -static int setup_queues(struct nullb *nullb) -{ - nullb->queues = kcalloc(nullb->dev->submit_queues, - sizeof(struct nullb_queue), - GFP_KERNEL); - if (!nullb->queues) - return -ENOMEM; - - nullb->nr_queues = 0; - nullb->queue_depth = nullb->dev->hw_queue_depth; - - return 0; -} - -static int init_driver_queues(struct nullb *nullb) -{ - struct nullb_queue *nq; - int i, ret = 0; - - for (i = 0; i < nullb->dev->submit_queues; i++) { - nq = &nullb->queues[i]; - - null_init_queue(nullb, nq); - - ret = setup_commands(nq); - if (ret) - return ret; - nullb->nr_queues++; - } - return 0; -} - -static int null_gendisk_register(struct nullb *nullb) -{ - struct gendisk *disk; - sector_t size; - - disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node); - if (!disk) - return -ENOMEM; - size = (sector_t)nullb->dev->size * 1024 * 1024ULL; - set_capacity(disk, size >> 9); - - disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; - disk->major = null_major; - disk->first_minor = nullb->index; - disk->fops = &null_fops; - disk->private_data = nullb; - disk->queue = nullb->q; - strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); - - add_disk(disk); - return 0; -} - -static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) -{ - set->ops = &null_mq_ops; - set->nr_hw_queues = nullb ? nullb->dev->submit_queues : - g_submit_queues; - set->queue_depth = nullb ? nullb->dev->hw_queue_depth : - g_hw_queue_depth; - set->numa_node = nullb ? nullb->dev->home_node : g_home_node; - set->cmd_size = sizeof(struct nullb_cmd); - set->flags = BLK_MQ_F_SHOULD_MERGE; - if (g_no_sched) - set->flags |= BLK_MQ_F_NO_SCHED; - set->driver_data = NULL; - - if ((nullb && nullb->dev->blocking) || g_blocking) - set->flags |= BLK_MQ_F_BLOCKING; - - return blk_mq_alloc_tag_set(set); -} - -static void null_validate_conf(struct nullb_device *dev) -{ - dev->blocksize = round_down(dev->blocksize, 512); - dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); - - if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) { - if (dev->submit_queues != nr_online_nodes) - dev->submit_queues = nr_online_nodes; - } else if (dev->submit_queues > nr_cpu_ids) - dev->submit_queues = nr_cpu_ids; - else if (dev->submit_queues == 0) - dev->submit_queues = 1; - - dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); - dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER); - - /* Do memory allocation, so set blocking */ - if (dev->memory_backed) - dev->blocking = true; - else /* cache is meaningless */ - dev->cache_size = 0; - dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024, - dev->cache_size); - dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps); - /* can not stop a queue */ - if (dev->queue_mode == NULL_Q_BIO) - dev->mbps = 0; -} - -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION -static bool __null_setup_fault(struct fault_attr *attr, char *str) -{ - if (!str[0]) - return true; - - if (!setup_fault_attr(attr, str)) - return false; - - attr->verbose = 0; - return true; -} -#endif - -static bool null_setup_fault(void) -{ -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (!__null_setup_fault(&null_timeout_attr, g_timeout_str)) - return false; - if (!__null_setup_fault(&null_requeue_attr, g_requeue_str)) - return false; -#endif - return true; -} - -static int null_add_dev(struct nullb_device *dev) -{ - struct nullb *nullb; - int rv; - - null_validate_conf(dev); - - nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node); - if (!nullb) { - rv = -ENOMEM; - goto out; - } - nullb->dev = dev; - dev->nullb = nullb; - - spin_lock_init(&nullb->lock); - - rv = setup_queues(nullb); - if (rv) - goto out_free_nullb; - - if (dev->queue_mode == NULL_Q_MQ) { - if (shared_tags) { - nullb->tag_set = &tag_set; - rv = 0; - } else { - nullb->tag_set = &nullb->__tag_set; - rv = null_init_tag_set(nullb, nullb->tag_set); - } - - if (rv) - goto out_cleanup_queues; - - if (!null_setup_fault()) - goto out_cleanup_queues; - - nullb->tag_set->timeout = 5 * HZ; - nullb->q = blk_mq_init_queue(nullb->tag_set); - if (IS_ERR(nullb->q)) { - rv = -ENOMEM; - goto out_cleanup_tags; - } - null_init_queues(nullb); - } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node, - NULL); - if (!nullb->q) { - rv = -ENOMEM; - goto out_cleanup_queues; - } - blk_queue_make_request(nullb->q, null_queue_bio); - rv = init_driver_queues(nullb); - if (rv) - goto out_cleanup_blk_queue; - } else { - nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, - dev->home_node); - if (!nullb->q) { - rv = -ENOMEM; - goto out_cleanup_queues; - } - - if (!null_setup_fault()) - goto out_cleanup_blk_queue; - - blk_queue_prep_rq(nullb->q, null_rq_prep_fn); - blk_queue_softirq_done(nullb->q, null_softirq_done_fn); - blk_queue_rq_timed_out(nullb->q, null_rq_timed_out_fn); - nullb->q->rq_timeout = 5 * HZ; - rv = init_driver_queues(nullb); - if (rv) - goto out_cleanup_blk_queue; - } - - if (dev->mbps) { - set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags); - nullb_setup_bwtimer(nullb); - } - - if (dev->cache_size > 0) { - set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); - blk_queue_write_cache(nullb->q, true, true); - blk_queue_flush_queueable(nullb->q, true); - } - - if (dev->zoned) { - rv = null_zone_init(dev); - if (rv) - goto out_cleanup_blk_queue; - - blk_queue_chunk_sectors(nullb->q, dev->zone_size_sects); - nullb->q->limits.zoned = BLK_ZONED_HM; - } - - nullb->q->queuedata = nullb; - blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q); - - mutex_lock(&lock); - nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); - dev->index = nullb->index; - mutex_unlock(&lock); - - blk_queue_logical_block_size(nullb->q, dev->blocksize); - blk_queue_physical_block_size(nullb->q, dev->blocksize); - - null_config_discard(nullb); - - sprintf(nullb->disk_name, "nullb%d", nullb->index); - - rv = null_gendisk_register(nullb); - if (rv) - goto out_cleanup_zone; - - mutex_lock(&lock); - list_add_tail(&nullb->list, &nullb_list); - mutex_unlock(&lock); - - return 0; -out_cleanup_zone: - if (dev->zoned) - null_zone_exit(dev); -out_cleanup_blk_queue: - blk_cleanup_queue(nullb->q); -out_cleanup_tags: - if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) - blk_mq_free_tag_set(nullb->tag_set); -out_cleanup_queues: - cleanup_queues(nullb); -out_free_nullb: - kfree(nullb); -out: - return rv; -} - -static int __init null_init(void) -{ - int ret = 0; - unsigned int i; - struct nullb *nullb; - struct nullb_device *dev; - - if (g_bs > PAGE_SIZE) { - pr_warn("null_blk: invalid block size\n"); - pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); - g_bs = PAGE_SIZE; - } - - if (!is_power_of_2(g_zone_size)) { - pr_err("null_blk: zone_size must be power-of-two\n"); - return -EINVAL; - } - - if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) { - if (g_submit_queues != nr_online_nodes) { - pr_warn("null_blk: submit_queues param is set to %u.\n", - nr_online_nodes); - g_submit_queues = nr_online_nodes; - } - } else if (g_submit_queues > nr_cpu_ids) - g_submit_queues = nr_cpu_ids; - else if (g_submit_queues <= 0) - g_submit_queues = 1; - - if (g_queue_mode == NULL_Q_MQ && shared_tags) { - ret = null_init_tag_set(NULL, &tag_set); - if (ret) - return ret; - } - - config_group_init(&nullb_subsys.su_group); - mutex_init(&nullb_subsys.su_mutex); - - ret = configfs_register_subsystem(&nullb_subsys); - if (ret) - goto err_tagset; - - mutex_init(&lock); - - null_major = register_blkdev(0, "nullb"); - if (null_major < 0) { - ret = null_major; - goto err_conf; - } - - for (i = 0; i < nr_devices; i++) { - dev = null_alloc_dev(); - if (!dev) { - ret = -ENOMEM; - goto err_dev; - } - ret = null_add_dev(dev); - if (ret) { - null_free_dev(dev); - goto err_dev; - } - } - - pr_info("null: module loaded\n"); - return 0; - -err_dev: - while (!list_empty(&nullb_list)) { - nullb = list_entry(nullb_list.next, struct nullb, list); - dev = nullb->dev; - null_del_dev(nullb); - null_free_dev(dev); - } - unregister_blkdev(null_major, "nullb"); -err_conf: - configfs_unregister_subsystem(&nullb_subsys); -err_tagset: - if (g_queue_mode == NULL_Q_MQ && shared_tags) - blk_mq_free_tag_set(&tag_set); - return ret; -} - -static void __exit null_exit(void) -{ - struct nullb *nullb; - - configfs_unregister_subsystem(&nullb_subsys); - - unregister_blkdev(null_major, "nullb"); - - mutex_lock(&lock); - while (!list_empty(&nullb_list)) { - struct nullb_device *dev; - - nullb = list_entry(nullb_list.next, struct nullb, list); - dev = nullb->dev; - null_del_dev(nullb); - null_free_dev(dev); - } - mutex_unlock(&lock); - - if (g_queue_mode == NULL_Q_MQ && shared_tags) - blk_mq_free_tag_set(&tag_set); -} - -module_init(null_init); -module_exit(null_exit); - -MODULE_AUTHOR("Jens Axboe "); -MODULE_LICENSE("GPL"); diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c new file mode 100644 index 000000000000..86cafa6d3b41 --- /dev/null +++ b/drivers/block/null_blk_main.c @@ -0,0 +1,1926 @@ +/* + * Add configfs and memory store: Kyungchan Koh and + * Shaohua Li + */ +#include + +#include +#include +#include +#include +#include "null_blk.h" + +#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) +#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) +#define SECTOR_MASK (PAGE_SECTORS - 1) + +#define FREE_BATCH 16 + +#define TICKS_PER_SEC 50ULL +#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC) + +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION +static DECLARE_FAULT_ATTR(null_timeout_attr); +static DECLARE_FAULT_ATTR(null_requeue_attr); +#endif + +static inline u64 mb_per_tick(int mbps) +{ + return (1 << 20) / TICKS_PER_SEC * ((u64) mbps); +} + +/* + * Status flags for nullb_device. + * + * CONFIGURED: Device has been configured and turned on. Cannot reconfigure. + * UP: Device is currently on and visible in userspace. + * THROTTLED: Device is being throttled. + * CACHE: Device is using a write-back cache. + */ +enum nullb_device_flags { + NULLB_DEV_FL_CONFIGURED = 0, + NULLB_DEV_FL_UP = 1, + NULLB_DEV_FL_THROTTLED = 2, + NULLB_DEV_FL_CACHE = 3, +}; + +#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2) +/* + * nullb_page is a page in memory for nullb devices. + * + * @page: The page holding the data. + * @bitmap: The bitmap represents which sector in the page has data. + * Each bit represents one block size. For example, sector 8 + * will use the 7th bit + * The highest 2 bits of bitmap are for special purpose. LOCK means the cache + * page is being flushing to storage. FREE means the cache page is freed and + * should be skipped from flushing to storage. Please see + * null_make_cache_space + */ +struct nullb_page { + struct page *page; + DECLARE_BITMAP(bitmap, MAP_SZ); +}; +#define NULLB_PAGE_LOCK (MAP_SZ - 1) +#define NULLB_PAGE_FREE (MAP_SZ - 2) + +static LIST_HEAD(nullb_list); +static struct mutex lock; +static int null_major; +static DEFINE_IDA(nullb_indexes); +static struct blk_mq_tag_set tag_set; + +enum { + NULL_IRQ_NONE = 0, + NULL_IRQ_SOFTIRQ = 1, + NULL_IRQ_TIMER = 2, +}; + +enum { + NULL_Q_BIO = 0, + NULL_Q_RQ = 1, + NULL_Q_MQ = 2, +}; + +static int g_no_sched; +module_param_named(no_sched, g_no_sched, int, 0444); +MODULE_PARM_DESC(no_sched, "No io scheduler"); + +static int g_submit_queues = 1; +module_param_named(submit_queues, g_submit_queues, int, 0444); +MODULE_PARM_DESC(submit_queues, "Number of submission queues"); + +static int g_home_node = NUMA_NO_NODE; +module_param_named(home_node, g_home_node, int, 0444); +MODULE_PARM_DESC(home_node, "Home node for the device"); + +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION +static char g_timeout_str[80]; +module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444); + +static char g_requeue_str[80]; +module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444); +#endif + +static int g_queue_mode = NULL_Q_MQ; + +static int null_param_store_val(const char *str, int *val, int min, int max) +{ + int ret, new_val; + + ret = kstrtoint(str, 10, &new_val); + if (ret) + return -EINVAL; + + if (new_val < min || new_val > max) + return -EINVAL; + + *val = new_val; + return 0; +} + +static int null_set_queue_mode(const char *str, const struct kernel_param *kp) +{ + return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ); +} + +static const struct kernel_param_ops null_queue_mode_param_ops = { + .set = null_set_queue_mode, + .get = param_get_int, +}; + +device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444); +MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)"); + +static int g_gb = 250; +module_param_named(gb, g_gb, int, 0444); +MODULE_PARM_DESC(gb, "Size in GB"); + +static int g_bs = 512; +module_param_named(bs, g_bs, int, 0444); +MODULE_PARM_DESC(bs, "Block size (in bytes)"); + +static int nr_devices = 1; +module_param(nr_devices, int, 0444); +MODULE_PARM_DESC(nr_devices, "Number of devices to register"); + +static bool g_blocking; +module_param_named(blocking, g_blocking, bool, 0444); +MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); + +static bool shared_tags; +module_param(shared_tags, bool, 0444); +MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq"); + +static int g_irqmode = NULL_IRQ_SOFTIRQ; + +static int null_set_irqmode(const char *str, const struct kernel_param *kp) +{ + return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE, + NULL_IRQ_TIMER); +} + +static const struct kernel_param_ops null_irqmode_param_ops = { + .set = null_set_irqmode, + .get = param_get_int, +}; + +device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444); +MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); + +static unsigned long g_completion_nsec = 10000; +module_param_named(completion_nsec, g_completion_nsec, ulong, 0444); +MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns"); + +static int g_hw_queue_depth = 64; +module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444); +MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); + +static bool g_use_per_node_hctx; +module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444); +MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); + +static bool g_zoned; +module_param_named(zoned, g_zoned, bool, S_IRUGO); +MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false"); + +static unsigned long g_zone_size = 256; +module_param_named(zone_size, g_zone_size, ulong, S_IRUGO); +MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256"); + +static struct nullb_device *null_alloc_dev(void); +static void null_free_dev(struct nullb_device *dev); +static void null_del_dev(struct nullb *nullb); +static int null_add_dev(struct nullb_device *dev); +static void null_free_device_storage(struct nullb_device *dev, bool is_cache); + +static inline struct nullb_device *to_nullb_device(struct config_item *item) +{ + return item ? container_of(item, struct nullb_device, item) : NULL; +} + +static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", val); +} + +static inline ssize_t nullb_device_ulong_attr_show(unsigned long val, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%lu\n", val); +} + +static inline ssize_t nullb_device_bool_attr_show(bool val, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", val); +} + +static ssize_t nullb_device_uint_attr_store(unsigned int *val, + const char *page, size_t count) +{ + unsigned int tmp; + int result; + + result = kstrtouint(page, 0, &tmp); + if (result) + return result; + + *val = tmp; + return count; +} + +static ssize_t nullb_device_ulong_attr_store(unsigned long *val, + const char *page, size_t count) +{ + int result; + unsigned long tmp; + + result = kstrtoul(page, 0, &tmp); + if (result) + return result; + + *val = tmp; + return count; +} + +static ssize_t nullb_device_bool_attr_store(bool *val, const char *page, + size_t count) +{ + bool tmp; + int result; + + result = kstrtobool(page, &tmp); + if (result) + return result; + + *val = tmp; + return count; +} + +/* The following macro should only be used with TYPE = {uint, ulong, bool}. */ +#define NULLB_DEVICE_ATTR(NAME, TYPE) \ +static ssize_t \ +nullb_device_##NAME##_show(struct config_item *item, char *page) \ +{ \ + return nullb_device_##TYPE##_attr_show( \ + to_nullb_device(item)->NAME, page); \ +} \ +static ssize_t \ +nullb_device_##NAME##_store(struct config_item *item, const char *page, \ + size_t count) \ +{ \ + if (test_bit(NULLB_DEV_FL_CONFIGURED, &to_nullb_device(item)->flags)) \ + return -EBUSY; \ + return nullb_device_##TYPE##_attr_store( \ + &to_nullb_device(item)->NAME, page, count); \ +} \ +CONFIGFS_ATTR(nullb_device_, NAME); + +NULLB_DEVICE_ATTR(size, ulong); +NULLB_DEVICE_ATTR(completion_nsec, ulong); +NULLB_DEVICE_ATTR(submit_queues, uint); +NULLB_DEVICE_ATTR(home_node, uint); +NULLB_DEVICE_ATTR(queue_mode, uint); +NULLB_DEVICE_ATTR(blocksize, uint); +NULLB_DEVICE_ATTR(irqmode, uint); +NULLB_DEVICE_ATTR(hw_queue_depth, uint); +NULLB_DEVICE_ATTR(index, uint); +NULLB_DEVICE_ATTR(blocking, bool); +NULLB_DEVICE_ATTR(use_per_node_hctx, bool); +NULLB_DEVICE_ATTR(memory_backed, bool); +NULLB_DEVICE_ATTR(discard, bool); +NULLB_DEVICE_ATTR(mbps, uint); +NULLB_DEVICE_ATTR(cache_size, ulong); +NULLB_DEVICE_ATTR(zoned, bool); +NULLB_DEVICE_ATTR(zone_size, ulong); + +static ssize_t nullb_device_power_show(struct config_item *item, char *page) +{ + return nullb_device_bool_attr_show(to_nullb_device(item)->power, page); +} + +static ssize_t nullb_device_power_store(struct config_item *item, + const char *page, size_t count) +{ + struct nullb_device *dev = to_nullb_device(item); + bool newp = false; + ssize_t ret; + + ret = nullb_device_bool_attr_store(&newp, page, count); + if (ret < 0) + return ret; + + if (!dev->power && newp) { + if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags)) + return count; + if (null_add_dev(dev)) { + clear_bit(NULLB_DEV_FL_UP, &dev->flags); + return -ENOMEM; + } + + set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); + dev->power = newp; + } else if (dev->power && !newp) { + mutex_lock(&lock); + dev->power = newp; + null_del_dev(dev->nullb); + mutex_unlock(&lock); + clear_bit(NULLB_DEV_FL_UP, &dev->flags); + clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); + } + + return count; +} + +CONFIGFS_ATTR(nullb_device_, power); + +static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page) +{ + struct nullb_device *t_dev = to_nullb_device(item); + + return badblocks_show(&t_dev->badblocks, page, 0); +} + +static ssize_t nullb_device_badblocks_store(struct config_item *item, + const char *page, size_t count) +{ + struct nullb_device *t_dev = to_nullb_device(item); + char *orig, *buf, *tmp; + u64 start, end; + int ret; + + orig = kstrndup(page, count, GFP_KERNEL); + if (!orig) + return -ENOMEM; + + buf = strstrip(orig); + + ret = -EINVAL; + if (buf[0] != '+' && buf[0] != '-') + goto out; + tmp = strchr(&buf[1], '-'); + if (!tmp) + goto out; + *tmp = '\0'; + ret = kstrtoull(buf + 1, 0, &start); + if (ret) + goto out; + ret = kstrtoull(tmp + 1, 0, &end); + if (ret) + goto out; + ret = -EINVAL; + if (start > end) + goto out; + /* enable badblocks */ + cmpxchg(&t_dev->badblocks.shift, -1, 0); + if (buf[0] == '+') + ret = badblocks_set(&t_dev->badblocks, start, + end - start + 1, 1); + else + ret = badblocks_clear(&t_dev->badblocks, start, + end - start + 1); + if (ret == 0) + ret = count; +out: + kfree(orig); + return ret; +} +CONFIGFS_ATTR(nullb_device_, badblocks); + +static struct configfs_attribute *nullb_device_attrs[] = { + &nullb_device_attr_size, + &nullb_device_attr_completion_nsec, + &nullb_device_attr_submit_queues, + &nullb_device_attr_home_node, + &nullb_device_attr_queue_mode, + &nullb_device_attr_blocksize, + &nullb_device_attr_irqmode, + &nullb_device_attr_hw_queue_depth, + &nullb_device_attr_index, + &nullb_device_attr_blocking, + &nullb_device_attr_use_per_node_hctx, + &nullb_device_attr_power, + &nullb_device_attr_memory_backed, + &nullb_device_attr_discard, + &nullb_device_attr_mbps, + &nullb_device_attr_cache_size, + &nullb_device_attr_badblocks, + &nullb_device_attr_zoned, + &nullb_device_attr_zone_size, + NULL, +}; + +static void nullb_device_release(struct config_item *item) +{ + struct nullb_device *dev = to_nullb_device(item); + + null_free_device_storage(dev, false); + null_free_dev(dev); +} + +static struct configfs_item_operations nullb_device_ops = { + .release = nullb_device_release, +}; + +static const struct config_item_type nullb_device_type = { + .ct_item_ops = &nullb_device_ops, + .ct_attrs = nullb_device_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct +config_item *nullb_group_make_item(struct config_group *group, const char *name) +{ + struct nullb_device *dev; + + dev = null_alloc_dev(); + if (!dev) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&dev->item, name, &nullb_device_type); + + return &dev->item; +} + +static void +nullb_group_drop_item(struct config_group *group, struct config_item *item) +{ + struct nullb_device *dev = to_nullb_device(item); + + if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) { + mutex_lock(&lock); + dev->power = false; + null_del_dev(dev->nullb); + mutex_unlock(&lock); + } + + config_item_put(item); +} + +static ssize_t memb_group_features_show(struct config_item *item, char *page) +{ + return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size\n"); +} + +CONFIGFS_ATTR_RO(memb_group_, features); + +static struct configfs_attribute *nullb_group_attrs[] = { + &memb_group_attr_features, + NULL, +}; + +static struct configfs_group_operations nullb_group_ops = { + .make_item = nullb_group_make_item, + .drop_item = nullb_group_drop_item, +}; + +static const struct config_item_type nullb_group_type = { + .ct_group_ops = &nullb_group_ops, + .ct_attrs = nullb_group_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem nullb_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "nullb", + .ci_type = &nullb_group_type, + }, + }, +}; + +static inline int null_cache_active(struct nullb *nullb) +{ + return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); +} + +static struct nullb_device *null_alloc_dev(void) +{ + struct nullb_device *dev; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return NULL; + INIT_RADIX_TREE(&dev->data, GFP_ATOMIC); + INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC); + if (badblocks_init(&dev->badblocks, 0)) { + kfree(dev); + return NULL; + } + + dev->size = g_gb * 1024; + dev->completion_nsec = g_completion_nsec; + dev->submit_queues = g_submit_queues; + dev->home_node = g_home_node; + dev->queue_mode = g_queue_mode; + dev->blocksize = g_bs; + dev->irqmode = g_irqmode; + dev->hw_queue_depth = g_hw_queue_depth; + dev->blocking = g_blocking; + dev->use_per_node_hctx = g_use_per_node_hctx; + dev->zoned = g_zoned; + dev->zone_size = g_zone_size; + return dev; +} + +static void null_free_dev(struct nullb_device *dev) +{ + if (!dev) + return; + + null_zone_exit(dev); + badblocks_exit(&dev->badblocks); + kfree(dev); +} + +static void put_tag(struct nullb_queue *nq, unsigned int tag) +{ + clear_bit_unlock(tag, nq->tag_map); + + if (waitqueue_active(&nq->wait)) + wake_up(&nq->wait); +} + +static unsigned int get_tag(struct nullb_queue *nq) +{ + unsigned int tag; + + do { + tag = find_first_zero_bit(nq->tag_map, nq->queue_depth); + if (tag >= nq->queue_depth) + return -1U; + } while (test_and_set_bit_lock(tag, nq->tag_map)); + + return tag; +} + +static void free_cmd(struct nullb_cmd *cmd) +{ + put_tag(cmd->nq, cmd->tag); +} + +static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer); + +static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) +{ + struct nullb_cmd *cmd; + unsigned int tag; + + tag = get_tag(nq); + if (tag != -1U) { + cmd = &nq->cmds[tag]; + cmd->tag = tag; + cmd->nq = nq; + if (nq->dev->irqmode == NULL_IRQ_TIMER) { + hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + cmd->timer.function = null_cmd_timer_expired; + } + return cmd; + } + + return NULL; +} + +static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) +{ + struct nullb_cmd *cmd; + DEFINE_WAIT(wait); + + cmd = __alloc_cmd(nq); + if (cmd || !can_wait) + return cmd; + + do { + prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); + cmd = __alloc_cmd(nq); + if (cmd) + break; + + io_schedule(); + } while (1); + + finish_wait(&nq->wait, &wait); + return cmd; +} + +static void end_cmd(struct nullb_cmd *cmd) +{ + struct request_queue *q = NULL; + int queue_mode = cmd->nq->dev->queue_mode; + + if (cmd->rq) + q = cmd->rq->q; + + switch (queue_mode) { + case NULL_Q_MQ: + blk_mq_end_request(cmd->rq, cmd->error); + return; + case NULL_Q_RQ: + INIT_LIST_HEAD(&cmd->rq->queuelist); + blk_end_request_all(cmd->rq, cmd->error); + break; + case NULL_Q_BIO: + cmd->bio->bi_status = cmd->error; + bio_endio(cmd->bio); + break; + } + + free_cmd(cmd); + + /* Restart queue if needed, as we are freeing a tag */ + if (queue_mode == NULL_Q_RQ && blk_queue_stopped(q)) { + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + blk_start_queue_async(q); + spin_unlock_irqrestore(q->queue_lock, flags); + } +} + +static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) +{ + end_cmd(container_of(timer, struct nullb_cmd, timer)); + + return HRTIMER_NORESTART; +} + +static void null_cmd_end_timer(struct nullb_cmd *cmd) +{ + ktime_t kt = cmd->nq->dev->completion_nsec; + + hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL); +} + +static void null_softirq_done_fn(struct request *rq) +{ + struct nullb *nullb = rq->q->queuedata; + + if (nullb->dev->queue_mode == NULL_Q_MQ) + end_cmd(blk_mq_rq_to_pdu(rq)); + else + end_cmd(rq->special); +} + +static struct nullb_page *null_alloc_page(gfp_t gfp_flags) +{ + struct nullb_page *t_page; + + t_page = kmalloc(sizeof(struct nullb_page), gfp_flags); + if (!t_page) + goto out; + + t_page->page = alloc_pages(gfp_flags, 0); + if (!t_page->page) + goto out_freepage; + + memset(t_page->bitmap, 0, sizeof(t_page->bitmap)); + return t_page; +out_freepage: + kfree(t_page); +out: + return NULL; +} + +static void null_free_page(struct nullb_page *t_page) +{ + __set_bit(NULLB_PAGE_FREE, t_page->bitmap); + if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap)) + return; + __free_page(t_page->page); + kfree(t_page); +} + +static bool null_page_empty(struct nullb_page *page) +{ + int size = MAP_SZ - 2; + + return find_first_bit(page->bitmap, size) == size; +} + +static void null_free_sector(struct nullb *nullb, sector_t sector, + bool is_cache) +{ + unsigned int sector_bit; + u64 idx; + struct nullb_page *t_page, *ret; + struct radix_tree_root *root; + + root = is_cache ? &nullb->dev->cache : &nullb->dev->data; + idx = sector >> PAGE_SECTORS_SHIFT; + sector_bit = (sector & SECTOR_MASK); + + t_page = radix_tree_lookup(root, idx); + if (t_page) { + __clear_bit(sector_bit, t_page->bitmap); + + if (null_page_empty(t_page)) { + ret = radix_tree_delete_item(root, idx, t_page); + WARN_ON(ret != t_page); + null_free_page(ret); + if (is_cache) + nullb->dev->curr_cache -= PAGE_SIZE; + } + } +} + +static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx, + struct nullb_page *t_page, bool is_cache) +{ + struct radix_tree_root *root; + + root = is_cache ? &nullb->dev->cache : &nullb->dev->data; + + if (radix_tree_insert(root, idx, t_page)) { + null_free_page(t_page); + t_page = radix_tree_lookup(root, idx); + WARN_ON(!t_page || t_page->page->index != idx); + } else if (is_cache) + nullb->dev->curr_cache += PAGE_SIZE; + + return t_page; +} + +static void null_free_device_storage(struct nullb_device *dev, bool is_cache) +{ + unsigned long pos = 0; + int nr_pages; + struct nullb_page *ret, *t_pages[FREE_BATCH]; + struct radix_tree_root *root; + + root = is_cache ? &dev->cache : &dev->data; + + do { + int i; + + nr_pages = radix_tree_gang_lookup(root, + (void **)t_pages, pos, FREE_BATCH); + + for (i = 0; i < nr_pages; i++) { + pos = t_pages[i]->page->index; + ret = radix_tree_delete_item(root, pos, t_pages[i]); + WARN_ON(ret != t_pages[i]); + null_free_page(ret); + } + + pos++; + } while (nr_pages == FREE_BATCH); + + if (is_cache) + dev->curr_cache = 0; +} + +static struct nullb_page *__null_lookup_page(struct nullb *nullb, + sector_t sector, bool for_write, bool is_cache) +{ + unsigned int sector_bit; + u64 idx; + struct nullb_page *t_page; + struct radix_tree_root *root; + + idx = sector >> PAGE_SECTORS_SHIFT; + sector_bit = (sector & SECTOR_MASK); + + root = is_cache ? &nullb->dev->cache : &nullb->dev->data; + t_page = radix_tree_lookup(root, idx); + WARN_ON(t_page && t_page->page->index != idx); + + if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap))) + return t_page; + + return NULL; +} + +static struct nullb_page *null_lookup_page(struct nullb *nullb, + sector_t sector, bool for_write, bool ignore_cache) +{ + struct nullb_page *page = NULL; + + if (!ignore_cache) + page = __null_lookup_page(nullb, sector, for_write, true); + if (page) + return page; + return __null_lookup_page(nullb, sector, for_write, false); +} + +static struct nullb_page *null_insert_page(struct nullb *nullb, + sector_t sector, bool ignore_cache) +{ + u64 idx; + struct nullb_page *t_page; + + t_page = null_lookup_page(nullb, sector, true, ignore_cache); + if (t_page) + return t_page; + + spin_unlock_irq(&nullb->lock); + + t_page = null_alloc_page(GFP_NOIO); + if (!t_page) + goto out_lock; + + if (radix_tree_preload(GFP_NOIO)) + goto out_freepage; + + spin_lock_irq(&nullb->lock); + idx = sector >> PAGE_SECTORS_SHIFT; + t_page->page->index = idx; + t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache); + radix_tree_preload_end(); + + return t_page; +out_freepage: + null_free_page(t_page); +out_lock: + spin_lock_irq(&nullb->lock); + return null_lookup_page(nullb, sector, true, ignore_cache); +} + +static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) +{ + int i; + unsigned int offset; + u64 idx; + struct nullb_page *t_page, *ret; + void *dst, *src; + + idx = c_page->page->index; + + t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true); + + __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap); + if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) { + null_free_page(c_page); + if (t_page && null_page_empty(t_page)) { + ret = radix_tree_delete_item(&nullb->dev->data, + idx, t_page); + null_free_page(t_page); + } + return 0; + } + + if (!t_page) + return -ENOMEM; + + src = kmap_atomic(c_page->page); + dst = kmap_atomic(t_page->page); + + for (i = 0; i < PAGE_SECTORS; + i += (nullb->dev->blocksize >> SECTOR_SHIFT)) { + if (test_bit(i, c_page->bitmap)) { + offset = (i << SECTOR_SHIFT); + memcpy(dst + offset, src + offset, + nullb->dev->blocksize); + __set_bit(i, t_page->bitmap); + } + } + + kunmap_atomic(dst); + kunmap_atomic(src); + + ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page); + null_free_page(ret); + nullb->dev->curr_cache -= PAGE_SIZE; + + return 0; +} + +static int null_make_cache_space(struct nullb *nullb, unsigned long n) +{ + int i, err, nr_pages; + struct nullb_page *c_pages[FREE_BATCH]; + unsigned long flushed = 0, one_round; + +again: + if ((nullb->dev->cache_size * 1024 * 1024) > + nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0) + return 0; + + nr_pages = radix_tree_gang_lookup(&nullb->dev->cache, + (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH); + /* + * nullb_flush_cache_page could unlock before using the c_pages. To + * avoid race, we don't allow page free + */ + for (i = 0; i < nr_pages; i++) { + nullb->cache_flush_pos = c_pages[i]->page->index; + /* + * We found the page which is being flushed to disk by other + * threads + */ + if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap)) + c_pages[i] = NULL; + else + __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap); + } + + one_round = 0; + for (i = 0; i < nr_pages; i++) { + if (c_pages[i] == NULL) + continue; + err = null_flush_cache_page(nullb, c_pages[i]); + if (err) + return err; + one_round++; + } + flushed += one_round << PAGE_SHIFT; + + if (n > flushed) { + if (nr_pages == 0) + nullb->cache_flush_pos = 0; + if (one_round == 0) { + /* give other threads a chance */ + spin_unlock_irq(&nullb->lock); + spin_lock_irq(&nullb->lock); + } + goto again; + } + return 0; +} + +static int copy_to_nullb(struct nullb *nullb, struct page *source, + unsigned int off, sector_t sector, size_t n, bool is_fua) +{ + size_t temp, count = 0; + unsigned int offset; + struct nullb_page *t_page; + void *dst, *src; + + while (count < n) { + temp = min_t(size_t, nullb->dev->blocksize, n - count); + + if (null_cache_active(nullb) && !is_fua) + null_make_cache_space(nullb, PAGE_SIZE); + + offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; + t_page = null_insert_page(nullb, sector, + !null_cache_active(nullb) || is_fua); + if (!t_page) + return -ENOSPC; + + src = kmap_atomic(source); + dst = kmap_atomic(t_page->page); + memcpy(dst + offset, src + off + count, temp); + kunmap_atomic(dst); + kunmap_atomic(src); + + __set_bit(sector & SECTOR_MASK, t_page->bitmap); + + if (is_fua) + null_free_sector(nullb, sector, true); + + count += temp; + sector += temp >> SECTOR_SHIFT; + } + return 0; +} + +static int copy_from_nullb(struct nullb *nullb, struct page *dest, + unsigned int off, sector_t sector, size_t n) +{ + size_t temp, count = 0; + unsigned int offset; + struct nullb_page *t_page; + void *dst, *src; + + while (count < n) { + temp = min_t(size_t, nullb->dev->blocksize, n - count); + + offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; + t_page = null_lookup_page(nullb, sector, false, + !null_cache_active(nullb)); + + dst = kmap_atomic(dest); + if (!t_page) { + memset(dst + off + count, 0, temp); + goto next; + } + src = kmap_atomic(t_page->page); + memcpy(dst + off + count, src + offset, temp); + kunmap_atomic(src); +next: + kunmap_atomic(dst); + + count += temp; + sector += temp >> SECTOR_SHIFT; + } + return 0; +} + +static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) +{ + size_t temp; + + spin_lock_irq(&nullb->lock); + while (n > 0) { + temp = min_t(size_t, n, nullb->dev->blocksize); + null_free_sector(nullb, sector, false); + if (null_cache_active(nullb)) + null_free_sector(nullb, sector, true); + sector += temp >> SECTOR_SHIFT; + n -= temp; + } + spin_unlock_irq(&nullb->lock); +} + +static int null_handle_flush(struct nullb *nullb) +{ + int err; + + if (!null_cache_active(nullb)) + return 0; + + spin_lock_irq(&nullb->lock); + while (true) { + err = null_make_cache_space(nullb, + nullb->dev->cache_size * 1024 * 1024); + if (err || nullb->dev->curr_cache == 0) + break; + } + + WARN_ON(!radix_tree_empty(&nullb->dev->cache)); + spin_unlock_irq(&nullb->lock); + return err; +} + +static int null_transfer(struct nullb *nullb, struct page *page, + unsigned int len, unsigned int off, bool is_write, sector_t sector, + bool is_fua) +{ + int err = 0; + + if (!is_write) { + err = copy_from_nullb(nullb, page, off, sector, len); + flush_dcache_page(page); + } else { + flush_dcache_page(page); + err = copy_to_nullb(nullb, page, off, sector, len, is_fua); + } + + return err; +} + +static int null_handle_rq(struct nullb_cmd *cmd) +{ + struct request *rq = cmd->rq; + struct nullb *nullb = cmd->nq->dev->nullb; + int err; + unsigned int len; + sector_t sector; + struct req_iterator iter; + struct bio_vec bvec; + + sector = blk_rq_pos(rq); + + if (req_op(rq) == REQ_OP_DISCARD) { + null_handle_discard(nullb, sector, blk_rq_bytes(rq)); + return 0; + } + + spin_lock_irq(&nullb->lock); + rq_for_each_segment(bvec, rq, iter) { + len = bvec.bv_len; + err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, + op_is_write(req_op(rq)), sector, + req_op(rq) & REQ_FUA); + if (err) { + spin_unlock_irq(&nullb->lock); + return err; + } + sector += len >> SECTOR_SHIFT; + } + spin_unlock_irq(&nullb->lock); + + return 0; +} + +static int null_handle_bio(struct nullb_cmd *cmd) +{ + struct bio *bio = cmd->bio; + struct nullb *nullb = cmd->nq->dev->nullb; + int err; + unsigned int len; + sector_t sector; + struct bio_vec bvec; + struct bvec_iter iter; + + sector = bio->bi_iter.bi_sector; + + if (bio_op(bio) == REQ_OP_DISCARD) { + null_handle_discard(nullb, sector, + bio_sectors(bio) << SECTOR_SHIFT); + return 0; + } + + spin_lock_irq(&nullb->lock); + bio_for_each_segment(bvec, bio, iter) { + len = bvec.bv_len; + err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, + op_is_write(bio_op(bio)), sector, + bio_op(bio) & REQ_FUA); + if (err) { + spin_unlock_irq(&nullb->lock); + return err; + } + sector += len >> SECTOR_SHIFT; + } + spin_unlock_irq(&nullb->lock); + return 0; +} + +static void null_stop_queue(struct nullb *nullb) +{ + struct request_queue *q = nullb->q; + + if (nullb->dev->queue_mode == NULL_Q_MQ) + blk_mq_stop_hw_queues(q); + else { + spin_lock_irq(q->queue_lock); + blk_stop_queue(q); + spin_unlock_irq(q->queue_lock); + } +} + +static void null_restart_queue_async(struct nullb *nullb) +{ + struct request_queue *q = nullb->q; + unsigned long flags; + + if (nullb->dev->queue_mode == NULL_Q_MQ) + blk_mq_start_stopped_hw_queues(q, true); + else { + spin_lock_irqsave(q->queue_lock, flags); + blk_start_queue_async(q); + spin_unlock_irqrestore(q->queue_lock, flags); + } +} + +static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) +{ + struct nullb_device *dev = cmd->nq->dev; + struct nullb *nullb = dev->nullb; + int err = 0; + + if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) { + cmd->error = null_zone_report(nullb, cmd); + goto out; + } + + if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) { + struct request *rq = cmd->rq; + + if (!hrtimer_active(&nullb->bw_timer)) + hrtimer_restart(&nullb->bw_timer); + + if (atomic_long_sub_return(blk_rq_bytes(rq), + &nullb->cur_bytes) < 0) { + null_stop_queue(nullb); + /* race with timer */ + if (atomic_long_read(&nullb->cur_bytes) > 0) + null_restart_queue_async(nullb); + if (dev->queue_mode == NULL_Q_RQ) { + struct request_queue *q = nullb->q; + + spin_lock_irq(q->queue_lock); + rq->rq_flags |= RQF_DONTPREP; + blk_requeue_request(q, rq); + spin_unlock_irq(q->queue_lock); + return BLK_STS_OK; + } else + /* requeue request */ + return BLK_STS_DEV_RESOURCE; + } + } + + if (nullb->dev->badblocks.shift != -1) { + int bad_sectors; + sector_t sector, size, first_bad; + bool is_flush = true; + + if (dev->queue_mode == NULL_Q_BIO && + bio_op(cmd->bio) != REQ_OP_FLUSH) { + is_flush = false; + sector = cmd->bio->bi_iter.bi_sector; + size = bio_sectors(cmd->bio); + } + if (dev->queue_mode != NULL_Q_BIO && + req_op(cmd->rq) != REQ_OP_FLUSH) { + is_flush = false; + sector = blk_rq_pos(cmd->rq); + size = blk_rq_sectors(cmd->rq); + } + if (!is_flush && badblocks_check(&nullb->dev->badblocks, sector, + size, &first_bad, &bad_sectors)) { + cmd->error = BLK_STS_IOERR; + goto out; + } + } + + if (dev->memory_backed) { + if (dev->queue_mode == NULL_Q_BIO) { + if (bio_op(cmd->bio) == REQ_OP_FLUSH) + err = null_handle_flush(nullb); + else + err = null_handle_bio(cmd); + } else { + if (req_op(cmd->rq) == REQ_OP_FLUSH) + err = null_handle_flush(nullb); + else + err = null_handle_rq(cmd); + } + } + cmd->error = errno_to_blk_status(err); + + if (!cmd->error && dev->zoned) { + if (req_op(cmd->rq) == REQ_OP_WRITE) + null_zone_write(cmd); + else if (req_op(cmd->rq) == REQ_OP_ZONE_RESET) + null_zone_reset(cmd); + } +out: + /* Complete IO by inline, softirq or timer */ + switch (dev->irqmode) { + case NULL_IRQ_SOFTIRQ: + switch (dev->queue_mode) { + case NULL_Q_MQ: + blk_mq_complete_request(cmd->rq); + break; + case NULL_Q_RQ: + blk_complete_request(cmd->rq); + break; + case NULL_Q_BIO: + /* + * XXX: no proper submitting cpu information available. + */ + end_cmd(cmd); + break; + } + break; + case NULL_IRQ_NONE: + end_cmd(cmd); + break; + case NULL_IRQ_TIMER: + null_cmd_end_timer(cmd); + break; + } + return BLK_STS_OK; +} + +static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer) +{ + struct nullb *nullb = container_of(timer, struct nullb, bw_timer); + ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL); + unsigned int mbps = nullb->dev->mbps; + + if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps)) + return HRTIMER_NORESTART; + + atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps)); + null_restart_queue_async(nullb); + + hrtimer_forward_now(&nullb->bw_timer, timer_interval); + + return HRTIMER_RESTART; +} + +static void nullb_setup_bwtimer(struct nullb *nullb) +{ + ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL); + + hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + nullb->bw_timer.function = nullb_bwtimer_fn; + atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps)); + hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL); +} + +static struct nullb_queue *nullb_to_queue(struct nullb *nullb) +{ + int index = 0; + + if (nullb->nr_queues != 1) + index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues); + + return &nullb->queues[index]; +} + +static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio) +{ + struct nullb *nullb = q->queuedata; + struct nullb_queue *nq = nullb_to_queue(nullb); + struct nullb_cmd *cmd; + + cmd = alloc_cmd(nq, 1); + cmd->bio = bio; + + null_handle_cmd(cmd); + return BLK_QC_T_NONE; +} + +static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq) +{ + pr_info("null: rq %p timed out\n", rq); + __blk_complete_request(rq); + return BLK_EH_DONE; +} + +static int null_rq_prep_fn(struct request_queue *q, struct request *req) +{ + struct nullb *nullb = q->queuedata; + struct nullb_queue *nq = nullb_to_queue(nullb); + struct nullb_cmd *cmd; + + cmd = alloc_cmd(nq, 0); + if (cmd) { + cmd->rq = req; + req->special = cmd; + return BLKPREP_OK; + } + blk_stop_queue(q); + + return BLKPREP_DEFER; +} + +static bool should_timeout_request(struct request *rq) +{ +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + if (g_timeout_str[0]) + return should_fail(&null_timeout_attr, 1); +#endif + return false; +} + +static bool should_requeue_request(struct request *rq) +{ +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + if (g_requeue_str[0]) + return should_fail(&null_requeue_attr, 1); +#endif + return false; +} + +static void null_request_fn(struct request_queue *q) +{ + struct request *rq; + + while ((rq = blk_fetch_request(q)) != NULL) { + struct nullb_cmd *cmd = rq->special; + + /* just ignore the request */ + if (should_timeout_request(rq)) + continue; + if (should_requeue_request(rq)) { + blk_requeue_request(q, rq); + continue; + } + + spin_unlock_irq(q->queue_lock); + null_handle_cmd(cmd); + spin_lock_irq(q->queue_lock); + } +} + +static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) +{ + pr_info("null: rq %p timed out\n", rq); + blk_mq_complete_request(rq); + return BLK_EH_DONE; +} + +static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + struct nullb_queue *nq = hctx->driver_data; + + might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); + + if (nq->dev->irqmode == NULL_IRQ_TIMER) { + hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cmd->timer.function = null_cmd_timer_expired; + } + cmd->rq = bd->rq; + cmd->nq = nq; + + blk_mq_start_request(bd->rq); + + if (should_requeue_request(bd->rq)) { + /* + * Alternate between hitting the core BUSY path, and the + * driver driven requeue path + */ + nq->requeue_selection++; + if (nq->requeue_selection & 1) + return BLK_STS_RESOURCE; + else { + blk_mq_requeue_request(bd->rq, true); + return BLK_STS_OK; + } + } + if (should_timeout_request(bd->rq)) + return BLK_STS_OK; + + return null_handle_cmd(cmd); +} + +static const struct blk_mq_ops null_mq_ops = { + .queue_rq = null_queue_rq, + .complete = null_softirq_done_fn, + .timeout = null_timeout_rq, +}; + +static void cleanup_queue(struct nullb_queue *nq) +{ + kfree(nq->tag_map); + kfree(nq->cmds); +} + +static void cleanup_queues(struct nullb *nullb) +{ + int i; + + for (i = 0; i < nullb->nr_queues; i++) + cleanup_queue(&nullb->queues[i]); + + kfree(nullb->queues); +} + +static void null_del_dev(struct nullb *nullb) +{ + struct nullb_device *dev = nullb->dev; + + ida_simple_remove(&nullb_indexes, nullb->index); + + list_del_init(&nullb->list); + + del_gendisk(nullb->disk); + + if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) { + hrtimer_cancel(&nullb->bw_timer); + atomic_long_set(&nullb->cur_bytes, LONG_MAX); + null_restart_queue_async(nullb); + } + + blk_cleanup_queue(nullb->q); + if (dev->queue_mode == NULL_Q_MQ && + nullb->tag_set == &nullb->__tag_set) + blk_mq_free_tag_set(nullb->tag_set); + put_disk(nullb->disk); + cleanup_queues(nullb); + if (null_cache_active(nullb)) + null_free_device_storage(nullb->dev, true); + kfree(nullb); + dev->nullb = NULL; +} + +static void null_config_discard(struct nullb *nullb) +{ + if (nullb->dev->discard == false) + return; + nullb->q->limits.discard_granularity = nullb->dev->blocksize; + nullb->q->limits.discard_alignment = nullb->dev->blocksize; + blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q); +} + +static int null_open(struct block_device *bdev, fmode_t mode) +{ + return 0; +} + +static void null_release(struct gendisk *disk, fmode_t mode) +{ +} + +static const struct block_device_operations null_fops = { + .owner = THIS_MODULE, + .open = null_open, + .release = null_release, +}; + +static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) +{ + BUG_ON(!nullb); + BUG_ON(!nq); + + init_waitqueue_head(&nq->wait); + nq->queue_depth = nullb->queue_depth; + nq->dev = nullb->dev; +} + +static void null_init_queues(struct nullb *nullb) +{ + struct request_queue *q = nullb->q; + struct blk_mq_hw_ctx *hctx; + struct nullb_queue *nq; + int i; + + queue_for_each_hw_ctx(q, hctx, i) { + if (!hctx->nr_ctx || !hctx->tags) + continue; + nq = &nullb->queues[i]; + hctx->driver_data = nq; + null_init_queue(nullb, nq); + nullb->nr_queues++; + } +} + +static int setup_commands(struct nullb_queue *nq) +{ + struct nullb_cmd *cmd; + int i, tag_size; + + nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL); + if (!nq->cmds) + return -ENOMEM; + + tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG; + nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL); + if (!nq->tag_map) { + kfree(nq->cmds); + return -ENOMEM; + } + + for (i = 0; i < nq->queue_depth; i++) { + cmd = &nq->cmds[i]; + INIT_LIST_HEAD(&cmd->list); + cmd->ll_list.next = NULL; + cmd->tag = -1U; + } + + return 0; +} + +static int setup_queues(struct nullb *nullb) +{ + nullb->queues = kcalloc(nullb->dev->submit_queues, + sizeof(struct nullb_queue), + GFP_KERNEL); + if (!nullb->queues) + return -ENOMEM; + + nullb->nr_queues = 0; + nullb->queue_depth = nullb->dev->hw_queue_depth; + + return 0; +} + +static int init_driver_queues(struct nullb *nullb) +{ + struct nullb_queue *nq; + int i, ret = 0; + + for (i = 0; i < nullb->dev->submit_queues; i++) { + nq = &nullb->queues[i]; + + null_init_queue(nullb, nq); + + ret = setup_commands(nq); + if (ret) + return ret; + nullb->nr_queues++; + } + return 0; +} + +static int null_gendisk_register(struct nullb *nullb) +{ + struct gendisk *disk; + sector_t size; + + disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node); + if (!disk) + return -ENOMEM; + size = (sector_t)nullb->dev->size * 1024 * 1024ULL; + set_capacity(disk, size >> 9); + + disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; + disk->major = null_major; + disk->first_minor = nullb->index; + disk->fops = &null_fops; + disk->private_data = nullb; + disk->queue = nullb->q; + strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); + + add_disk(disk); + return 0; +} + +static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) +{ + set->ops = &null_mq_ops; + set->nr_hw_queues = nullb ? nullb->dev->submit_queues : + g_submit_queues; + set->queue_depth = nullb ? nullb->dev->hw_queue_depth : + g_hw_queue_depth; + set->numa_node = nullb ? nullb->dev->home_node : g_home_node; + set->cmd_size = sizeof(struct nullb_cmd); + set->flags = BLK_MQ_F_SHOULD_MERGE; + if (g_no_sched) + set->flags |= BLK_MQ_F_NO_SCHED; + set->driver_data = NULL; + + if ((nullb && nullb->dev->blocking) || g_blocking) + set->flags |= BLK_MQ_F_BLOCKING; + + return blk_mq_alloc_tag_set(set); +} + +static void null_validate_conf(struct nullb_device *dev) +{ + dev->blocksize = round_down(dev->blocksize, 512); + dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); + + if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) { + if (dev->submit_queues != nr_online_nodes) + dev->submit_queues = nr_online_nodes; + } else if (dev->submit_queues > nr_cpu_ids) + dev->submit_queues = nr_cpu_ids; + else if (dev->submit_queues == 0) + dev->submit_queues = 1; + + dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); + dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER); + + /* Do memory allocation, so set blocking */ + if (dev->memory_backed) + dev->blocking = true; + else /* cache is meaningless */ + dev->cache_size = 0; + dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024, + dev->cache_size); + dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps); + /* can not stop a queue */ + if (dev->queue_mode == NULL_Q_BIO) + dev->mbps = 0; +} + +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION +static bool __null_setup_fault(struct fault_attr *attr, char *str) +{ + if (!str[0]) + return true; + + if (!setup_fault_attr(attr, str)) + return false; + + attr->verbose = 0; + return true; +} +#endif + +static bool null_setup_fault(void) +{ +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + if (!__null_setup_fault(&null_timeout_attr, g_timeout_str)) + return false; + if (!__null_setup_fault(&null_requeue_attr, g_requeue_str)) + return false; +#endif + return true; +} + +static int null_add_dev(struct nullb_device *dev) +{ + struct nullb *nullb; + int rv; + + null_validate_conf(dev); + + nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node); + if (!nullb) { + rv = -ENOMEM; + goto out; + } + nullb->dev = dev; + dev->nullb = nullb; + + spin_lock_init(&nullb->lock); + + rv = setup_queues(nullb); + if (rv) + goto out_free_nullb; + + if (dev->queue_mode == NULL_Q_MQ) { + if (shared_tags) { + nullb->tag_set = &tag_set; + rv = 0; + } else { + nullb->tag_set = &nullb->__tag_set; + rv = null_init_tag_set(nullb, nullb->tag_set); + } + + if (rv) + goto out_cleanup_queues; + + if (!null_setup_fault()) + goto out_cleanup_queues; + + nullb->tag_set->timeout = 5 * HZ; + nullb->q = blk_mq_init_queue(nullb->tag_set); + if (IS_ERR(nullb->q)) { + rv = -ENOMEM; + goto out_cleanup_tags; + } + null_init_queues(nullb); + } else if (dev->queue_mode == NULL_Q_BIO) { + nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node, + NULL); + if (!nullb->q) { + rv = -ENOMEM; + goto out_cleanup_queues; + } + blk_queue_make_request(nullb->q, null_queue_bio); + rv = init_driver_queues(nullb); + if (rv) + goto out_cleanup_blk_queue; + } else { + nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, + dev->home_node); + if (!nullb->q) { + rv = -ENOMEM; + goto out_cleanup_queues; + } + + if (!null_setup_fault()) + goto out_cleanup_blk_queue; + + blk_queue_prep_rq(nullb->q, null_rq_prep_fn); + blk_queue_softirq_done(nullb->q, null_softirq_done_fn); + blk_queue_rq_timed_out(nullb->q, null_rq_timed_out_fn); + nullb->q->rq_timeout = 5 * HZ; + rv = init_driver_queues(nullb); + if (rv) + goto out_cleanup_blk_queue; + } + + if (dev->mbps) { + set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags); + nullb_setup_bwtimer(nullb); + } + + if (dev->cache_size > 0) { + set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); + blk_queue_write_cache(nullb->q, true, true); + blk_queue_flush_queueable(nullb->q, true); + } + + if (dev->zoned) { + rv = null_zone_init(dev); + if (rv) + goto out_cleanup_blk_queue; + + blk_queue_chunk_sectors(nullb->q, dev->zone_size_sects); + nullb->q->limits.zoned = BLK_ZONED_HM; + } + + nullb->q->queuedata = nullb; + blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q); + + mutex_lock(&lock); + nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); + dev->index = nullb->index; + mutex_unlock(&lock); + + blk_queue_logical_block_size(nullb->q, dev->blocksize); + blk_queue_physical_block_size(nullb->q, dev->blocksize); + + null_config_discard(nullb); + + sprintf(nullb->disk_name, "nullb%d", nullb->index); + + rv = null_gendisk_register(nullb); + if (rv) + goto out_cleanup_zone; + + mutex_lock(&lock); + list_add_tail(&nullb->list, &nullb_list); + mutex_unlock(&lock); + + return 0; +out_cleanup_zone: + if (dev->zoned) + null_zone_exit(dev); +out_cleanup_blk_queue: + blk_cleanup_queue(nullb->q); +out_cleanup_tags: + if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) + blk_mq_free_tag_set(nullb->tag_set); +out_cleanup_queues: + cleanup_queues(nullb); +out_free_nullb: + kfree(nullb); +out: + return rv; +} + +static int __init null_init(void) +{ + int ret = 0; + unsigned int i; + struct nullb *nullb; + struct nullb_device *dev; + + if (g_bs > PAGE_SIZE) { + pr_warn("null_blk: invalid block size\n"); + pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); + g_bs = PAGE_SIZE; + } + + if (!is_power_of_2(g_zone_size)) { + pr_err("null_blk: zone_size must be power-of-two\n"); + return -EINVAL; + } + + if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) { + if (g_submit_queues != nr_online_nodes) { + pr_warn("null_blk: submit_queues param is set to %u.\n", + nr_online_nodes); + g_submit_queues = nr_online_nodes; + } + } else if (g_submit_queues > nr_cpu_ids) + g_submit_queues = nr_cpu_ids; + else if (g_submit_queues <= 0) + g_submit_queues = 1; + + if (g_queue_mode == NULL_Q_MQ && shared_tags) { + ret = null_init_tag_set(NULL, &tag_set); + if (ret) + return ret; + } + + config_group_init(&nullb_subsys.su_group); + mutex_init(&nullb_subsys.su_mutex); + + ret = configfs_register_subsystem(&nullb_subsys); + if (ret) + goto err_tagset; + + mutex_init(&lock); + + null_major = register_blkdev(0, "nullb"); + if (null_major < 0) { + ret = null_major; + goto err_conf; + } + + for (i = 0; i < nr_devices; i++) { + dev = null_alloc_dev(); + if (!dev) { + ret = -ENOMEM; + goto err_dev; + } + ret = null_add_dev(dev); + if (ret) { + null_free_dev(dev); + goto err_dev; + } + } + + pr_info("null: module loaded\n"); + return 0; + +err_dev: + while (!list_empty(&nullb_list)) { + nullb = list_entry(nullb_list.next, struct nullb, list); + dev = nullb->dev; + null_del_dev(nullb); + null_free_dev(dev); + } + unregister_blkdev(null_major, "nullb"); +err_conf: + configfs_unregister_subsystem(&nullb_subsys); +err_tagset: + if (g_queue_mode == NULL_Q_MQ && shared_tags) + blk_mq_free_tag_set(&tag_set); + return ret; +} + +static void __exit null_exit(void) +{ + struct nullb *nullb; + + configfs_unregister_subsystem(&nullb_subsys); + + unregister_blkdev(null_major, "nullb"); + + mutex_lock(&lock); + while (!list_empty(&nullb_list)) { + struct nullb_device *dev; + + nullb = list_entry(nullb_list.next, struct nullb, list); + dev = nullb->dev; + null_del_dev(nullb); + null_free_dev(dev); + } + mutex_unlock(&lock); + + if (g_queue_mode == NULL_Q_MQ && shared_tags) + blk_mq_free_tag_set(&tag_set); +} + +module_init(null_init); +module_exit(null_exit); + +MODULE_AUTHOR("Jens Axboe "); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From d3df0ac09654e9db82a882031ccae010f1b7575b Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 25 Jul 2018 09:42:07 +0200 Subject: xen/blkfront: remove unused macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove some macros not used anywhere. Acked-by: Roger Pau Monné Signed-off-by: Juergen Gross Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index b5cedccb5d7d..94300dbe358b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -251,14 +251,9 @@ static DEFINE_SPINLOCK(minor_lock); #define GRANTS_PER_INDIRECT_FRAME \ (XEN_PAGE_SIZE / sizeof(struct blkif_request_segment)) -#define PSEGS_PER_INDIRECT_FRAME \ - (GRANTS_INDIRECT_FRAME / GRANTS_PSEGS) - #define INDIRECT_GREFS(_grants) \ DIV_ROUND_UP(_grants, GRANTS_PER_INDIRECT_FRAME) -#define GREFS(_psegs) ((_psegs) * GRANTS_PER_PSEG) - static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo); static void blkfront_gather_backend_features(struct blkfront_info *info); static int negotiate_mq(struct blkfront_info *info); -- cgit v1.2.3 From 55690c07b44a82cc3359ce0c233f4ba7d80ba145 Mon Sep 17 00:00:00 2001 From: Jinbum Park Date: Sat, 28 Jul 2018 13:20:44 +0900 Subject: pktcdvd: Fix possible Spectre-v1 for pkt_devs User controls @dev_minor which to be used as index of pkt_devs. So, It can be exploited via Spectre-like attack. (speculative execution) This kind of attack leaks address of pkt_devs, [1] It leads an attacker to bypass security mechanism such as KASLR. So sanitize @dev_minor before using it to prevent attack. [1] https://github.com/jinb-park/linux-exploit/ tree/master/exploit-remaining-spectre-gadget/leak_pkt_devs.c Signed-off-by: Jinbum Park Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index a4b4d524c3af..9bb7721c26fc 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -67,7 +67,7 @@ #include #include #include - +#include #include #define DRIVER_NAME "pktcdvd" @@ -2254,6 +2254,8 @@ static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor) { if (dev_minor >= MAX_WRITERS) return NULL; + + dev_minor = array_index_nospec(dev_minor, MAX_WRITERS); return pkt_devs[dev_minor]; } -- cgit v1.2.3 From 99972f171bba19243999310154b7442198f0ab30 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 1 Aug 2018 17:30:20 -0500 Subject: aoe: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 114722 ("Missing break in switch") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 096882e54095..136dc507d020 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1137,6 +1137,7 @@ noskb: if (buf) break; } bvcpy(skb, f->buf->bio, f->iter, n); + /* fall through */ case ATA_CMD_PIO_WRITE: case ATA_CMD_PIO_WRITE_EXT: spin_lock_irq(&d->lock); -- cgit v1.2.3 From e7d0748dd71695b94f3a35c8bdc05226a7f3d919 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Aug 2018 15:22:13 -0600 Subject: block: Switch struct packet_command to use struct scsi_sense_hdr There is a lot of needless struct request_sense usage in the CDROM code. These can all be struct scsi_sense_hdr instead, to avoid any confusion over their respective structure sizes. This patch is a lot of noise changing "sense" to "sshdr", but the final code is more readable to distinguish between "sense" meaning "struct request_sense" and "sshdr" meaning "struct scsi_sense_hdr". Reviewed-by: Christoph Hellwig Signed-off-by: Kees Cook Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 36 ++++++++++++++++++------------------ drivers/cdrom/cdrom.c | 22 +++++++++++----------- drivers/ide/ide-cd.c | 11 ++++++----- drivers/ide/ide-cd.h | 4 ++-- drivers/ide/ide-cd_ioctl.c | 30 +++++++++++++++--------------- drivers/scsi/sr_ioctl.c | 22 +++++++++------------- include/linux/cdrom.h | 3 ++- 7 files changed, 63 insertions(+), 65 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 9bb7721c26fc..e285413d4a75 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -748,13 +748,13 @@ static const char *sense_key_string(__u8 index) static void pkt_dump_sense(struct pktcdvd_device *pd, struct packet_command *cgc) { - struct request_sense *sense = cgc->sense; + struct scsi_sense_hdr *sshdr = cgc->sshdr; - if (sense) + if (sshdr) pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n", CDROM_PACKET_SIZE, cgc->cmd, - sense->sense_key, sense->asc, sense->ascq, - sense_key_string(sense->sense_key)); + sshdr->sense_key, sshdr->asc, sshdr->ascq, + sense_key_string(sshdr->sense_key)); else pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd); } @@ -787,11 +787,11 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd, unsigned write_speed, unsigned read_speed) { struct packet_command cgc; - struct request_sense sense; + struct scsi_sense_hdr sshdr; int ret; init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); - cgc.sense = &sense; + cgc.sshdr = &sshdr; cgc.cmd[0] = GPCMD_SET_SPEED; cgc.cmd[2] = (read_speed >> 8) & 0xff; cgc.cmd[3] = read_speed & 0xff; @@ -1651,7 +1651,7 @@ static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd, static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) { struct packet_command cgc; - struct request_sense sense; + struct scsi_sense_hdr sshdr; write_param_page *wp; char buffer[128]; int ret, size; @@ -1662,7 +1662,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) memset(buffer, 0, sizeof(buffer)); init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ); - cgc.sense = &sense; + cgc.sshdr = &sshdr; ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0); if (ret) { pkt_dump_sense(pd, &cgc); @@ -1678,7 +1678,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) * now get it all */ init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ); - cgc.sense = &sense; + cgc.sshdr = &sshdr; ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0); if (ret) { pkt_dump_sense(pd, &cgc); @@ -1916,12 +1916,12 @@ static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd, int set) { struct packet_command cgc; - struct request_sense sense; + struct scsi_sense_hdr sshdr; unsigned char buf[64]; int ret; init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); - cgc.sense = &sense; + cgc.sshdr = &sshdr; cgc.buflen = pd->mode_offset + 12; /* @@ -1962,14 +1962,14 @@ static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd, unsigned *write_speed) { struct packet_command cgc; - struct request_sense sense; + struct scsi_sense_hdr sshdr; unsigned char buf[256+18]; unsigned char *cap_buf; int ret, offset; cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset]; init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN); - cgc.sense = &sense; + cgc.sshdr = &sshdr; ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0); if (ret) { @@ -2023,13 +2023,13 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd, unsigned *speed) { struct packet_command cgc; - struct request_sense sense; + struct scsi_sense_hdr sshdr; unsigned char buf[64]; unsigned int size, st, sp; int ret; init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ); - cgc.sense = &sense; + cgc.sshdr = &sshdr; cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP; cgc.cmd[1] = 2; cgc.cmd[2] = 4; /* READ ATIP */ @@ -2044,7 +2044,7 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd, size = sizeof(buf); init_cdrom_command(&cgc, buf, size, CGC_DATA_READ); - cgc.sense = &sense; + cgc.sshdr = &sshdr; cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP; cgc.cmd[1] = 2; cgc.cmd[2] = 4; @@ -2095,13 +2095,13 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd, static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd) { struct packet_command cgc; - struct request_sense sense; + struct scsi_sense_hdr sshdr; int ret; pkt_dbg(2, pd, "Performing OPC\n"); init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); - cgc.sense = &sense; + cgc.sshdr = &sshdr; cgc.timeout = 60*HZ; cgc.cmd[0] = GPCMD_SEND_OPC; cgc.cmd[1] = 1; diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index a78b8e7085e9..86619472d916 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -345,10 +345,10 @@ static LIST_HEAD(cdrom_list); int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, struct packet_command *cgc) { - if (cgc->sense) { - cgc->sense->sense_key = 0x05; - cgc->sense->asc = 0x20; - cgc->sense->ascq = 0x00; + if (cgc->sshdr) { + cgc->sshdr->sense_key = 0x05; + cgc->sshdr->asc = 0x20; + cgc->sshdr->ascq = 0x00; } cgc->stat = -EIO; @@ -2943,7 +2943,7 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi, struct packet_command *cgc, int cmd) { - struct request_sense sense; + struct scsi_sense_hdr sshdr; struct cdrom_msf msf; int blocksize = 0, format = 0, lba; int ret; @@ -2971,13 +2971,13 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi, if (cgc->buffer == NULL) return -ENOMEM; - memset(&sense, 0, sizeof(sense)); - cgc->sense = &sense; + memset(&sshdr, 0, sizeof(sshdr)); + cgc->sshdr = &sshdr; cgc->data_direction = CGC_DATA_READ; ret = cdrom_read_block(cdi, cgc, lba, 1, format, blocksize); - if (ret && sense.sense_key == 0x05 && - sense.asc == 0x20 && - sense.ascq == 0x00) { + if (ret && sshdr.sense_key == 0x05 && + sshdr.asc == 0x20 && + sshdr.ascq == 0x00) { /* * SCSI-II devices are not required to support * READ_CD, so let's try switching block size @@ -2986,7 +2986,7 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi, ret = cdrom_switch_blocksize(cdi, blocksize); if (ret) goto out; - cgc->sense = NULL; + cgc->sshdr = NULL; ret = cdrom_read_cd(cdi, cgc, lba, blocksize, 1); ret |= cdrom_switch_blocksize(cdi, blocksize); } diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index a37dd381d307..a24cdff01865 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -419,7 +419,7 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd) int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, int write, void *buffer, unsigned *bufflen, - struct request_sense *sense, int timeout, + struct scsi_sense_hdr *sshdr, int timeout, req_flags_t rq_flags) { struct cdrom_info *info = drive->driver_data; @@ -456,8 +456,9 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, if (buffer) *bufflen = scsi_req(rq)->resid_len; - if (sense) - memcpy(sense, scsi_req(rq)->sense, sizeof(*sense)); + if (sshdr) + scsi_normalize_sense(scsi_req(rq)->sense, + scsi_req(rq)->sense_len, sshdr); /* * FIXME: we should probably abort/retry or something in case of @@ -864,7 +865,7 @@ static void msf_from_bcd(struct atapi_msf *msf) msf->frame = bcd2bin(msf->frame); } -int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense) +int cdrom_check_status(ide_drive_t *drive, struct scsi_sense_hdr *sshdr) { struct cdrom_info *info = drive->driver_data; struct cdrom_device_info *cdi; @@ -886,7 +887,7 @@ int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense) */ cmd[7] = cdi->sanyo_slot % 3; - return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, sense, 0, RQF_QUIET); + return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, sshdr, 0, RQF_QUIET); } static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity, diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index fc162fbb6629..a69dc7f61c4d 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h @@ -98,11 +98,11 @@ void ide_cd_log_error(const char *, struct request *, struct request_sense *); /* ide-cd.c functions used by ide-cd_ioctl.c */ int ide_cd_queue_pc(ide_drive_t *, const unsigned char *, int, void *, - unsigned *, struct request_sense *, int, req_flags_t); + unsigned *, struct scsi_sense_hdr *, int, req_flags_t); int ide_cd_read_toc(ide_drive_t *); int ide_cdrom_get_capabilities(ide_drive_t *, u8 *); void ide_cdrom_update_speed(ide_drive_t *, u8 *); -int cdrom_check_status(ide_drive_t *, struct request_sense *); +int cdrom_check_status(ide_drive_t *, struct scsi_sense_hdr *); /* ide-cd_ioctl.c */ int ide_cdrom_open_real(struct cdrom_device_info *, int); diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index 14540544413c..4a6e1a413ead 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ b/drivers/ide/ide-cd_ioctl.c @@ -43,14 +43,14 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr) { ide_drive_t *drive = cdi->handle; struct media_event_desc med; - struct request_sense sense; + struct scsi_sense_hdr sshdr; int stat; if (slot_nr != CDSL_CURRENT) return -EINVAL; - stat = cdrom_check_status(drive, &sense); - if (!stat || sense.sense_key == UNIT_ATTENTION) + stat = cdrom_check_status(drive, &sshdr); + if (!stat || sshdr.sense_key == UNIT_ATTENTION) return CDS_DISC_OK; if (!cdrom_get_media_event(cdi, &med)) { @@ -62,8 +62,8 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr) return CDS_NO_DISC; } - if (sense.sense_key == NOT_READY && sense.asc == 0x04 - && sense.ascq == 0x04) + if (sshdr.sense_key == NOT_READY && sshdr.asc == 0x04 + && sshdr.ascq == 0x04) return CDS_DISC_OK; /* @@ -71,8 +71,8 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr) * just return TRAY_OPEN since ATAPI doesn't provide * any other way to detect this... */ - if (sense.sense_key == NOT_READY) { - if (sense.asc == 0x3a && sense.ascq == 1) + if (sshdr.sense_key == NOT_READY) { + if (sshdr.asc == 0x3a && sshdr.ascq == 1) return CDS_NO_DISC; else return CDS_TRAY_OPEN; @@ -135,7 +135,7 @@ int cdrom_eject(ide_drive_t *drive, int ejectflag) static int ide_cd_lockdoor(ide_drive_t *drive, int lockflag) { - struct request_sense my_sense, *sense = &my_sense; + struct scsi_sense_hdr sshdr; int stat; /* If the drive cannot lock the door, just pretend. */ @@ -150,14 +150,14 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag) cmd[4] = lockflag ? 1 : 0; stat = ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, - sense, 0, 0); + &sshdr, 0, 0); } /* If we got an illegal field error, the drive probably cannot lock the door. */ if (stat != 0 && - sense->sense_key == ILLEGAL_REQUEST && - (sense->asc == 0x24 || sense->asc == 0x20)) { + sshdr.sense_key == ILLEGAL_REQUEST && + (sshdr.asc == 0x24 || sshdr.asc == 0x20)) { printk(KERN_ERR "%s: door locking not supported\n", drive->name); drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING; @@ -165,7 +165,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag) } /* no medium, that's alright. */ - if (stat != 0 && sense->sense_key == NOT_READY && sense->asc == 0x3a) + if (stat != 0 && sshdr.sense_key == NOT_READY && sshdr.asc == 0x3a) stat = 0; if (stat == 0) { @@ -451,8 +451,8 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi, layer. the packet must be complete, as we do not touch it at all. */ - if (cgc->sense) - memset(cgc->sense, 0, sizeof(struct request_sense)); + if (cgc->sshdr) + memset(cgc->sshdr, 0, sizeof(*cgc->sshdr)); if (cgc->quiet) flags |= RQF_QUIET; @@ -460,7 +460,7 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi, cgc->stat = ide_cd_queue_pc(drive, cgc->cmd, cgc->data_direction == CGC_DATA_WRITE, cgc->buffer, &len, - cgc->sense, cgc->timeout, flags); + cgc->sshdr, cgc->timeout, flags); if (!cgc->stat) cgc->buflen -= len; return cgc->stat; diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index 35fab1e18adc..ffcf902da390 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -186,14 +186,13 @@ static int sr_play_trkind(struct cdrom_device_info *cdi, int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) { struct scsi_device *SDev; - struct scsi_sense_hdr sshdr; + struct scsi_sense_hdr local_sshdr, *sshdr = &local_sshdr; int result, err = 0, retries = 0; - unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE], *senseptr = NULL; SDev = cd->device; - if (cgc->sense) - senseptr = sense_buffer; + if (cgc->sshdr) + sshdr = cgc->sshdr; retry: if (!scsi_block_when_processing_errors(SDev)) { @@ -202,15 +201,12 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) } result = scsi_execute(SDev, cgc->cmd, cgc->data_direction, - cgc->buffer, cgc->buflen, senseptr, &sshdr, + cgc->buffer, cgc->buflen, NULL, sshdr, cgc->timeout, IOCTL_RETRIES, 0, 0, NULL); - if (cgc->sense) - memcpy(cgc->sense, sense_buffer, sizeof(*cgc->sense)); - /* Minimal error checking. Ignore cases we know about, and report the rest. */ if (driver_byte(result) != 0) { - switch (sshdr.sense_key) { + switch (sshdr->sense_key) { case UNIT_ATTENTION: SDev->changed = 1; if (!cgc->quiet) @@ -221,8 +217,8 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) err = -ENOMEDIUM; break; case NOT_READY: /* This happens if there is no disc in drive */ - if (sshdr.asc == 0x04 && - sshdr.ascq == 0x01) { + if (sshdr->asc == 0x04 && + sshdr->ascq == 0x01) { /* sense: Logical unit is in process of becoming ready */ if (!cgc->quiet) sr_printk(KERN_INFO, cd, @@ -245,8 +241,8 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) break; case ILLEGAL_REQUEST: err = -EIO; - if (sshdr.asc == 0x20 && - sshdr.ascq == 0x00) + if (sshdr->asc == 0x20 && + sshdr->ascq == 0x00) /* sense: Invalid command operation code */ err = -EDRIVE_CANT_DO_THIS; break; diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index e75dfd1f1dec..528271c60018 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -13,6 +13,7 @@ #include /* not really needed, later.. */ #include +#include #include struct packet_command @@ -21,7 +22,7 @@ struct packet_command unsigned char *buffer; unsigned int buflen; int stat; - struct request_sense *sense; + struct scsi_sense_hdr *sshdr; unsigned char data_direction; int quiet; int timeout; -- cgit v1.2.3 From 4e178c17cac07d58df7d31ef6fe10036cfa3883d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 31 Jul 2018 12:51:52 -0700 Subject: cdrom: Use struct scsi_sense_hdr internally This removes more casts of struct request_sense and uses the standard struct scsi_sense_hdr instead. This also fixes any possible stale values since the prior code did not check the sense length. Reviewed-by: Christoph Hellwig Signed-off-by: Kees Cook Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 2 +- drivers/cdrom/cdrom.c | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ad9b687a236a..d4913516823f 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -74,12 +74,12 @@ config AMIGA_Z2RAM config CDROM tristate + select BLK_SCSI_REQUEST config GDROM tristate "SEGA Dreamcast GD-ROM drive" depends on SH_DREAMCAST select CDROM - select BLK_SCSI_REQUEST # only for the generic cdrom code help A standard SEGA Dreamcast comes with a modified CD ROM drive called a "GD-ROM" by SEGA to signify it is capable of reading special disks diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 86619472d916..113fc6edb2b0 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -282,6 +282,7 @@ #include #include #include +#include #include /* used to tell the module to turn on full debugging messages */ @@ -2222,9 +2223,12 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, blk_execute_rq(q, cdi->disk, rq, 0); if (scsi_req(rq)->result) { - struct request_sense *s = req->sense; + struct scsi_sense_hdr sshdr; + ret = -EIO; - cdi->last_sense = s->sense_key; + scsi_normalize_sense(req->sense, req->sense_len, + &sshdr); + cdi->last_sense = sshdr.sense_key; } if (blk_rq_unmap_user(bio)) -- cgit v1.2.3 From f87c30c96cd9b5baa28bc63900f16e04e8c7cbb2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 6 Aug 2018 08:14:55 -0600 Subject: xen-blkfront: use true and false for boolean values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return statements in functions returning bool should use true or false instead of an integer value. This code was detected with the help of Coccinelle. Acked-by: Roger Pau Monné Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 94300dbe358b..8986adab9bf5 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1436,7 +1436,7 @@ static bool blkif_completion(unsigned long *id, /* Wait the second response if not yet here. */ if (s2->status == REQ_WAITING) - return 0; + return false; bret->status = blkif_get_final_status(s->status, s2->status); @@ -1537,7 +1537,7 @@ static bool blkif_completion(unsigned long *id, } } - return 1; + return true; } static irqreturn_t blkif_interrupt(int irq, void *dev_id) -- cgit v1.2.3 From d5fcc4e46e5168e6a6ce1e350ad0714a9d880c8e Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Wed, 8 Aug 2018 22:58:33 +0800 Subject: drivers/block/mtip32xx: remove the null check for debugfs_remove_recursive debugfs_remove_recursive has taken null pointer into account. So it is safe to drop the null check before calling the function. Signed-off-by: zhong jiang Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index c73626decb46..db253cd5b32a 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2575,8 +2575,7 @@ static int mtip_hw_debugfs_init(struct driver_data *dd) static void mtip_hw_debugfs_exit(struct driver_data *dd) { - if (dd->dfs_node) - debugfs_remove_recursive(dd->dfs_node); + debugfs_remove_recursive(dd->dfs_node); } /* -- cgit v1.2.3 From 69daf897d75b31ff90031bb0a49a8d65cedfe3ca Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Wed, 8 Aug 2018 23:00:35 +0800 Subject: drivers/block/aoe/aoedev: NULL check is not needed for mempool_destroy mempool_destroy has taken the null pointer into account. So it is safe to remove the null check. Signed-off-by: zhong jiang Signed-off-by: Jens Axboe --- drivers/block/aoe/aoedev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 697f735b07a4..41060e9cedf2 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -284,8 +284,8 @@ freedev(struct aoedev *d) e = t + d->ntargets; for (; t < e && *t; t++) freetgt(d, *t); - if (d->bufpool) - mempool_destroy(d->bufpool); + + mempool_destroy(d->bufpool); skbpoolfree(d); minor_free(d->sysminor); -- cgit v1.2.3 From a12fc00b237c25519cc861b56dd4ca41bbec4ed3 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Wed, 8 Aug 2018 23:22:47 +0800 Subject: drivers/block/drbd: remove the null check for kmem_cache_destroy kmem_cache_destroy has taken null pointer into account. So it is safe to drop the null check before calling the function. Signed-off-by: zhong jiang Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a80809bd3057..ef8212a4b73e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2103,14 +2103,10 @@ static void drbd_destroy_mempools(void) mempool_exit(&drbd_md_io_page_pool); mempool_exit(&drbd_ee_mempool); mempool_exit(&drbd_request_mempool); - if (drbd_ee_cache) - kmem_cache_destroy(drbd_ee_cache); - if (drbd_request_cache) - kmem_cache_destroy(drbd_request_cache); - if (drbd_bm_ext_cache) - kmem_cache_destroy(drbd_bm_ext_cache); - if (drbd_al_ext_cache) - kmem_cache_destroy(drbd_al_ext_cache); + kmem_cache_destroy(drbd_ee_cache); + kmem_cache_destroy(drbd_request_cache); + kmem_cache_destroy(drbd_bm_ext_cache); + kmem_cache_destroy(drbd_al_ext_cache); drbd_ee_cache = NULL; drbd_request_cache = NULL; -- cgit v1.2.3 From 0a1c749dee4c52465d5580d77e0f8aaa9215c357 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 9 Aug 2018 10:54:46 -0500 Subject: block: paride: pd: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 1056543 ("Missing break in switch") Addresses-Coverity-ID: 1056544 ("Missing break in switch") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jens Axboe --- drivers/block/paride/pd.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 8961b190e256..7cf947586fe4 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -426,6 +426,7 @@ static void run_fsm(void) pd_claimed = 1; if (!pi_schedule_claimed(pi_current, run_fsm)) return; + /* fall through */ case 1: pd_claimed = 2; pi_current->proto->connect(pi_current); @@ -445,6 +446,7 @@ static void run_fsm(void) spin_unlock_irqrestore(&pd_lock, saved_flags); if (stop) return; + /* fall through */ case Hold: schedule_fsm(); return; -- cgit v1.2.3 From 61884de08f8368b9aa289ab8dc953e0ce4c755b1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 9 Aug 2018 14:22:41 -0600 Subject: null_blk: add lock drop/acquire annotation sparse complains: drivers/block/null_blk_main.c:816:24: sparse: context imbalance in 'null_insert_page' - unexpected unlock Fix it by adding the necessary annotations to the function. Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 86cafa6d3b41..6127e3ff7b4b 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -804,7 +804,9 @@ static struct nullb_page *null_lookup_page(struct nullb *nullb, } static struct nullb_page *null_insert_page(struct nullb *nullb, - sector_t sector, bool ignore_cache) + sector_t sector, bool ignore_cache) + __releases(&nullb->lock) + __acquires(&nullb->lock) { u64 idx; struct nullb_page *t_page; -- cgit v1.2.3