diff options
-rw-r--r-- | .mailmap | 5 | ||||
-rw-r--r-- | Documentation/admin-guide/blockdev/nbd.rst | 2 | ||||
-rw-r--r-- | block/bdev.c | 13 | ||||
-rw-r--r-- | block/blk-cgroup.c | 3 | ||||
-rw-r--r-- | block/blk-integrity.c | 175 | ||||
-rw-r--r-- | block/blk.h | 12 | ||||
-rw-r--r-- | block/genhd.c | 19 | ||||
-rw-r--r-- | block/partitions/core.c | 8 | ||||
-rw-r--r-- | drivers/block/brd.c | 1 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 2 | ||||
-rw-r--r-- | drivers/block/nbd.c | 7 | ||||
-rw-r--r-- | drivers/block/null_blk/main.c | 1 | ||||
-rw-r--r-- | drivers/block/ublk_drv.c | 31 | ||||
-rw-r--r-- | drivers/block/zram/zram_drv.c | 1 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 1 | ||||
-rw-r--r-- | drivers/md/md-bitmap.c | 6 | ||||
-rw-r--r-- | drivers/md/raid5.c | 45 | ||||
-rw-r--r-- | fs/fs-writeback.c | 2 | ||||
-rw-r--r-- | include/linux/blkdev.h | 3 | ||||
-rw-r--r-- | include/uapi/linux/nbd.h | 25 |
20 files changed, 192 insertions, 170 deletions
@@ -213,7 +213,10 @@ Jeff Garzik <jgarzik@pretzel.yyz.us> Jeff Layton <jlayton@kernel.org> <jlayton@poochiereds.net> Jeff Layton <jlayton@kernel.org> <jlayton@primarydata.com> Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com> -Jens Axboe <axboe@suse.de> +Jens Axboe <axboe@kernel.dk> <axboe@suse.de> +Jens Axboe <axboe@kernel.dk> <jens.axboe@oracle.com> +Jens Axboe <axboe@kernel.dk> <axboe@fb.com> +Jens Axboe <axboe@kernel.dk> <axboe@meta.com> Jens Osterkamp <Jens.Osterkamp@de.ibm.com> Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net> Jessica Zhang <quic_jesszhan@quicinc.com> <jesszhan@codeaurora.org> diff --git a/Documentation/admin-guide/blockdev/nbd.rst b/Documentation/admin-guide/blockdev/nbd.rst index d78dfe559dcf..faf2ac4b1509 100644 --- a/Documentation/admin-guide/blockdev/nbd.rst +++ b/Documentation/admin-guide/blockdev/nbd.rst @@ -14,7 +14,7 @@ to borrow disk space from another computer. Unlike NFS, it is possible to put any filesystem on it, etc. For more information, or to download the nbd-client and nbd-server -tools, go to http://nbd.sf.net/. +tools, go to https://github.com/NetworkBlockDevice/nbd. The nbd kernel module need only be installed on the client system, as the nbd-server is completely in userspace. In fact, diff --git a/block/bdev.c b/block/bdev.c index 850852fe4b78..21c63bfef323 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -418,8 +418,11 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) bdev->bd_partno = partno; bdev->bd_inode = inode; bdev->bd_queue = disk->queue; + if (partno) + bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio; + else + bdev->bd_has_submit_bio = false; bdev->bd_stats = alloc_percpu(struct disk_stats); - bdev->bd_has_submit_bio = false; if (!bdev->bd_stats) { iput(inode); return NULL; @@ -428,6 +431,14 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) return bdev; } +void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) +{ + spin_lock(&bdev->bd_size_lock); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); + bdev->bd_nr_sectors = sectors; + spin_unlock(&bdev->bd_size_lock); +} + void bdev_add(struct block_device *bdev, dev_t dev) { bdev->bd_dev = dev; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index ff45649361e7..0ce64dd73cfe 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -567,6 +567,9 @@ restart: list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { struct blkcg *blkcg = blkg->blkcg; + if (hlist_unhashed(&blkg->blkcg_node)) + continue; + spin_lock(&blkcg->lock); blkg_destroy(blkg); spin_unlock(&blkcg->lock); diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 8f01d786f5cb..d4e9b4556d14 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -212,61 +212,44 @@ bool blk_integrity_merge_bio(struct request_queue *q, struct request *req, return true; } -struct integrity_sysfs_entry { - struct attribute attr; - ssize_t (*show)(struct blk_integrity *, char *); - ssize_t (*store)(struct blk_integrity *, const char *, size_t); -}; - -static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr, - char *page) +static inline struct blk_integrity *dev_to_bi(struct device *dev) { - struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj); - struct blk_integrity *bi = &disk->queue->integrity; - struct integrity_sysfs_entry *entry = - container_of(attr, struct integrity_sysfs_entry, attr); - - return entry->show(bi, page); + return &dev_to_disk(dev)->queue->integrity; } -static ssize_t integrity_attr_store(struct kobject *kobj, - struct attribute *attr, const char *page, - size_t count) +static ssize_t format_show(struct device *dev, struct device_attribute *attr, + char *page) { - struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj); - struct blk_integrity *bi = &disk->queue->integrity; - struct integrity_sysfs_entry *entry = - container_of(attr, struct integrity_sysfs_entry, attr); - ssize_t ret = 0; + struct blk_integrity *bi = dev_to_bi(dev); - if (entry->store) - ret = entry->store(bi, page, count); - - return ret; -} - -static ssize_t integrity_format_show(struct blk_integrity *bi, char *page) -{ if (bi->profile && bi->profile->name) - return sprintf(page, "%s\n", bi->profile->name); - else - return sprintf(page, "none\n"); + return sysfs_emit(page, "%s\n", bi->profile->name); + return sysfs_emit(page, "none\n"); } -static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page) +static ssize_t tag_size_show(struct device *dev, struct device_attribute *attr, + char *page) { - return sprintf(page, "%u\n", bi->tag_size); + struct blk_integrity *bi = dev_to_bi(dev); + + return sysfs_emit(page, "%u\n", bi->tag_size); } -static ssize_t integrity_interval_show(struct blk_integrity *bi, char *page) +static ssize_t protection_interval_bytes_show(struct device *dev, + struct device_attribute *attr, + char *page) { - return sprintf(page, "%u\n", - bi->interval_exp ? 1 << bi->interval_exp : 0); + struct blk_integrity *bi = dev_to_bi(dev); + + return sysfs_emit(page, "%u\n", + bi->interval_exp ? 1 << bi->interval_exp : 0); } -static ssize_t integrity_verify_store(struct blk_integrity *bi, - const char *page, size_t count) +static ssize_t read_verify_store(struct device *dev, + struct device_attribute *attr, + const char *page, size_t count) { + struct blk_integrity *bi = dev_to_bi(dev); char *p = (char *) page; unsigned long val = simple_strtoul(p, &p, 10); @@ -278,14 +261,20 @@ static ssize_t integrity_verify_store(struct blk_integrity *bi, return count; } -static ssize_t integrity_verify_show(struct blk_integrity *bi, char *page) +static ssize_t read_verify_show(struct device *dev, + struct device_attribute *attr, char *page) { - return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_VERIFY) != 0); + struct blk_integrity *bi = dev_to_bi(dev); + + return sysfs_emit(page, "%d\n", !!(bi->flags & BLK_INTEGRITY_VERIFY)); } -static ssize_t integrity_generate_store(struct blk_integrity *bi, - const char *page, size_t count) +static ssize_t write_generate_store(struct device *dev, + struct device_attribute *attr, + const char *page, size_t count) { + struct blk_integrity *bi = dev_to_bi(dev); + char *p = (char *) page; unsigned long val = simple_strtoul(p, &p, 10); @@ -297,68 +286,44 @@ static ssize_t integrity_generate_store(struct blk_integrity *bi, return count; } -static ssize_t integrity_generate_show(struct blk_integrity *bi, char *page) +static ssize_t write_generate_show(struct device *dev, + struct device_attribute *attr, char *page) { - return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_GENERATE) != 0); -} + struct blk_integrity *bi = dev_to_bi(dev); -static ssize_t integrity_device_show(struct blk_integrity *bi, char *page) -{ - return sprintf(page, "%u\n", - (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) != 0); + return sysfs_emit(page, "%d\n", !!(bi->flags & BLK_INTEGRITY_GENERATE)); } -static struct integrity_sysfs_entry integrity_format_entry = { - .attr = { .name = "format", .mode = 0444 }, - .show = integrity_format_show, -}; - -static struct integrity_sysfs_entry integrity_tag_size_entry = { - .attr = { .name = "tag_size", .mode = 0444 }, - .show = integrity_tag_size_show, -}; - -static struct integrity_sysfs_entry integrity_interval_entry = { - .attr = { .name = "protection_interval_bytes", .mode = 0444 }, - .show = integrity_interval_show, -}; - -static struct integrity_sysfs_entry integrity_verify_entry = { - .attr = { .name = "read_verify", .mode = 0644 }, - .show = integrity_verify_show, - .store = integrity_verify_store, -}; +static ssize_t device_is_integrity_capable_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct blk_integrity *bi = dev_to_bi(dev); -static struct integrity_sysfs_entry integrity_generate_entry = { - .attr = { .name = "write_generate", .mode = 0644 }, - .show = integrity_generate_show, - .store = integrity_generate_store, -}; + return sysfs_emit(page, "%u\n", + !!(bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE)); +} -static struct integrity_sysfs_entry integrity_device_entry = { - .attr = { .name = "device_is_integrity_capable", .mode = 0444 }, - .show = integrity_device_show, -}; +static DEVICE_ATTR_RO(format); +static DEVICE_ATTR_RO(tag_size); +static DEVICE_ATTR_RO(protection_interval_bytes); +static DEVICE_ATTR_RW(read_verify); +static DEVICE_ATTR_RW(write_generate); +static DEVICE_ATTR_RO(device_is_integrity_capable); static struct attribute *integrity_attrs[] = { - &integrity_format_entry.attr, - &integrity_tag_size_entry.attr, - &integrity_interval_entry.attr, - &integrity_verify_entry.attr, - &integrity_generate_entry.attr, - &integrity_device_entry.attr, - NULL, + &dev_attr_format.attr, + &dev_attr_tag_size.attr, + &dev_attr_protection_interval_bytes.attr, + &dev_attr_read_verify.attr, + &dev_attr_write_generate.attr, + &dev_attr_device_is_integrity_capable.attr, + NULL }; -ATTRIBUTE_GROUPS(integrity); -static const struct sysfs_ops integrity_ops = { - .show = &integrity_attr_show, - .store = &integrity_attr_store, -}; - -static const struct kobj_type integrity_ktype = { - .default_groups = integrity_groups, - .sysfs_ops = &integrity_ops, +const struct attribute_group blk_integrity_attr_group = { + .name = "integrity", + .attrs = integrity_attrs, }; static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter) @@ -437,21 +402,3 @@ void blk_integrity_unregister(struct gendisk *disk) memset(bi, 0, sizeof(*bi)); } EXPORT_SYMBOL(blk_integrity_unregister); - -int blk_integrity_add(struct gendisk *disk) -{ - int ret; - - ret = kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype, - &disk_to_dev(disk)->kobj, "%s", "integrity"); - if (!ret) - kobject_uevent(&disk->integrity_kobj, KOBJ_ADD); - return ret; -} - -void blk_integrity_del(struct gendisk *disk) -{ - kobject_uevent(&disk->integrity_kobj, KOBJ_REMOVE); - kobject_del(&disk->integrity_kobj); - kobject_put(&disk->integrity_kobj); -} diff --git a/block/blk.h b/block/blk.h index 2da831103471..45547bcf1119 100644 --- a/block/blk.h +++ b/block/blk.h @@ -214,8 +214,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req, bip_next->bip_vec[0].bv_offset); } -int blk_integrity_add(struct gendisk *disk); -void blk_integrity_del(struct gendisk *); +extern const struct attribute_group blk_integrity_attr_group; #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline bool blk_integrity_merge_rq(struct request_queue *rq, struct request *r1, struct request *r2) @@ -248,13 +247,6 @@ static inline bool bio_integrity_endio(struct bio *bio) static inline void bio_integrity_free(struct bio *bio) { } -static inline int blk_integrity_add(struct gendisk *disk) -{ - return 0; -} -static inline void blk_integrity_del(struct gendisk *disk) -{ -} #endif /* CONFIG_BLK_DEV_INTEGRITY */ unsigned long blk_rq_timeout(unsigned long timeout); @@ -419,6 +411,8 @@ int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start, sector_t length); void blk_drop_partitions(struct gendisk *disk); +void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors); + struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, struct lock_class_key *lkclass); diff --git a/block/genhd.c b/block/genhd.c index 90c402771bb5..1cb489b927d5 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -57,12 +57,7 @@ static DEFINE_IDA(ext_devt_ida); void set_capacity(struct gendisk *disk, sector_t sectors) { - struct block_device *bdev = disk->part0; - - spin_lock(&bdev->bd_size_lock); - i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - bdev->bd_nr_sectors = sectors; - spin_unlock(&bdev->bd_size_lock); + bdev_set_nr_sectors(disk->part0, sectors); } EXPORT_SYMBOL(set_capacity); @@ -487,15 +482,11 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, */ pm_runtime_set_memalloc_noio(ddev, true); - ret = blk_integrity_add(disk); - if (ret) - goto out_del_block_link; - disk->part0->bd_holder_dir = kobject_create_and_add("holders", &ddev->kobj); if (!disk->part0->bd_holder_dir) { ret = -ENOMEM; - goto out_del_integrity; + goto out_del_block_link; } disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); if (!disk->slave_dir) { @@ -558,8 +549,6 @@ out_put_slave_dir: disk->slave_dir = NULL; out_put_holder_dir: kobject_put(disk->part0->bd_holder_dir); -out_del_integrity: - blk_integrity_del(disk); out_del_block_link: sysfs_remove_link(block_depr, dev_name(ddev)); out_device_del: @@ -621,7 +610,6 @@ void del_gendisk(struct gendisk *disk) if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN))) return; - blk_integrity_del(disk); disk_del_events(disk); mutex_lock(&disk->open_mutex); @@ -1156,6 +1144,9 @@ static const struct attribute_group *disk_attr_groups[] = { #ifdef CONFIG_BLK_DEV_IO_TRACE &blk_trace_attr_group, #endif +#ifdef CONFIG_BLK_DEV_INTEGRITY + &blk_integrity_attr_group, +#endif NULL }; diff --git a/block/partitions/core.c b/block/partitions/core.c index 7b8ef6296abd..49e0496ff23c 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -85,14 +85,6 @@ static int (*check_part[])(struct parsed_partitions *) = { NULL }; -static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) -{ - spin_lock(&bdev->bd_size_lock); - i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - bdev->bd_nr_sectors = sectors; - spin_unlock(&bdev->bd_size_lock); -} - static struct parsed_partitions *allocate_partitions(struct gendisk *hd) { struct parsed_partitions *state; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 34177f1bd97d..bcad9b926b0c 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -404,7 +404,6 @@ static int brd_alloc(int i) /* Tell the block layer that this is not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); err = add_disk(disk); if (err) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 719a7260c22b..8c2bc47de473 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1283,7 +1283,7 @@ static void one_flush_endio(struct bio *bio) static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx) { struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0, - REQ_OP_FLUSH | REQ_PREFLUSH, GFP_NOIO); + REQ_OP_WRITE | REQ_PREFLUSH, GFP_NOIO); struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO); if (!octx) { diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index d445fd0934bd..9c35c958f2c8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -609,7 +609,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) request.len = htonl(size); } handle = nbd_cmd_handle(cmd); - memcpy(request.handle, &handle, sizeof(handle)); + request.cookie = cpu_to_be64(handle); trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd)); @@ -621,7 +621,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) trace_nbd_header_sent(req, handle); if (result < 0) { if (was_interrupted(result)) { - /* If we havne't sent anything we can just return BUSY, + /* If we haven't sent anything we can just return BUSY, * however if we have sent something we need to make * sure we only allow this req to be sent until we are * completely done. @@ -735,7 +735,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index, u32 tag; int ret = 0; - memcpy(&handle, reply->handle, sizeof(handle)); + handle = be64_to_cpu(reply->cookie); tag = nbd_handle_to_tag(handle); hwq = blk_mq_unique_tag_to_hwq(tag); if (hwq < nbd->tag_set.nr_hw_queues) @@ -1805,7 +1805,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) * Tell the block layer that we are not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); disk->queue->limits.discard_granularity = 0; blk_queue_max_discard_sectors(disk->queue, 0); blk_queue_max_segment_size(disk->queue, UINT_MAX); diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index b195b8b9fe32..b3fedafe301e 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2144,7 +2144,6 @@ static int null_add_dev(struct nullb_device *dev) nullb->q->queuedata = nullb; blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q); mutex_lock(&lock); rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6b8aa0d030f7..72a5cde9a5af 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -129,6 +129,7 @@ struct ublk_queue { unsigned long io_addr; /* mapped vm address */ unsigned int max_io_sz; bool force_abort; + bool timeout; unsigned short nr_io_ready; /* how many ios setup */ struct ublk_device *dev; struct ublk_io ios[]; @@ -898,6 +899,22 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) } } +static enum blk_eh_timer_return ublk_timeout(struct request *rq) +{ + struct ublk_queue *ubq = rq->mq_hctx->driver_data; + + if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) { + if (!ubq->timeout) { + send_sig(SIGKILL, ubq->ubq_daemon, 0); + ubq->timeout = true; + } + + return BLK_EH_DONE; + } + + return BLK_EH_RESET_TIMER; +} + static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -957,6 +974,7 @@ static const struct blk_mq_ops ublk_mq_ops = { .queue_rq = ublk_queue_rq, .init_hctx = ublk_init_hctx, .init_request = ublk_init_rq, + .timeout = ublk_timeout, }; static int ublk_ch_open(struct inode *inode, struct file *filp) @@ -1737,6 +1755,18 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV)) return -EPERM; + /* + * unprivileged device can't be trusted, but RECOVERY and + * RECOVERY_REISSUE still may hang error handling, so can't + * support recovery features for unprivileged ublk now + * + * TODO: provide forward progress for RECOVERY handler, so that + * unprivileged device can benefit from it + */ + if (info.flags & UBLK_F_UNPRIVILEGED_DEV) + info.flags &= ~(UBLK_F_USER_RECOVERY_REISSUE | + UBLK_F_USER_RECOVERY); + /* the created device is always owned by current user */ ublk_store_owner_uid_gid(&info.owner_uid, &info.owner_gid); @@ -2007,6 +2037,7 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) put_task_struct(ubq->ubq_daemon); /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */ ubq->ubq_daemon = NULL; + ubq->timeout = false; for (i = 0; i < ubq->q_depth; i++) { struct ublk_io *io = &ubq->ios[i]; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a84c4268257a..f6d90f1ba5cf 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2215,7 +2215,6 @@ static int zram_add(void) /* zram devices sort of resembles non-rotational disks */ blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); /* * To ensure that we always get PAGE_SIZE aligned diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index ba3909bb6bea..7e9d19fd21dd 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -971,7 +971,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, } blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue); blk_queue_write_cache(q, true, true); diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 920bb68156d2..bc8d7565171d 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -219,7 +219,7 @@ static unsigned int optimal_io_size(struct block_device *bdev, } static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size, - sector_t start, sector_t boundary) + loff_t start, loff_t boundary) { if (io_size != opt_size && start + opt_size / SECTOR_SIZE <= boundary) @@ -237,8 +237,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap, struct block_device *bdev; struct mddev *mddev = bitmap->mddev; struct bitmap_storage *store = &bitmap->storage; - sector_t offset = mddev->bitmap_info.offset; - sector_t ps, sboff, doff; + loff_t sboff, offset = mddev->bitmap_info.offset; + sector_t ps, doff; unsigned int size = PAGE_SIZE; unsigned int opt_size = PAGE_SIZE; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 812a12e3e41a..4739ed891e75 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6079,6 +6079,38 @@ out_release: return ret; } +/* + * If the bio covers multiple data disks, find sector within the bio that has + * the lowest chunk offset in the first chunk. + */ +static sector_t raid5_bio_lowest_chunk_sector(struct r5conf *conf, + struct bio *bi) +{ + int sectors_per_chunk = conf->chunk_sectors; + int raid_disks = conf->raid_disks; + int dd_idx; + struct stripe_head sh; + unsigned int chunk_offset; + sector_t r_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); + sector_t sector; + + /* We pass in fake stripe_head to get back parity disk numbers */ + sector = raid5_compute_sector(conf, r_sector, 0, &dd_idx, &sh); + chunk_offset = sector_div(sector, sectors_per_chunk); + if (sectors_per_chunk - chunk_offset >= bio_sectors(bi)) + return r_sector; + /* + * Bio crosses to the next data disk. Check whether it's in the same + * chunk. + */ + dd_idx++; + while (dd_idx == sh.pd_idx || dd_idx == sh.qd_idx) + dd_idx++; + if (dd_idx >= raid_disks) + return r_sector; + return r_sector + sectors_per_chunk - chunk_offset; +} + static bool raid5_make_request(struct mddev *mddev, struct bio * bi) { DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -6150,6 +6182,17 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) } md_account_bio(mddev, &bi); + /* + * Lets start with the stripe with the lowest chunk offset in the first + * chunk. That has the best chances of creating IOs adjacent to + * previous IOs in case of sequential IO and thus creates the most + * sequential IO pattern. We don't bother with the optimization when + * reshaping as the performance benefit is not worth the complexity. + */ + if (likely(conf->reshape_progress == MaxSector)) + logical_sector = raid5_bio_lowest_chunk_sector(conf, bi); + s = (logical_sector - ctx.first_sector) >> RAID5_STRIPE_SHIFT(conf); + add_wait_queue(&conf->wait_for_overlap, &wait); while (1) { res = make_stripe_request(mddev, conf, &ctx, logical_sector, @@ -6178,7 +6221,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) continue; } - s = find_first_bit(ctx.sectors_to_do, stripe_cnt); + s = find_next_bit_wrap(ctx.sectors_to_do, stripe_cnt, s); if (s == stripe_cnt) break; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1db3e3c24b43..ae4e51e91ee3 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -829,7 +829,7 @@ void wbc_detach_inode(struct writeback_control *wbc) * is okay. The main goal is avoiding keeping an inode on * the wrong wb for an extended period of time. */ - if (hweight32(history) > WB_FRN_HIST_THR_SLOTS) + if (hweight16(history) > WB_FRN_HIST_THR_SLOTS) inode_switch_wbs(inode, max_id); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e3242e67a8e3..b441e633f4dd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -157,9 +157,6 @@ struct gendisk { struct timer_rand_state *random; atomic_t sync_io; /* RAID */ struct disk_events *ev; -#ifdef CONFIG_BLK_DEV_INTEGRITY - struct kobject integrity_kobj; -#endif /* CONFIG_BLK_DEV_INTEGRITY */ #ifdef CONFIG_BLK_DEV_ZONED /* diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index 20d6cc91435d..80ce0ef43afd 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -11,6 +11,8 @@ * Cleanup PARANOIA usage & code. * 2004/02/19 Paul Clements * Removed PARANOIA, plus various cleanup and comments + * 2023 Copyright Red Hat + * Link to userspace extensions, favor cookie over handle. */ #ifndef _UAPILINUX_NBD_H @@ -30,12 +32,18 @@ #define NBD_SET_TIMEOUT _IO( 0xab, 9 ) #define NBD_SET_FLAGS _IO( 0xab, 10) +/* + * See also https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md + * for additional userspace extensions not yet utilized in the kernel module. + */ + enum { NBD_CMD_READ = 0, NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, NBD_CMD_FLUSH = 3, NBD_CMD_TRIM = 4 + /* userspace defines additional extension commands */ }; /* values for flags field, these are server interaction specific. */ @@ -64,15 +72,19 @@ enum { #define NBD_REQUEST_MAGIC 0x25609513 #define NBD_REPLY_MAGIC 0x67446698 /* Do *not* use magics: 0x12560953 0x96744668. */ +/* magic 0x668e33ef for structured reply not supported by kernel yet */ /* * This is the packet used for communication between client and * server. All data are in network byte order. */ struct nbd_request { - __be32 magic; - __be32 type; /* == READ || == WRITE */ - char handle[8]; + __be32 magic; /* NBD_REQUEST_MAGIC */ + __be32 type; /* See NBD_CMD_* */ + union { + __be64 cookie; /* Opaque identifier for request */ + char handle[8]; /* older spelling of cookie */ + }; __be64 from; __be32 len; } __attribute__((packed)); @@ -82,8 +94,11 @@ struct nbd_request { * it has completed an I/O request (or an error occurs). */ struct nbd_reply { - __be32 magic; + __be32 magic; /* NBD_REPLY_MAGIC */ __be32 error; /* 0 = ok, else error */ - char handle[8]; /* handle you got from request */ + union { + __be64 cookie; /* Opaque identifier from request */ + char handle[8]; /* older spelling of cookie */ + }; }; #endif /* _UAPILINUX_NBD_H */ |