diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-05-05 09:41:44 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-05-05 09:41:44 +0300 |
commit | 2f50037a1c687ac928bbd47b6eb959b39f748ada (patch) | |
tree | 3b1e97aa8b1e4605822f9736588914809711092c /drivers | |
parent | 2e171ffcdf62a90ea7a0192728f81c1ac288de50 (diff) | |
parent | b8b784958eccbf8f51ebeee65282ca3fd59ea391 (diff) | |
download | linux-2f50037a1c687ac928bbd47b6eb959b39f748ada.tar.xz |
Merge tag 'for-linus-20180504' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"A collection of fixes that should to into this release. This contains:
- Set of bcache fixes from Coly, fixing regression in patches that
went into this series.
- Set of NVMe fixes by way of Keith.
- Set of bdi related fixes, one from Jan and two from Tetsuo Handa,
fixing various issues around device addition/removal.
- Two block inflight fixes from Omar, fixing issues around the
transition to using tags for blk-mq inflight accounting that we
did a few releases ago"
* tag 'for-linus-20180504' of git://git.kernel.dk/linux-block:
bdi: Fix oops in wb_workfn()
nvmet: switch loopback target state to connecting when resetting
nvme/multipath: Fix multipath disabled naming collisions
nvme/multipath: Disable runtime writable enabling parameter
nvme: Set integrity flag for user passthrough commands
nvme: fix potential memory leak in option parsing
bdi: Fix use after free bug in debugfs_remove()
bdi: wake up concurrent wb_shutdown() callers.
bcache: use pr_info() to inform duplicated CACHE_SET_IO_DISABLE set
bcache: set dc->io_disable to true in conditional_stop_bcache_device()
bcache: add wait_for_kthread_stop() in bch_allocator_thread()
bcache: count backing device I/O error for writeback I/O
bcache: set CACHE_SET_IO_DISABLE in bch_cached_dev_error()
bcache: store disk name in struct cache and struct cached_dev
blk-mq: fix sysfs inflight counter
blk-mq: count allocated but not started requests in iostats inflight
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/bcache/alloc.c | 5 | ||||
-rw-r--r-- | drivers/md/bcache/bcache.h | 4 | ||||
-rw-r--r-- | drivers/md/bcache/debug.c | 3 | ||||
-rw-r--r-- | drivers/md/bcache/io.c | 8 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 5 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 75 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.c | 4 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 27 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 6 | ||||
-rw-r--r-- | drivers/nvme/host/multipath.c | 24 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 12 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 6 |
12 files changed, 117 insertions, 62 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 004cc3cc6123..7fa2631b422c 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -290,7 +290,7 @@ do { \ if (kthread_should_stop() || \ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \ set_current_state(TASK_RUNNING); \ - return 0; \ + goto out; \ } \ \ schedule(); \ @@ -378,6 +378,9 @@ retry_invalidate: bch_prio_write(ca); } } +out: + wait_for_kthread_stop(); + return 0; } /* Allocation */ diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d338b7086013..3a0cfb237af9 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -392,6 +392,8 @@ struct cached_dev { #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64 atomic_t io_errors; unsigned error_limit; + + char backing_dev_name[BDEVNAME_SIZE]; }; enum alloc_reserve { @@ -464,6 +466,8 @@ struct cache { atomic_long_t meta_sectors_written; atomic_long_t btree_sectors_written; atomic_long_t sectors_written; + + char cache_dev_name[BDEVNAME_SIZE]; }; struct gc_stat { diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 028f7b386e01..4e63c6f6c04d 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -106,7 +106,6 @@ void bch_btree_verify(struct btree *b) void bch_data_verify(struct cached_dev *dc, struct bio *bio) { - char name[BDEVNAME_SIZE]; struct bio *check; struct bio_vec bv, cbv; struct bvec_iter iter, citer = { 0 }; @@ -134,7 +133,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) bv.bv_len), dc->disk.c, "verify failed at dev %s sector %llu", - bdevname(dc->bdev, name), + dc->backing_dev_name, (uint64_t) bio->bi_iter.bi_sector); kunmap_atomic(p1); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 7fac97ae036e..2ddf8515e6a5 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -52,7 +52,6 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c, /* IO errors */ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) { - char buf[BDEVNAME_SIZE]; unsigned errors; WARN_ONCE(!dc, "NULL pointer of struct cached_dev"); @@ -60,7 +59,7 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) errors = atomic_add_return(1, &dc->io_errors); if (errors < dc->error_limit) pr_err("%s: IO error on backing device, unrecoverable", - bio_devname(bio, buf)); + dc->backing_dev_name); else bch_cached_dev_error(dc); } @@ -105,19 +104,18 @@ void bch_count_io_errors(struct cache *ca, } if (error) { - char buf[BDEVNAME_SIZE]; unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT, &ca->io_errors); errors >>= IO_ERROR_SHIFT; if (errors < ca->set->error_limit) pr_err("%s: IO error on %s%s", - bdevname(ca->bdev, buf), m, + ca->cache_dev_name, m, is_read ? ", recovering." : "."); else bch_cache_set_error(ca->set, "%s: too many IO errors %s", - bdevname(ca->bdev, buf), m); + ca->cache_dev_name, m); } } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index a65e3365eeb9..8e3e8655ed63 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -649,11 +649,8 @@ static void backing_request_endio(struct bio *bio) */ if (unlikely(s->iop.writeback && bio->bi_opf & REQ_PREFLUSH)) { - char buf[BDEVNAME_SIZE]; - - bio_devname(bio, buf); pr_err("Can't flush %s: returned bi_status %i", - buf, bio->bi_status); + dc->backing_dev_name, bio->bi_status); } else { /* set to orig_bio->bi_status in bio_complete() */ s->iop.status = bio->bi_status; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index d90d9e59ca00..3dea06b41d43 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -936,7 +936,6 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc) static void cached_dev_detach_finish(struct work_struct *w) { struct cached_dev *dc = container_of(w, struct cached_dev, detach); - char buf[BDEVNAME_SIZE]; struct closure cl; closure_init_stack(&cl); @@ -967,7 +966,7 @@ static void cached_dev_detach_finish(struct work_struct *w) mutex_unlock(&bch_register_lock); - pr_info("Caching disabled for %s", bdevname(dc->bdev, buf)); + pr_info("Caching disabled for %s", dc->backing_dev_name); /* Drop ref we took in cached_dev_detach() */ closure_put(&dc->disk.cl); @@ -999,29 +998,28 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, { uint32_t rtime = cpu_to_le32(get_seconds()); struct uuid_entry *u; - char buf[BDEVNAME_SIZE]; struct cached_dev *exist_dc, *t; - bdevname(dc->bdev, buf); - if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) || (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))) return -ENOENT; if (dc->disk.c) { - pr_err("Can't attach %s: already attached", buf); + pr_err("Can't attach %s: already attached", + dc->backing_dev_name); return -EINVAL; } if (test_bit(CACHE_SET_STOPPING, &c->flags)) { - pr_err("Can't attach %s: shutting down", buf); + pr_err("Can't attach %s: shutting down", + dc->backing_dev_name); return -EINVAL; } if (dc->sb.block_size < c->sb.block_size) { /* Will die */ pr_err("Couldn't attach %s: block size less than set's block size", - buf); + dc->backing_dev_name); return -EINVAL; } @@ -1029,7 +1027,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) { if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) { pr_err("Tried to attach %s but duplicate UUID already attached", - buf); + dc->backing_dev_name); return -EINVAL; } @@ -1047,13 +1045,15 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, if (!u) { if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - pr_err("Couldn't find uuid for %s in set", buf); + pr_err("Couldn't find uuid for %s in set", + dc->backing_dev_name); return -ENOENT; } u = uuid_find_empty(c); if (!u) { - pr_err("Not caching %s, no room for UUID", buf); + pr_err("Not caching %s, no room for UUID", + dc->backing_dev_name); return -EINVAL; } } @@ -1112,7 +1112,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, up_write(&dc->writeback_lock); pr_info("Caching %s as %s on set %pU", - bdevname(dc->bdev, buf), dc->disk.disk->disk_name, + dc->backing_dev_name, + dc->disk.disk->disk_name, dc->disk.c->sb.set_uuid); return 0; } @@ -1225,10 +1226,10 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, struct block_device *bdev, struct cached_dev *dc) { - char name[BDEVNAME_SIZE]; const char *err = "cannot allocate memory"; struct cache_set *c; + bdevname(bdev, dc->backing_dev_name); memcpy(&dc->sb, sb, sizeof(struct cache_sb)); dc->bdev = bdev; dc->bdev->bd_holder = dc; @@ -1237,6 +1238,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, bio_first_bvec_all(&dc->sb_bio)->bv_page = sb_page; get_page(sb_page); + if (cached_dev_init(dc, sb->block_size << 9)) goto err; @@ -1247,7 +1249,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) goto err; - pr_info("registered backing device %s", bdevname(bdev, name)); + pr_info("registered backing device %s", dc->backing_dev_name); list_add(&dc->list, &uncached_devices); list_for_each_entry(c, &bch_cache_sets, list) @@ -1259,7 +1261,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, return; err: - pr_notice("error %s: %s", bdevname(bdev, name), err); + pr_notice("error %s: %s", dc->backing_dev_name, err); bcache_device_stop(&dc->disk); } @@ -1367,7 +1369,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) bool bch_cached_dev_error(struct cached_dev *dc) { - char name[BDEVNAME_SIZE]; + struct cache_set *c; if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) return false; @@ -1377,7 +1379,22 @@ bool bch_cached_dev_error(struct cached_dev *dc) smp_mb(); pr_err("stop %s: too many IO errors on backing device %s\n", - dc->disk.disk->disk_name, bdevname(dc->bdev, name)); + dc->disk.disk->disk_name, dc->backing_dev_name); + + /* + * If the cached device is still attached to a cache set, + * even dc->io_disable is true and no more I/O requests + * accepted, cache device internal I/O (writeback scan or + * garbage collection) may still prevent bcache device from + * being stopped. So here CACHE_SET_IO_DISABLE should be + * set to c->flags too, to make the internal I/O to cache + * device rejected and stopped immediately. + * If c is NULL, that means the bcache device is not attached + * to any cache set, then no CACHE_SET_IO_DISABLE bit to set. + */ + c = dc->disk.c; + if (c && test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) + pr_info("CACHE_SET_IO_DISABLE already set"); bcache_device_stop(&dc->disk); return true; @@ -1395,7 +1412,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) return false; if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) - pr_warn("CACHE_SET_IO_DISABLE already set"); + pr_info("CACHE_SET_IO_DISABLE already set"); /* XXX: we can be called from atomic context acquire_console_sem(); @@ -1539,6 +1556,20 @@ static void conditional_stop_bcache_device(struct cache_set *c, */ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.", d->disk->disk_name); + /* + * There might be a small time gap that cache set is + * released but bcache device is not. Inside this time + * gap, regular I/O requests will directly go into + * backing device as no cache set attached to. This + * behavior may also introduce potential inconsistence + * data in writeback mode while cache is dirty. + * Therefore before calling bcache_device_stop() due + * to a broken cache device, dc->io_disable should be + * explicitly set to true. + */ + dc->io_disable = true; + /* make others know io_disable is true earlier */ + smp_mb(); bcache_device_stop(d); } else { /* @@ -2003,12 +2034,10 @@ static int cache_alloc(struct cache *ca) static int register_cache(struct cache_sb *sb, struct page *sb_page, struct block_device *bdev, struct cache *ca) { - char name[BDEVNAME_SIZE]; const char *err = NULL; /* must be set for any error case */ int ret = 0; - bdevname(bdev, name); - + bdevname(bdev, ca->cache_dev_name); memcpy(&ca->sb, sb, sizeof(struct cache_sb)); ca->bdev = bdev; ca->bdev->bd_holder = ca; @@ -2045,14 +2074,14 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, goto out; } - pr_info("registered cache device %s", name); + pr_info("registered cache device %s", ca->cache_dev_name); out: kobject_put(&ca->kobj); err: if (err) - pr_notice("error %s: %s", name, err); + pr_notice("error %s: %s", ca->cache_dev_name, err); return ret; } diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 4a9547cdcdc5..ad45ebe1a74b 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -244,8 +244,10 @@ static void dirty_endio(struct bio *bio) struct keybuf_key *w = bio->bi_private; struct dirty_io *io = w->private; - if (bio->bi_status) + if (bio->bi_status) { SET_KEY_DIRTY(&w->key, false); + bch_count_backing_io_errors(io->dc, bio); + } closure_put(&io->cl); } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9df4f71e58ca..a3771c5729f5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -764,6 +764,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, ret = PTR_ERR(meta); goto out_unmap; } + req->cmd_flags |= REQ_INTEGRITY; } } @@ -2997,31 +2998,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (nvme_init_ns_head(ns, nsid, id)) goto out_free_id; nvme_setup_streams_ns(ctrl, ns); - -#ifdef CONFIG_NVME_MULTIPATH - /* - * If multipathing is enabled we need to always use the subsystem - * instance number for numbering our devices to avoid conflicts - * between subsystems that have multiple controllers and thus use - * the multipath-aware subsystem node and those that have a single - * controller and use the controller node directly. - */ - if (ns->head->disk) { - sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, - ctrl->cntlid, ns->head->instance); - flags = GENHD_FL_HIDDEN; - } else { - sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, - ns->head->instance); - } -#else - /* - * But without the multipath code enabled, multiple controller per - * subsystems are visible as devices and thus we cannot use the - * subsystem instance. - */ - sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); -#endif + nvme_set_disk_name(disk_name, ns, ctrl, &flags); if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { if (nvme_nvm_register(ns, disk_name, node)) { diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 124c458806df..7ae732a77fe8 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -668,6 +668,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->transport); opts->transport = p; break; case NVMF_OPT_NQN: @@ -676,6 +677,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->subsysnqn); opts->subsysnqn = p; nqnlen = strlen(opts->subsysnqn); if (nqnlen >= NVMF_NQN_SIZE) { @@ -698,6 +700,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->traddr); opts->traddr = p; break; case NVMF_OPT_TRSVCID: @@ -706,6 +709,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->trsvcid); opts->trsvcid = p; break; case NVMF_OPT_QUEUE_SIZE: @@ -792,6 +796,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -EINVAL; goto out; } + nvmf_host_put(opts->host); opts->host = nvmf_host_add(p); kfree(p); if (!opts->host) { @@ -817,6 +822,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->host_traddr); opts->host_traddr = p; break; case NVMF_OPT_HOST_ID: diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 956e0b8e9c4d..d7b664ae5923 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -15,10 +15,32 @@ #include "nvme.h" static bool multipath = true; -module_param(multipath, bool, 0644); +module_param(multipath, bool, 0444); MODULE_PARM_DESC(multipath, "turn on native support for multiple controllers per subsystem"); +/* + * If multipathing is enabled we need to always use the subsystem instance + * number for numbering our devices to avoid conflicts between subsystems that + * have multiple controllers and thus use the multipath-aware subsystem node + * and those that have a single controller and use the controller node + * directly. + */ +void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, + struct nvme_ctrl *ctrl, int *flags) +{ + if (!multipath) { + sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); + } else if (ns->head->disk) { + sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, + ctrl->cntlid, ns->head->instance); + *flags = GENHD_FL_HIDDEN; + } else { + sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, + ns->head->instance); + } +} + void nvme_failover_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 061fecfd44f5..7ded7a51c430 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -436,6 +436,8 @@ extern const struct attribute_group nvme_ns_id_attr_group; extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH +void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, + struct nvme_ctrl *ctrl, int *flags); void nvme_failover_req(struct request *req); bool nvme_req_needs_failover(struct request *req, blk_status_t error); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); @@ -461,6 +463,16 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) } #else +/* + * Without the multipath code enabled, multiple controller per subsystems are + * visible as devices and thus we cannot use the subsystem instance. + */ +static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, + struct nvme_ctrl *ctrl, int *flags) +{ + sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); +} + static inline void nvme_failover_req(struct request *req) { } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 31fdfba556a8..27a8561c0cb9 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -469,6 +469,12 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) nvme_stop_ctrl(&ctrl->ctrl); nvme_loop_shutdown_ctrl(ctrl); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { + /* state change failure should never happen */ + WARN_ON_ONCE(1); + return; + } + ret = nvme_loop_configure_admin_queue(ctrl); if (ret) goto out_disable; |