diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 22:49:01 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 22:49:01 +0300 |
commit | 126e76ffbf78d9e948b641aadb265d16c57f5a3d (patch) | |
tree | 656e7838f0ec057936b80e15a774911df05c6005 /drivers | |
parent | fbd01410e89a66f346ba1b3c0161e1198449b746 (diff) | |
parent | 175206cf9ab63161dec74d9cd7f9992e062491f5 (diff) | |
download | linux-126e76ffbf78d9e948b641aadb265d16c57f5a3d.tar.xz |
Merge branch 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-block
Pull followup block layer updates from Jens Axboe:
"I ended up splitting the main pull request for this series into two,
mainly because of clashes between NVMe fixes that went into 4.13 after
the for-4.14 branches were split off. This pull request is mostly
NVMe, but not exclusively. In detail, it contains:
- Two pull request for NVMe changes from Christoph. Nothing new on
the feature front, basically just fixes all over the map for the
core bits, transport, rdma, etc.
- Series from Bart, cleaning up various bits in the BFQ scheduler.
- Series of bcache fixes, which has been lingering for a release or
two. Coly sent this in, but patches from various people in this
area.
- Set of patches for BFQ from Paolo himself, updating both
documentation and fixing some corner cases in performance.
- Series from Omar, attempting to now get the 4k loop support
correct. Our confidence level is higher this time.
- Series from Shaohua for loop as well, improving O_DIRECT
performance and fixing a use-after-free"
* 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-block: (74 commits)
bcache: initialize dirty stripes in flash_dev_run()
loop: set physical block size to logical block size
bcache: fix bch_hprint crash and improve output
bcache: Update continue_at() documentation
bcache: silence static checker warning
bcache: fix for gc and write-back race
bcache: increase the number of open buckets
bcache: Correct return value for sysfs attach errors
bcache: correct cache_dirty_target in __update_writeback_rate()
bcache: gc does not work when triggering by manual command
bcache: Don't reinvent the wheel but use existing llist API
bcache: do not subtract sectors_to_gc for bypassed IO
bcache: fix sequential large write IO bypass
bcache: Fix leak of bdev reference
block/loop: remove unused field
block/loop: fix use after free
bfq: Use icq_to_bic() consistently
bfq: Suppress compiler warnings about comparisons
bfq: Check kstrtoul() return value
bfq: Declare local functions static
...
Diffstat (limited to 'drivers')
26 files changed, 903 insertions, 655 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 407cb172d6e3..85de67334695 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -213,10 +213,13 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) */ blk_mq_freeze_queue(lo->lo_queue); lo->use_dio = use_dio; - if (use_dio) + if (use_dio) { + queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags |= LO_FLAGS_DIRECT_IO; - else + } else { + queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; + } blk_mq_unfreeze_queue(lo->lo_queue); } @@ -460,12 +463,21 @@ static void lo_complete_rq(struct request *rq) blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK); } +static void lo_rw_aio_do_completion(struct loop_cmd *cmd) +{ + if (!atomic_dec_and_test(&cmd->ref)) + return; + kfree(cmd->bvec); + cmd->bvec = NULL; + blk_mq_complete_request(cmd->rq); +} + static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) { struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); cmd->ret = ret; - blk_mq_complete_request(cmd->rq); + lo_rw_aio_do_completion(cmd); } static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, @@ -473,22 +485,51 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, { struct iov_iter iter; struct bio_vec *bvec; - struct bio *bio = cmd->rq->bio; + struct request *rq = cmd->rq; + struct bio *bio = rq->bio; struct file *file = lo->lo_backing_file; + unsigned int offset; + int segments = 0; int ret; - /* nomerge for loop request queue */ - WARN_ON(cmd->rq->bio != cmd->rq->biotail); + if (rq->bio != rq->biotail) { + struct req_iterator iter; + struct bio_vec tmp; + + __rq_for_each_bio(bio, rq) + segments += bio_segments(bio); + bvec = kmalloc(sizeof(struct bio_vec) * segments, GFP_NOIO); + if (!bvec) + return -EIO; + cmd->bvec = bvec; + + /* + * The bios of the request may be started from the middle of + * the 'bvec' because of bio splitting, so we can't directly + * copy bio->bi_iov_vec to new bvec. The rq_for_each_segment + * API will take care of all details for us. + */ + rq_for_each_segment(tmp, rq, iter) { + *bvec = tmp; + bvec++; + } + bvec = cmd->bvec; + offset = 0; + } else { + /* + * Same here, this bio may be started from the middle of the + * 'bvec' because of bio splitting, so offset from the bvec + * must be passed to iov iterator + */ + offset = bio->bi_iter.bi_bvec_done; + bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); + segments = bio_segments(bio); + } + atomic_set(&cmd->ref, 2); - bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); iov_iter_bvec(&iter, ITER_BVEC | rw, bvec, - bio_segments(bio), blk_rq_bytes(cmd->rq)); - /* - * This bio may be started from the middle of the 'bvec' - * because of bio splitting, so offset from the bvec must - * be passed to iov iterator - */ - iter.iov_offset = bio->bi_iter.bi_bvec_done; + segments, blk_rq_bytes(rq)); + iter.iov_offset = offset; cmd->iocb.ki_pos = pos; cmd->iocb.ki_filp = file; @@ -500,6 +541,8 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, else ret = call_read_iter(file, &cmd->iocb, &iter); + lo_rw_aio_do_completion(cmd); + if (ret != -EIOCBQUEUED) cmd->iocb.ki_complete(&cmd->iocb, ret, 0); return 0; @@ -546,74 +589,12 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) } } -struct switch_request { - struct file *file; - struct completion wait; -}; - static inline void loop_update_dio(struct loop_device *lo) { __loop_update_dio(lo, io_is_direct(lo->lo_backing_file) | lo->use_dio); } -/* - * Do the actual switch; called from the BIO completion routine - */ -static void do_loop_switch(struct loop_device *lo, struct switch_request *p) -{ - struct file *file = p->file; - struct file *old_file = lo->lo_backing_file; - struct address_space *mapping; - - /* if no new file, only flush of queued bios requested */ - if (!file) - return; - - mapping = file->f_mapping; - mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); - lo->lo_backing_file = file; - lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? - mapping->host->i_bdev->bd_block_size : PAGE_SIZE; - lo->old_gfp_mask = mapping_gfp_mask(mapping); - mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); - loop_update_dio(lo); -} - -/* - * loop_switch performs the hard work of switching a backing store. - * First it needs to flush existing IO, it does this by sending a magic - * BIO down the pipe. The completion of this BIO does the actual switch. - */ -static int loop_switch(struct loop_device *lo, struct file *file) -{ - struct switch_request w; - - w.file = file; - - /* freeze queue and wait for completion of scheduled requests */ - blk_mq_freeze_queue(lo->lo_queue); - - /* do the switch action */ - do_loop_switch(lo, &w); - - /* unfreeze */ - blk_mq_unfreeze_queue(lo->lo_queue); - - return 0; -} - -/* - * Helper to flush the IOs in loop, but keeping loop thread running - */ -static int loop_flush(struct loop_device *lo) -{ - /* loop not yet configured, no running thread, nothing to flush */ - if (lo->lo_state != Lo_bound) - return 0; - return loop_switch(lo, NULL); -} - static void loop_reread_partitions(struct loop_device *lo, struct block_device *bdev) { @@ -678,9 +659,14 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, goto out_putf; /* and ... switch */ - error = loop_switch(lo, file); - if (error) - goto out_putf; + blk_mq_freeze_queue(lo->lo_queue); + mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); + lo->lo_backing_file = file; + lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping); + mapping_set_gfp_mask(file->f_mapping, + lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); + loop_update_dio(lo); + blk_mq_unfreeze_queue(lo->lo_queue); fput(old_file); if (lo->lo_flags & LO_FLAGS_PARTSCAN) @@ -867,7 +853,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, struct file *file, *f; struct inode *inode; struct address_space *mapping; - unsigned lo_blocksize; int lo_flags = 0; int error; loff_t size; @@ -911,9 +896,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, !file->f_op->write_iter) lo_flags |= LO_FLAGS_READ_ONLY; - lo_blocksize = S_ISBLK(inode->i_mode) ? - inode->i_bdev->bd_block_size : PAGE_SIZE; - error = -EFBIG; size = get_loop_size(lo, file); if ((loff_t)(sector_t)size != size) @@ -927,7 +909,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); lo->use_dio = false; - lo->lo_blocksize = lo_blocksize; lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; @@ -947,7 +928,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, /* let user-space know about the new size */ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); - set_blocksize(bdev, lo_blocksize); + set_blocksize(bdev, S_ISBLK(inode->i_mode) ? + block_size(inode->i_bdev) : PAGE_SIZE); lo->lo_state = Lo_bound; if (part_shift) @@ -1053,6 +1035,9 @@ static int loop_clr_fd(struct loop_device *lo) memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); memset(lo->lo_file_name, 0, LO_NAME_SIZE); + blk_queue_logical_block_size(lo->lo_queue, 512); + blk_queue_physical_block_size(lo->lo_queue, 512); + blk_queue_io_min(lo->lo_queue, 512); if (bdev) { bdput(bdev); invalidate_bdev(bdev); @@ -1336,6 +1321,26 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) return error; } +static int loop_set_block_size(struct loop_device *lo, unsigned long arg) +{ + if (lo->lo_state != Lo_bound) + return -ENXIO; + + if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) + return -EINVAL; + + blk_mq_freeze_queue(lo->lo_queue); + + blk_queue_logical_block_size(lo->lo_queue, arg); + blk_queue_physical_block_size(lo->lo_queue, arg); + blk_queue_io_min(lo->lo_queue, arg); + loop_update_dio(lo); + + blk_mq_unfreeze_queue(lo->lo_queue); + + return 0; +} + static int lo_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -1384,6 +1389,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) err = loop_set_dio(lo, arg); break; + case LOOP_SET_BLOCK_SIZE: + err = -EPERM; + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + err = loop_set_block_size(lo, arg); + break; default: err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; } @@ -1583,12 +1593,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode) err = loop_clr_fd(lo); if (!err) return; - } else { + } else if (lo->lo_state == Lo_bound) { /* * Otherwise keep thread (if running) and config, * but flush possible ongoing bios in thread. */ - loop_flush(lo); + blk_mq_freeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue); } mutex_unlock(&lo->lo_ctl_mutex); @@ -1770,9 +1781,13 @@ static int loop_add(struct loop_device **l, int i) } lo->lo_queue->queuedata = lo; + blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS); + /* - * It doesn't make sense to enable merge because the I/O - * submitted to backing file is handled page by page. + * By default, we do buffer IO, so it doesn't make sense to enable + * merge because the I/O submitted to backing file is handled page by + * page. For directio mode, merge does help to dispatch bigger request + * to underlayer disk. We will enable merge once directio is enabled. */ queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); diff --git a/drivers/block/loop.h b/drivers/block/loop.h index fecd3f97ef8c..f68c1d50802f 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -48,7 +48,6 @@ struct loop_device { struct file * lo_backing_file; struct block_device *lo_device; - unsigned lo_blocksize; void *key_data; gfp_t old_gfp_mask; @@ -68,10 +67,13 @@ struct loop_device { struct loop_cmd { struct kthread_work work; struct request *rq; - struct list_head list; - bool use_aio; /* use AIO interface to handle I/O */ + union { + bool use_aio; /* use AIO interface to handle I/O */ + atomic_t ref; /* only for aio */ + }; long ret; struct kiocb iocb; + struct bio_vec *bvec; }; /* Support for loadable transfer modules */ diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index ca4abe1ccd8d..cacbe2dbd5c3 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -68,6 +68,8 @@ #include <linux/random.h> #include <trace/events/bcache.h> +#define MAX_OPEN_BUCKETS 128 + /* Bucket heap / gen */ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b) @@ -671,7 +673,7 @@ int bch_open_buckets_alloc(struct cache_set *c) spin_lock_init(&c->data_bucket_lock); - for (i = 0; i < 6; i++) { + for (i = 0; i < MAX_OPEN_BUCKETS; i++) { struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL); if (!b) return -ENOMEM; diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index dee542fff68e..2ed9bd231d84 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -333,6 +333,7 @@ struct cached_dev { /* Limit number of writeback bios in flight */ struct semaphore in_flight; struct task_struct *writeback_thread; + struct workqueue_struct *writeback_write_wq; struct keybuf writeback_keys; diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index 864e673aec39..7d5286b05036 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -70,21 +70,10 @@ void __closure_wake_up(struct closure_waitlist *wait_list) list = llist_del_all(&wait_list->list); /* We first reverse the list to preserve FIFO ordering and fairness */ - - while (list) { - struct llist_node *t = list; - list = llist_next(list); - - t->next = reverse; - reverse = t; - } + reverse = llist_reverse_order(list); /* Then do the wakeups */ - - while (reverse) { - cl = container_of(reverse, struct closure, list); - reverse = llist_next(reverse); - + llist_for_each_entry(cl, reverse, list) { closure_set_waiting(cl, 0); closure_sub(cl, CLOSURE_WAITING + 1); } diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 1ec84ca81146..295b7e43f92c 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -312,8 +312,6 @@ static inline void closure_wake_up(struct closure_waitlist *list) * been dropped with closure_put()), it will resume execution at @fn running out * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). * - * NOTE: This macro expands to a return in the calling function! - * * This is because after calling continue_at() you no longer have a ref on @cl, * and whatever @cl owns may be freed out from under you - a running closure fn * has a ref on its own closure which continue_at() drops. @@ -340,8 +338,6 @@ do { \ * Causes @fn to be executed out of @cl, in @wq context (or called directly if * @wq is NULL). * - * NOTE: like continue_at(), this macro expands to a return in the caller! - * * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, * thus it's not safe to touch anything protected by @cl after a * continue_at_nobarrier(). diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 0e1463d0c334..681b4f12b05a 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -196,12 +196,12 @@ static void bch_data_insert_start(struct closure *cl) struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); struct bio *bio = op->bio, *n; - if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) - wake_up_gc(op->c); - if (op->bypass) return bch_data_invalidate(cl); + if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) + wake_up_gc(op->c); + /* * Journal writes are marked REQ_PREFLUSH; if the original write was a * flush, it'll wait on the journal write. @@ -400,12 +400,6 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) if (!congested && !dc->sequential_cutoff) goto rescale; - if (!congested && - mode == CACHE_MODE_WRITEBACK && - op_is_write(bio->bi_opf) && - op_is_sync(bio->bi_opf)) - goto rescale; - spin_lock(&dc->io_lock); hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 974d832e54a6..fc0a31b13ac4 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1026,7 +1026,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - bch_sectors_dirty_init(dc); + bch_sectors_dirty_init(&dc->disk); atomic_set(&dc->has_dirty, 1); atomic_inc(&dc->count); bch_writeback_queue(dc); @@ -1059,6 +1059,8 @@ static void cached_dev_free(struct closure *cl) cancel_delayed_work_sync(&dc->writeback_rate_update); if (!IS_ERR_OR_NULL(dc->writeback_thread)) kthread_stop(dc->writeback_thread); + if (dc->writeback_write_wq) + destroy_workqueue(dc->writeback_write_wq); mutex_lock(&bch_register_lock); @@ -1228,6 +1230,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) goto err; bcache_device_attach(d, c, u - c->uuids); + bch_sectors_dirty_init(d); bch_flash_dev_request_init(d); add_disk(d->disk); @@ -1374,9 +1377,6 @@ static void cache_set_flush(struct closure *cl) struct btree *b; unsigned i; - if (!c) - closure_return(cl); - bch_cache_accounting_destroy(&c->accounting); kobject_put(&c->internal); @@ -1964,6 +1964,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); + if (!IS_ERR(bdev)) + bdput(bdev); if (attr == &ksysfs_register_quiet) goto out; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index f90f13616980..104c57cd666c 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -192,7 +192,7 @@ STORE(__cached_dev) { struct cached_dev *dc = container_of(kobj, struct cached_dev, disk.kobj); - unsigned v = size; + ssize_t v = size; struct cache_set *c; struct kobj_uevent_env *env; @@ -227,7 +227,7 @@ STORE(__cached_dev) bch_cached_dev_run(dc); if (attr == &sysfs_cache_mode) { - ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1); + v = bch_read_string_list(buf, bch_cache_modes + 1); if (v < 0) return v; @@ -615,8 +615,21 @@ STORE(__bch_cache_set) bch_cache_accounting_clear(&c->accounting); } - if (attr == &sysfs_trigger_gc) + if (attr == &sysfs_trigger_gc) { + /* + * Garbage collection thread only works when sectors_to_gc < 0, + * when users write to sysfs entry trigger_gc, most of time + * they want to forcibly triger gargage collection. Here -1 is + * set to c->sectors_to_gc, to make gc_should_run() give a + * chance to permit gc thread to run. "give a chance" means + * before going into gc_should_run(), there is still chance + * that c->sectors_to_gc being set to other positive value. So + * writing sysfs entry trigger_gc won't always make sure gc + * thread takes effect. + */ + atomic_set(&c->sectors_to_gc, -1); wake_up_gc(c); + } if (attr == &sysfs_prune_cache) { struct shrink_control sc; diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index 8c3a938f4bf0..176d3c2ef5f5 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -74,24 +74,44 @@ STRTO_H(strtouint, unsigned int) STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) +/** + * bch_hprint() - formats @v to human readable string for sysfs. + * + * @v - signed 64 bit integer + * @buf - the (at least 8 byte) buffer to format the result into. + * + * Returns the number of bytes used by format. + */ ssize_t bch_hprint(char *buf, int64_t v) { static const char units[] = "?kMGTPEZY"; - char dec[4] = ""; - int u, t = 0; - - for (u = 0; v >= 1024 || v <= -1024; u++) { - t = v & ~(~0 << 10); - v >>= 10; - } - - if (!u) - return sprintf(buf, "%llu", v); - - if (v < 100 && v > -100) - snprintf(dec, sizeof(dec), ".%i", t / 100); - - return sprintf(buf, "%lli%s%c", v, dec, units[u]); + int u = 0, t; + + uint64_t q; + + if (v < 0) + q = -v; + else + q = v; + + /* For as long as the number is more than 3 digits, but at least + * once, shift right / divide by 1024. Keep the remainder for + * a digit after the decimal point. + */ + do { + u++; + + t = q & ~(~0 << 10); + q >>= 10; + } while (q >= 1000); + + if (v < 0) + /* '-', up to 3 digits, '.', 1 digit, 1 character, null; + * yields 8 bytes. + */ + return sprintf(buf, "-%llu.%i%c", q, t * 10 / 1024, units[u]); + else + return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]); } ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index c49022a8dc9d..e663ca082183 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -21,7 +21,8 @@ static void __update_writeback_rate(struct cached_dev *dc) { struct cache_set *c = dc->disk.c; - uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size; + uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size - + bcache_flash_devs_sectors_dirty(c); uint64_t cache_dirty_target = div_u64(cache_sectors * dc->writeback_percent, 100); @@ -186,7 +187,7 @@ static void write_dirty(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty_finish, system_wq); + continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq); } static void read_dirty_endio(struct bio *bio) @@ -206,7 +207,7 @@ static void read_dirty_submit(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty, system_wq); + continue_at(cl, write_dirty, io->dc->writeback_write_wq); } static void read_dirty(struct cached_dev *dc) @@ -481,17 +482,17 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, return MAP_CONTINUE; } -void bch_sectors_dirty_init(struct cached_dev *dc) +void bch_sectors_dirty_init(struct bcache_device *d) { struct sectors_dirty_init op; bch_btree_op_init(&op.op, -1); - op.inode = dc->disk.id; + op.inode = d->id; - bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0), + bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0), sectors_dirty_init_fn, 0); - dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk); + d->sectors_dirty_last = bcache_dev_sectors_dirty(d); } void bch_cached_dev_writeback_init(struct cached_dev *dc) @@ -515,6 +516,11 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) int bch_cached_dev_writeback_start(struct cached_dev *dc) { + dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq", + WQ_MEM_RECLAIM, 0); + if (!dc->writeback_write_wq) + return -ENOMEM; + dc->writeback_thread = kthread_create(bch_writeback_thread, dc, "bcache_writeback"); if (IS_ERR(dc->writeback_thread)) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 629bd1a502fd..e35421d20d2e 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -14,6 +14,25 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } +static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c) +{ + uint64_t i, ret = 0; + + mutex_lock(&bch_register_lock); + + for (i = 0; i < c->nr_uuids; i++) { + struct bcache_device *d = c->devices[i]; + + if (!d || !UUID_FLASH_ONLY(&c->uuids[i])) + continue; + ret += bcache_dev_sectors_dirty(d); + } + + mutex_unlock(&bch_register_lock); + + return ret; +} + static inline unsigned offset_to_stripe(struct bcache_device *d, uint64_t offset) { @@ -84,7 +103,7 @@ static inline void bch_writeback_add(struct cached_dev *dc) void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); -void bch_sectors_dirty_init(struct cached_dev *dc); +void bch_sectors_dirty_init(struct bcache_device *); void bch_cached_dev_writeback_init(struct cached_dev *); int bch_cached_dev_writeback_start(struct cached_dev *); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c596dd3c58b1..277a7a02cba5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -76,6 +76,11 @@ static DEFINE_SPINLOCK(dev_list_lock); static struct class *nvme_class; +static __le32 nvme_get_log_dw10(u8 lid, size_t size) +{ + return cpu_to_le32((((size / 4) - 1) << 16) | lid); +} + int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) @@ -108,7 +113,16 @@ static blk_status_t nvme_error_status(struct request *req) case NVME_SC_WRITE_FAULT: case NVME_SC_READ_ERROR: case NVME_SC_UNWRITTEN_BLOCK: + case NVME_SC_ACCESS_DENIED: + case NVME_SC_READ_ONLY: return BLK_STS_MEDIUM; + case NVME_SC_GUARD_CHECK: + case NVME_SC_APPTAG_CHECK: + case NVME_SC_REFTAG_CHECK: + case NVME_SC_INVALID_PI: + return BLK_STS_PROTECTION; + case NVME_SC_RESERVATION_CONFLICT: + return BLK_STS_NEXUS; default: return BLK_STS_IOERR; } @@ -162,9 +176,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state) { enum nvme_ctrl_state old_state; + unsigned long flags; bool changed = false; - spin_lock_irq(&ctrl->lock); + spin_lock_irqsave(&ctrl->lock, flags); old_state = ctrl->state; switch (new_state) { @@ -225,7 +240,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, if (changed) ctrl->state = new_state; - spin_unlock_irq(&ctrl->lock); + spin_unlock_irqrestore(&ctrl->lock, flags); return changed; } @@ -307,7 +322,7 @@ static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) memset(&c, 0, sizeof(c)); c.directive.opcode = nvme_admin_directive_send; - c.directive.nsid = cpu_to_le32(0xffffffff); + c.directive.nsid = cpu_to_le32(NVME_NSID_ALL); c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE; c.directive.dtype = NVME_DIR_IDENTIFY; c.directive.tdtype = NVME_DIR_STREAMS; @@ -357,7 +372,7 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl) if (ret) return ret; - ret = nvme_get_stream_params(ctrl, &s, 0xffffffff); + ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL); if (ret) return ret; @@ -585,10 +600,44 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, } EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); -int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, - void __user *ubuffer, unsigned bufflen, - void __user *meta_buffer, unsigned meta_len, u32 meta_seed, - u32 *result, unsigned timeout) +static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf, + unsigned len, u32 seed, bool write) +{ + struct bio_integrity_payload *bip; + int ret = -ENOMEM; + void *buf; + + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + goto out; + + ret = -EFAULT; + if (write && copy_from_user(buf, ubuf, len)) + goto out_free_meta; + + bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); + if (IS_ERR(bip)) { + ret = PTR_ERR(bip); + goto out_free_meta; + } + + bip->bip_iter.bi_size = len; + bip->bip_iter.bi_sector = seed; + ret = bio_integrity_add_page(bio, virt_to_page(buf), len, + offset_in_page(buf)); + if (ret == len) + return buf; + ret = -ENOMEM; +out_free_meta: + kfree(buf); +out: + return ERR_PTR(ret); +} + +static int nvme_submit_user_cmd(struct request_queue *q, + struct nvme_command *cmd, void __user *ubuffer, + unsigned bufflen, void __user *meta_buffer, unsigned meta_len, + u32 meta_seed, u32 *result, unsigned timeout) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; @@ -610,46 +659,17 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, if (ret) goto out; bio = req->bio; - - if (!disk) - goto submit; bio->bi_disk = disk; - - if (meta_buffer && meta_len) { - struct bio_integrity_payload *bip; - - meta = kmalloc(meta_len, GFP_KERNEL); - if (!meta) { - ret = -ENOMEM; + if (disk && meta_buffer && meta_len) { + meta = nvme_add_user_metadata(bio, meta_buffer, meta_len, + meta_seed, write); + if (IS_ERR(meta)) { + ret = PTR_ERR(meta); goto out_unmap; } - - if (write) { - if (copy_from_user(meta, meta_buffer, - meta_len)) { - ret = -EFAULT; - goto out_free_meta; - } - } - - bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); - if (IS_ERR(bip)) { - ret = PTR_ERR(bip); - goto out_free_meta; - } - - bip->bip_iter.bi_size = meta_len; - bip->bip_iter.bi_sector = meta_seed; - - ret = bio_integrity_add_page(bio, virt_to_page(meta), - meta_len, offset_in_page(meta)); - if (ret != meta_len) { - ret = -ENOMEM; - goto out_free_meta; - } } } - submit: + blk_execute_rq(req->q, disk, req, 0); if (nvme_req(req)->flags & NVME_REQ_CANCELLED) ret = -EINTR; @@ -661,7 +681,6 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, if (copy_to_user(meta_buffer, meta, meta_len)) ret = -EFAULT; } - out_free_meta: kfree(meta); out_unmap: if (bio) @@ -671,14 +690,6 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, return ret; } -int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, - void __user *ubuffer, unsigned bufflen, u32 *result, - unsigned timeout) -{ - return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0, - result, timeout); -} - static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) { struct nvme_ctrl *ctrl = rq->end_io_data; @@ -768,7 +779,8 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) return error; } -static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid) +static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, + u8 *eui64, u8 *nguid, uuid_t *uuid) { struct nvme_command c = { }; int status; @@ -784,7 +796,7 @@ static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid) if (!data) return -ENOMEM; - status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data, + status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data, NVME_IDENTIFY_DATA_SIZE); if (status) goto free_data; @@ -798,33 +810,33 @@ static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid) switch (cur->nidt) { case NVME_NIDT_EUI64: if (cur->nidl != NVME_NIDT_EUI64_LEN) { - dev_warn(ns->ctrl->device, + dev_warn(ctrl->device, "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n", cur->nidl); goto free_data; } len = NVME_NIDT_EUI64_LEN; - memcpy(ns->eui, data + pos + sizeof(*cur), len); + memcpy(eui64, data + pos + sizeof(*cur), len); break; case NVME_NIDT_NGUID: if (cur->nidl != NVME_NIDT_NGUID_LEN) { - dev_warn(ns->ctrl->device, + dev_warn(ctrl->device, "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n", cur->nidl); goto free_data; } len = NVME_NIDT_NGUID_LEN; - memcpy(ns->nguid, data + pos + sizeof(*cur), len); + memcpy(nguid, data + pos + sizeof(*cur), len); break; case NVME_NIDT_UUID: if (cur->nidl != NVME_NIDT_UUID_LEN) { - dev_warn(ns->ctrl->device, + dev_warn(ctrl->device, "ctrl returned bogus length: %d for NVME_NIDT_UUID\n", cur->nidl); goto free_data; } len = NVME_NIDT_UUID_LEN; - uuid_copy(&ns->uuid, data + pos + sizeof(*cur)); + uuid_copy(uuid, data + pos + sizeof(*cur)); break; default: /* Skip unnkown types */ @@ -849,9 +861,10 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); } -static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, - struct nvme_id_ns **id) +static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, + unsigned nsid) { + struct nvme_id_ns *id; struct nvme_command c = { }; int error; @@ -860,15 +873,18 @@ static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, c.identify.nsid = cpu_to_le32(nsid); c.identify.cns = NVME_ID_CNS_NS; - *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); - if (!*id) - return -ENOMEM; + id = kmalloc(sizeof(*id), GFP_KERNEL); + if (!id) + return NULL; - error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, - sizeof(struct nvme_id_ns)); - if (error) - kfree(*id); - return error; + error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id)); + if (error) { + dev_warn(ctrl->device, "Identify namespace failed\n"); + kfree(id); + return NULL; + } + + return id; } static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, @@ -963,7 +979,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.apptag = cpu_to_le16(io.apptag); c.rw.appmask = cpu_to_le16(io.appmask); - return __nvme_submit_user_cmd(ns->queue, &c, + return nvme_submit_user_cmd(ns->queue, &c, (void __user *)(uintptr_t)io.addr, length, metadata, meta_len, io.slba, NULL, 0); } @@ -1001,7 +1017,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, (void __user *)(uintptr_t)cmd.addr, cmd.data_len, - &cmd.result, timeout); + (void __user *)(uintptr_t)cmd.metadata, cmd.metadata, + 0, &cmd.result, timeout); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) return -EFAULT; @@ -1159,32 +1176,21 @@ static void nvme_config_discard(struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX); } -static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) +static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, + struct nvme_id_ns *id, u8 *eui64, u8 *nguid, uuid_t *uuid) { - if (nvme_identify_ns(ns->ctrl, ns->ns_id, id)) { - dev_warn(ns->ctrl->dev, "%s: Identify failure\n", __func__); - return -ENODEV; - } - - if ((*id)->ncap == 0) { - kfree(*id); - return -ENODEV; - } - - if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) - memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui)); - if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) - memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid)); - if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) { + if (ctrl->vs >= NVME_VS(1, 1, 0)) + memcpy(eui64, id->eui64, sizeof(id->eui64)); + if (ctrl->vs >= NVME_VS(1, 2, 0)) + memcpy(nguid, id->nguid, sizeof(id->nguid)); + if (ctrl->vs >= NVME_VS(1, 3, 0)) { /* Don't treat error as fatal we potentially * already have a NGUID or EUI-64 */ - if (nvme_identify_ns_descs(ns, ns->ns_id)) - dev_warn(ns->ctrl->device, + if (nvme_identify_ns_descs(ctrl, nsid, eui64, nguid, uuid)) + dev_warn(ctrl->device, "%s: Identify Descriptors failed\n", __func__); } - - return 0; } static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) @@ -1225,22 +1231,38 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) static int nvme_revalidate_disk(struct gendisk *disk) { struct nvme_ns *ns = disk->private_data; - struct nvme_id_ns *id = NULL; - int ret; + struct nvme_ctrl *ctrl = ns->ctrl; + struct nvme_id_ns *id; + u8 eui64[8] = { 0 }, nguid[16] = { 0 }; + uuid_t uuid = uuid_null; + int ret = 0; if (test_bit(NVME_NS_DEAD, &ns->flags)) { set_capacity(disk, 0); return -ENODEV; } - ret = nvme_revalidate_ns(ns, &id); - if (ret) - return ret; + id = nvme_identify_ns(ctrl, ns->ns_id); + if (!id) + return -ENODEV; - __nvme_revalidate_disk(disk, id); - kfree(id); + if (id->ncap == 0) { + ret = -ENODEV; + goto out; + } - return 0; + nvme_report_ns_ids(ctrl, ns->ns_id, id, eui64, nguid, &uuid); + if (!uuid_equal(&ns->uuid, &uuid) || + memcmp(&ns->nguid, &nguid, sizeof(ns->nguid)) || + memcmp(&ns->eui, &eui64, sizeof(ns->eui))) { + dev_err(ctrl->device, + "identifiers changed for nsid %d\n", ns->ns_id); + ret = -ENODEV; + } + +out: + kfree(id); + return ret; } static char nvme_pr_type(enum pr_type type) @@ -1440,7 +1462,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap) ctrl->ctrl_config = NVME_CC_CSS_NVM; ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT; - ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE; + ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; ctrl->ctrl_config |= NVME_CC_ENABLE; @@ -1453,7 +1475,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) { - unsigned long timeout = jiffies + (shutdown_timeout * HZ); + unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ); u32 csts; int ret; @@ -1502,6 +1524,23 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_write_cache(q, vwc, vwc); } +static int nvme_configure_timestamp(struct nvme_ctrl *ctrl) +{ + __le64 ts; + int ret; + + if (!(ctrl->oncs & NVME_CTRL_ONCS_TIMESTAMP)) + return 0; + + ts = cpu_to_le64(ktime_to_ms(ktime_get_real())); + ret = nvme_set_features(ctrl, NVME_FEAT_TIMESTAMP, 0, &ts, sizeof(ts), + NULL); + if (ret) + dev_warn_once(ctrl->device, + "could not set timestamp (%d)\n", ret); + return ret; +} + static int nvme_configure_apst(struct nvme_ctrl *ctrl) { /* @@ -1804,6 +1843,20 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->sgls = le32_to_cpu(id->sgls); ctrl->kas = le16_to_cpu(id->kas); + if (id->rtd3e) { + /* us -> s */ + u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000; + + ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time, + shutdown_timeout, 60); + + if (ctrl->shutdown_timeout != shutdown_timeout) + dev_warn(ctrl->device, + "Shutdown timeout set to %u seconds\n", + ctrl->shutdown_timeout); + } else + ctrl->shutdown_timeout = shutdown_timeout; + ctrl->npss = id->npss; ctrl->apsta = id->apsta; prev_apst_enabled = ctrl->apst_enabled; @@ -1856,6 +1909,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; + + ret = nvme_configure_timestamp(ctrl); + if (ret < 0) + return ret; ret = nvme_configure_directives(ctrl); if (ret < 0) @@ -2311,9 +2368,15 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance); - if (nvme_revalidate_ns(ns, &id)) + id = nvme_identify_ns(ctrl, nsid); + if (!id) goto out_free_queue; + if (id->ncap == 0) + goto out_free_id; + + nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid); + if (nvme_nvm_ns_supported(ns, id) && nvme_nvm_register(ns, disk_name, node)) { dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__); @@ -2534,6 +2597,71 @@ static void nvme_async_event_work(struct work_struct *work) spin_unlock_irq(&ctrl->lock); } +static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) +{ + + u32 csts; + + if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) + return false; + + if (csts == ~0) + return false; + + return ((ctrl->ctrl_config & NVME_CC_ENABLE) && (csts & NVME_CSTS_PP)); +} + +static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) +{ + struct nvme_command c = { }; + struct nvme_fw_slot_info_log *log; + + log = kmalloc(sizeof(*log), GFP_KERNEL); + if (!log) + return; + + c.common.opcode = nvme_admin_get_log_page; + c.common.nsid = cpu_to_le32(NVME_NSID_ALL); + c.common.cdw10[0] = nvme_get_log_dw10(NVME_LOG_FW_SLOT, sizeof(*log)); + + if (!nvme_submit_sync_cmd(ctrl->admin_q, &c, log, sizeof(*log))) + dev_warn(ctrl->device, + "Get FW SLOT INFO log error\n"); + kfree(log); +} + +static void nvme_fw_act_work(struct work_struct *work) +{ + struct nvme_ctrl *ctrl = container_of(work, + struct nvme_ctrl, fw_act_work); + unsigned long fw_act_timeout; + + if (ctrl->mtfa) + fw_act_timeout = jiffies + + msecs_to_jiffies(ctrl->mtfa * 100); + else + fw_act_timeout = jiffies + + msecs_to_jiffies(admin_timeout * 1000); + + nvme_stop_queues(ctrl); + while (nvme_ctrl_pp_status(ctrl)) { + if (time_after(jiffies, fw_act_timeout)) { + dev_warn(ctrl->device, + "Fw activation timeout, reset controller\n"); + nvme_reset_ctrl(ctrl); + break; + } + msleep(100); + } + + if (ctrl->state != NVME_CTRL_LIVE) + return; + + nvme_start_queues(ctrl); + /* read FW slot informationi to clear the AER*/ + nvme_get_fw_slot_info(ctrl); +} + void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, union nvme_result *res) { @@ -2560,6 +2688,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, dev_info(ctrl->device, "rescanning\n"); nvme_queue_scan(ctrl); break; + case NVME_AER_NOTICE_FW_ACT_STARTING: + schedule_work(&ctrl->fw_act_work); + break; default: dev_warn(ctrl->device, "async event result %08x\n", result); } @@ -2607,6 +2738,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); flush_work(&ctrl->scan_work); + cancel_work_sync(&ctrl->fw_act_work); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); @@ -2670,6 +2802,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->quirks = quirks; INIT_WORK(&ctrl->scan_work, nvme_scan_work); INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); + INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); ret = nvme_set_instance(ctrl); if (ret) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5f5cd306f76d..47307752dc65 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -22,7 +22,7 @@ #include "fabrics.h" static LIST_HEAD(nvmf_transports); -static DEFINE_MUTEX(nvmf_transports_mutex); +static DECLARE_RWSEM(nvmf_transports_rwsem); static LIST_HEAD(nvmf_hosts); static DEFINE_MUTEX(nvmf_hosts_mutex); @@ -75,7 +75,7 @@ static struct nvmf_host *nvmf_host_default(void) kref_init(&host->ref); snprintf(host->nqn, NVMF_NQN_SIZE, - "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id); + "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id); mutex_lock(&nvmf_hosts_mutex); list_add_tail(&host->list, &nvmf_hosts); @@ -495,9 +495,9 @@ int nvmf_register_transport(struct nvmf_transport_ops *ops) if (!ops->create_ctrl) return -EINVAL; - mutex_lock(&nvmf_transports_mutex); + down_write(&nvmf_transports_rwsem); list_add_tail(&ops->entry, &nvmf_transports); - mutex_unlock(&nvmf_transports_mutex); + up_write(&nvmf_transports_rwsem); return 0; } @@ -514,9 +514,9 @@ EXPORT_SYMBOL_GPL(nvmf_register_transport); */ void nvmf_unregister_transport(struct nvmf_transport_ops *ops) { - mutex_lock(&nvmf_transports_mutex); + down_write(&nvmf_transports_rwsem); list_del(&ops->entry); - mutex_unlock(&nvmf_transports_mutex); + up_write(&nvmf_transports_rwsem); } EXPORT_SYMBOL_GPL(nvmf_unregister_transport); @@ -525,7 +525,7 @@ static struct nvmf_transport_ops *nvmf_lookup_transport( { struct nvmf_transport_ops *ops; - lockdep_assert_held(&nvmf_transports_mutex); + lockdep_assert_held(&nvmf_transports_rwsem); list_for_each_entry(ops, &nvmf_transports, entry) { if (strcmp(ops->name, opts->transport) == 0) @@ -735,6 +735,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, goto out; } if (uuid_parse(p, &hostid)) { + pr_err("Invalid hostid %s\n", p); ret = -EINVAL; goto out; } @@ -850,7 +851,7 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) goto out_free_opts; opts->mask &= ~NVMF_REQUIRED_OPTS; - mutex_lock(&nvmf_transports_mutex); + down_read(&nvmf_transports_rwsem); ops = nvmf_lookup_transport(opts); if (!ops) { pr_info("no handler found for transport %s.\n", @@ -877,16 +878,16 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) dev_warn(ctrl->device, "controller returned incorrect NQN: \"%s\".\n", ctrl->subnqn); - mutex_unlock(&nvmf_transports_mutex); + up_read(&nvmf_transports_rwsem); ctrl->ops->delete_ctrl(ctrl); return ERR_PTR(-EINVAL); } - mutex_unlock(&nvmf_transports_mutex); + up_read(&nvmf_transports_rwsem); return ctrl; out_unlock: - mutex_unlock(&nvmf_transports_mutex); + up_read(&nvmf_transports_rwsem); out_free_opts: nvmf_free_options(opts); return ERR_PTR(ret); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 1438be649866..d2e882c0f496 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -220,6 +220,90 @@ static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *); static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, struct nvme_fc_queue *, unsigned int); +static void +nvme_fc_free_lport(struct kref *ref) +{ + struct nvme_fc_lport *lport = + container_of(ref, struct nvme_fc_lport, ref); + unsigned long flags; + + WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); + WARN_ON(!list_empty(&lport->endp_list)); + + /* remove from transport list */ + spin_lock_irqsave(&nvme_fc_lock, flags); + list_del(&lport->port_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + /* let the LLDD know we've finished tearing it down */ + lport->ops->localport_delete(&lport->localport); + + ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); + ida_destroy(&lport->endp_cnt); + + put_device(lport->dev); + + kfree(lport); +} + +static void +nvme_fc_lport_put(struct nvme_fc_lport *lport) +{ + kref_put(&lport->ref, nvme_fc_free_lport); +} + +static int +nvme_fc_lport_get(struct nvme_fc_lport *lport) +{ + return kref_get_unless_zero(&lport->ref); +} + + +static struct nvme_fc_lport * +nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo) +{ + struct nvme_fc_lport *lport; + unsigned long flags; + + spin_lock_irqsave(&nvme_fc_lock, flags); + + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { + if (lport->localport.node_name != pinfo->node_name || + lport->localport.port_name != pinfo->port_name) + continue; + + if (lport->localport.port_state != FC_OBJSTATE_DELETED) { + lport = ERR_PTR(-EEXIST); + goto out_done; + } + + if (!nvme_fc_lport_get(lport)) { + /* + * fails if ref cnt already 0. If so, + * act as if lport already deleted + */ + lport = NULL; + goto out_done; + } + + /* resume the lport */ + + lport->localport.port_role = pinfo->port_role; + lport->localport.port_id = pinfo->port_id; + lport->localport.port_state = FC_OBJSTATE_ONLINE; + + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return lport; + } + + lport = NULL; + +out_done: + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return lport; +} /** * nvme_fc_register_localport - transport entry point called by an @@ -257,6 +341,28 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, goto out_reghost_failed; } + /* + * look to see if there is already a localport that had been + * deregistered and in the process of waiting for all the + * references to fully be removed. If the references haven't + * expired, we can simply re-enable the localport. Remoteports + * and controller reconnections should resume naturally. + */ + newrec = nvme_fc_attach_to_unreg_lport(pinfo); + + /* found an lport, but something about its state is bad */ + if (IS_ERR(newrec)) { + ret = PTR_ERR(newrec); + goto out_reghost_failed; + + /* found existing lport, which was resumed */ + } else if (newrec) { + *portptr = &newrec->localport; + return 0; + } + + /* nothing found - allocate a new localport struct */ + newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz), GFP_KERNEL); if (!newrec) { @@ -310,44 +416,6 @@ out_reghost_failed: } EXPORT_SYMBOL_GPL(nvme_fc_register_localport); -static void -nvme_fc_free_lport(struct kref *ref) -{ - struct nvme_fc_lport *lport = - container_of(ref, struct nvme_fc_lport, ref); - unsigned long flags; - - WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); - WARN_ON(!list_empty(&lport->endp_list)); - - /* remove from transport list */ - spin_lock_irqsave(&nvme_fc_lock, flags); - list_del(&lport->port_list); - spin_unlock_irqrestore(&nvme_fc_lock, flags); - - /* let the LLDD know we've finished tearing it down */ - lport->ops->localport_delete(&lport->localport); - - ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); - ida_destroy(&lport->endp_cnt); - - put_device(lport->dev); - - kfree(lport); -} - -static void -nvme_fc_lport_put(struct nvme_fc_lport *lport) -{ - kref_put(&lport->ref, nvme_fc_free_lport); -} - -static int -nvme_fc_lport_get(struct nvme_fc_lport *lport) -{ - return kref_get_unless_zero(&lport->ref); -} - /** * nvme_fc_unregister_localport - transport entry point called by an * LLDD to deregister/remove a previously @@ -2731,6 +2799,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (ret) goto out_free_queues; + ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); if (IS_ERR(ctrl->ctrl.admin_q)) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 8f2a168ddc01..a19a587d60ed 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -125,6 +125,7 @@ struct nvme_ctrl { struct kref kref; int instance; struct blk_mq_tag_set *tagset; + struct blk_mq_tag_set *admin_tagset; struct list_head namespaces; struct mutex namespaces_mutex; struct device *device; /* char device */ @@ -142,6 +143,7 @@ struct nvme_ctrl { u16 cntlid; u32 ctrl_config; + u16 mtfa; u32 queue_count; u64 cap; @@ -160,6 +162,7 @@ struct nvme_ctrl { u16 kas; u8 npss; u8 apsta; + unsigned int shutdown_timeout; unsigned int kato; bool subsystem; unsigned long quirks; @@ -167,6 +170,7 @@ struct nvme_ctrl { struct work_struct scan_work; struct work_struct async_event_work; struct delayed_work ka_work; + struct work_struct fw_act_work; /* Power saving configuration */ u64 ps_max_latency_us; @@ -207,13 +211,9 @@ struct nvme_ns { bool ext; u8 pi_type; unsigned long flags; - u16 noiob; - #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 - - u64 mode_select_num_blocks; - u32 mode_select_block_len; + u16 noiob; }; struct nvme_ctrl_ops { @@ -314,13 +314,6 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, int flags); -int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, - void __user *ubuffer, unsigned bufflen, u32 *result, - unsigned timeout); -int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, - void __user *ubuffer, unsigned bufflen, - void __user *meta_buffer, unsigned meta_len, u32 meta_seed, - u32 *result, unsigned timeout); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ea892e732268..198245faba6b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -556,8 +556,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req) int nprps, i; length -= (page_size - offset); - if (length <= 0) + if (length <= 0) { + iod->first_dma = 0; return BLK_STS_OK; + } dma_len -= (page_size - offset); if (dma_len) { @@ -667,7 +669,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1) goto out_unmap; - if (rq_data_dir(req)) + if (req_op(req) == REQ_OP_WRITE) nvme_dif_remap(req, nvme_dif_prep); if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir)) @@ -695,7 +697,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) if (iod->nents) { dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); if (blk_integrity_rq(req)) { - if (!rq_data_dir(req)) + if (req_op(req) == REQ_OP_READ) nvme_dif_remap(req, nvme_dif_complete); dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir); } @@ -1377,6 +1379,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->admin_tagset)) return -ENOMEM; + dev->ctrl.admin_tagset = &dev->admin_tagset; dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); if (IS_ERR(dev->ctrl.admin_q)) { diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index bf42d31484d4..58983000964b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -37,8 +37,6 @@ #define NVME_RDMA_CONNECT_TIMEOUT_MS 3000 /* 3 second */ -#define NVME_RDMA_MAX_SEGMENT_SIZE 0xffffff /* 24-bit SGL field */ - #define NVME_RDMA_MAX_SEGMENTS 256 #define NVME_RDMA_MAX_INLINE_SEGMENTS 1 @@ -152,6 +150,9 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); +static const struct blk_mq_ops nvme_rdma_mq_ops; +static const struct blk_mq_ops nvme_rdma_admin_mq_ops; + /* XXX: really should move to a generic header sooner or later.. */ static inline void put_unaligned_le24(u32 val, u8 *p) { @@ -500,7 +501,7 @@ out_put_dev: return ret; } -static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, +static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, int idx, size_t queue_size) { struct nvme_rdma_queue *queue; @@ -558,54 +559,74 @@ out_destroy_cm_id: static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) { + if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) + return; + rdma_disconnect(queue->cm_id); ib_drain_qp(queue->qp); } static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) { + if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) + return; + nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); } -static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue) +static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl) { - if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) - return; - nvme_rdma_stop_queue(queue); - nvme_rdma_free_queue(queue); + int i; + + for (i = 1; i < ctrl->ctrl.queue_count; i++) + nvme_rdma_free_queue(&ctrl->queues[i]); } -static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl) +static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl) { int i; for (i = 1; i < ctrl->ctrl.queue_count; i++) - nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); + nvme_rdma_stop_queue(&ctrl->queues[i]); } -static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) +static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) +{ + int ret; + + if (idx) + ret = nvmf_connect_io_queue(&ctrl->ctrl, idx); + else + ret = nvmf_connect_admin_queue(&ctrl->ctrl); + + if (!ret) + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[idx].flags); + else + dev_info(ctrl->ctrl.device, + "failed to connect queue: %d ret=%d\n", idx, ret); + return ret; +} + +static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl) { int i, ret = 0; for (i = 1; i < ctrl->ctrl.queue_count; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) { - dev_info(ctrl->ctrl.device, - "failed to connect i/o queue: %d\n", ret); - goto out_free_queues; - } - set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); + ret = nvme_rdma_start_queue(ctrl, i); + if (ret) + goto out_stop_queues; } return 0; -out_free_queues: - nvme_rdma_free_io_queues(ctrl); +out_stop_queues: + for (i--; i >= 1; i--) + nvme_rdma_stop_queue(&ctrl->queues[i]); return ret; } -static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) +static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; struct ib_device *ibdev = ctrl->device->dev; @@ -634,32 +655,230 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) "creating %d I/O queues.\n", nr_io_queues); for (i = 1; i < ctrl->ctrl.queue_count; i++) { - ret = nvme_rdma_init_queue(ctrl, i, - ctrl->ctrl.opts->queue_size); - if (ret) { - dev_info(ctrl->ctrl.device, - "failed to initialize i/o queue: %d\n", ret); + ret = nvme_rdma_alloc_queue(ctrl, i, + ctrl->ctrl.sqsize + 1); + if (ret) goto out_free_queues; - } } return 0; out_free_queues: for (i--; i >= 1; i--) - nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); + nvme_rdma_free_queue(&ctrl->queues[i]); return ret; } -static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl) +static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, bool admin) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + struct blk_mq_tag_set *set = admin ? + &ctrl->admin_tag_set : &ctrl->tag_set; + + blk_mq_free_tag_set(set); + nvme_rdma_dev_put(ctrl->device); +} + +static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, + bool admin) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + struct blk_mq_tag_set *set; + int ret; + + if (admin) { + set = &ctrl->admin_tag_set; + memset(set, 0, sizeof(*set)); + set->ops = &nvme_rdma_admin_mq_ops; + set->queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH; + set->reserved_tags = 2; /* connect + keep-alive */ + set->numa_node = NUMA_NO_NODE; + set->cmd_size = sizeof(struct nvme_rdma_request) + + SG_CHUNK_SIZE * sizeof(struct scatterlist); + set->driver_data = ctrl; + set->nr_hw_queues = 1; + set->timeout = ADMIN_TIMEOUT; + } else { + set = &ctrl->tag_set; + memset(set, 0, sizeof(*set)); + set->ops = &nvme_rdma_mq_ops; + set->queue_depth = nctrl->opts->queue_size; + set->reserved_tags = 1; /* fabric connect */ + set->numa_node = NUMA_NO_NODE; + set->flags = BLK_MQ_F_SHOULD_MERGE; + set->cmd_size = sizeof(struct nvme_rdma_request) + + SG_CHUNK_SIZE * sizeof(struct scatterlist); + set->driver_data = ctrl; + set->nr_hw_queues = nctrl->queue_count - 1; + set->timeout = NVME_IO_TIMEOUT; + } + + ret = blk_mq_alloc_tag_set(set); + if (ret) + goto out; + + /* + * We need a reference on the device as long as the tag_set is alive, + * as the MRs in the request structures need a valid ib_device. + */ + ret = nvme_rdma_dev_get(ctrl->device); + if (!ret) { + ret = -EINVAL; + goto out_free_tagset; + } + + return set; + +out_free_tagset: + blk_mq_free_tag_set(set); +out: + return ERR_PTR(ret); +} + +static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, + bool remove) { nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); - nvme_rdma_stop_and_free_queue(&ctrl->queues[0]); - blk_cleanup_queue(ctrl->ctrl.admin_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvme_rdma_dev_put(ctrl->device); + nvme_rdma_stop_queue(&ctrl->queues[0]); + if (remove) { + blk_cleanup_queue(ctrl->ctrl.admin_q); + nvme_rdma_free_tagset(&ctrl->ctrl, true); + } + nvme_rdma_free_queue(&ctrl->queues[0]); +} + +static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, + bool new) +{ + int error; + + error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); + if (error) + return error; + + ctrl->device = ctrl->queues[0].device; + + ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS, + ctrl->device->dev->attrs.max_fast_reg_page_list_len); + + if (new) { + ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); + if (IS_ERR(ctrl->ctrl.admin_tagset)) + goto out_free_queue; + + ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); + if (IS_ERR(ctrl->ctrl.admin_q)) { + error = PTR_ERR(ctrl->ctrl.admin_q); + goto out_free_tagset; + } + } else { + error = blk_mq_reinit_tagset(&ctrl->admin_tag_set, + nvme_rdma_reinit_request); + if (error) + goto out_free_queue; + } + + error = nvme_rdma_start_queue(ctrl, 0); + if (error) + goto out_cleanup_queue; + + error = ctrl->ctrl.ops->reg_read64(&ctrl->ctrl, NVME_REG_CAP, + &ctrl->ctrl.cap); + if (error) { + dev_err(ctrl->ctrl.device, + "prop_get NVME_REG_CAP failed\n"); + goto out_cleanup_queue; + } + + ctrl->ctrl.sqsize = + min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); + + error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); + if (error) + goto out_cleanup_queue; + + ctrl->ctrl.max_hw_sectors = + (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); + + error = nvme_init_identify(&ctrl->ctrl); + if (error) + goto out_cleanup_queue; + + error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, + &ctrl->async_event_sqe, sizeof(struct nvme_command), + DMA_TO_DEVICE); + if (error) + goto out_cleanup_queue; + + return 0; + +out_cleanup_queue: + if (new) + blk_cleanup_queue(ctrl->ctrl.admin_q); +out_free_tagset: + if (new) + nvme_rdma_free_tagset(&ctrl->ctrl, true); +out_free_queue: + nvme_rdma_free_queue(&ctrl->queues[0]); + return error; +} + +static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, + bool remove) +{ + nvme_rdma_stop_io_queues(ctrl); + if (remove) { + blk_cleanup_queue(ctrl->ctrl.connect_q); + nvme_rdma_free_tagset(&ctrl->ctrl, false); + } + nvme_rdma_free_io_queues(ctrl); +} + +static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) +{ + int ret; + + ret = nvme_rdma_alloc_io_queues(ctrl); + if (ret) + return ret; + + if (new) { + ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false); + if (IS_ERR(ctrl->ctrl.tagset)) + goto out_free_io_queues; + + ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); + if (IS_ERR(ctrl->ctrl.connect_q)) { + ret = PTR_ERR(ctrl->ctrl.connect_q); + goto out_free_tag_set; + } + } else { + ret = blk_mq_reinit_tagset(&ctrl->tag_set, + nvme_rdma_reinit_request); + if (ret) + goto out_free_io_queues; + + blk_mq_update_nr_hw_queues(&ctrl->tag_set, + ctrl->ctrl.queue_count - 1); + } + + ret = nvme_rdma_start_io_queues(ctrl); + if (ret) + goto out_cleanup_connect_q; + + return 0; + +out_cleanup_connect_q: + if (new) + blk_cleanup_queue(ctrl->ctrl.connect_q); +out_free_tag_set: + if (new) + nvme_rdma_free_tagset(&ctrl->ctrl, false); +out_free_io_queues: + nvme_rdma_free_io_queues(ctrl); + return ret; } static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) @@ -708,47 +927,18 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ++ctrl->ctrl.nr_reconnects; - if (ctrl->ctrl.queue_count > 1) { - nvme_rdma_free_io_queues(ctrl); - - ret = blk_mq_reinit_tagset(&ctrl->tag_set, - nvme_rdma_reinit_request); - if (ret) - goto requeue; - } - - nvme_rdma_stop_and_free_queue(&ctrl->queues[0]); - - ret = blk_mq_reinit_tagset(&ctrl->admin_tag_set, - nvme_rdma_reinit_request); - if (ret) - goto requeue; - - ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH); - if (ret) - goto requeue; - - ret = nvmf_connect_admin_queue(&ctrl->ctrl); - if (ret) - goto requeue; - - set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); + if (ctrl->ctrl.queue_count > 1) + nvme_rdma_destroy_io_queues(ctrl, false); - ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); + nvme_rdma_destroy_admin_queue(ctrl, false); + ret = nvme_rdma_configure_admin_queue(ctrl, false); if (ret) goto requeue; if (ctrl->ctrl.queue_count > 1) { - ret = nvme_rdma_init_io_queues(ctrl); - if (ret) - goto requeue; - - ret = nvme_rdma_connect_io_queues(ctrl); + ret = nvme_rdma_configure_io_queues(ctrl, false); if (ret) goto requeue; - - blk_mq_update_nr_hw_queues(&ctrl->tag_set, - ctrl->ctrl.queue_count - 1); } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -771,16 +961,15 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, err_work); - int i; nvme_stop_ctrl(&ctrl->ctrl); - for (i = 0; i < ctrl->ctrl.queue_count; i++) - clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); - - if (ctrl->ctrl.queue_count > 1) + if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); + nvme_rdma_stop_io_queues(ctrl); + } blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_rdma_stop_queue(&ctrl->queues[0]); /* We must take care of fastfail/requeue all our inflight requests */ if (ctrl->ctrl.queue_count > 1) @@ -865,7 +1054,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, if (req->mr->need_inval) { res = nvme_rdma_inv_rkey(queue, req); - if (res < 0) { + if (unlikely(res < 0)) { dev_err(ctrl->ctrl.device, "Queueing INV WR for rkey %#x failed (%d)\n", req->mr->rkey, res); @@ -934,7 +1123,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, * the block virtual boundary. */ nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); - if (nr < count) { + if (unlikely(nr < count)) { if (nr < 0) return nr; return -EINVAL; @@ -1070,7 +1259,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, first = ≀ ret = ib_post_send(queue->qp, first, &bad_wr); - if (ret) { + if (unlikely(ret)) { dev_err(queue->ctrl->ctrl.device, "%s failed with error code %d\n", __func__, ret); } @@ -1096,7 +1285,7 @@ static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue, wr.num_sge = 1; ret = ib_post_recv(queue->qp, &wr, &bad_wr); - if (ret) { + if (unlikely(ret)) { dev_err(queue->ctrl->ctrl.device, "%s failed with error code %d\n", __func__, ret); } @@ -1456,7 +1645,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); err = nvme_rdma_map_data(queue, rq, c); - if (err < 0) { + if (unlikely(err < 0)) { dev_err(queue->ctrl->ctrl.device, "Failed to map data (%d)\n", err); nvme_cleanup_cmd(rq); @@ -1470,7 +1659,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, flush = true; err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); - if (err) { + if (unlikely(err)) { nvme_rdma_unmap_data(queue, rq); goto err; } @@ -1538,98 +1727,7 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { .timeout = nvme_rdma_timeout, }; -static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) -{ - int error; - - error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH); - if (error) - return error; - - ctrl->device = ctrl->queues[0].device; - - /* - * We need a reference on the device as long as the tag_set is alive, - * as the MRs in the request structures need a valid ib_device. - */ - error = -EINVAL; - if (!nvme_rdma_dev_get(ctrl->device)) - goto out_free_queue; - - ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ctrl->device->dev->attrs.max_fast_reg_page_list_len); - - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_rdma_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH; - ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_rdma_request) + - SG_CHUNK_SIZE * sizeof(struct scatterlist); - ctrl->admin_tag_set.driver_data = ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; - - error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); - if (error) - goto out_put_dev; - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_free_tagset; - } - - error = nvmf_connect_admin_queue(&ctrl->ctrl); - if (error) - goto out_cleanup_queue; - - set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); - - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, - &ctrl->ctrl.cap); - if (error) { - dev_err(ctrl->ctrl.device, - "prop_get NVME_REG_CAP failed\n"); - goto out_cleanup_queue; - } - - ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); - - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); - if (error) - goto out_cleanup_queue; - - ctrl->ctrl.max_hw_sectors = - (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); - - error = nvme_init_identify(&ctrl->ctrl); - if (error) - goto out_cleanup_queue; - - error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, - &ctrl->async_event_sqe, sizeof(struct nvme_command), - DMA_TO_DEVICE); - if (error) - goto out_cleanup_queue; - - return 0; - -out_cleanup_queue: - blk_cleanup_queue(ctrl->ctrl.admin_q); -out_free_tagset: - /* disconnect and drain the queue before freeing the tagset */ - nvme_rdma_stop_queue(&ctrl->queues[0]); - blk_mq_free_tag_set(&ctrl->admin_tag_set); -out_put_dev: - nvme_rdma_dev_put(ctrl->device); -out_free_queue: - nvme_rdma_free_queue(&ctrl->queues[0]); - return error; -} - -static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) +static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->reconnect_work); @@ -1638,33 +1736,26 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); - nvme_rdma_free_io_queues(ctrl); + nvme_rdma_destroy_io_queues(ctrl, shutdown); } - if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags)) + if (shutdown) nvme_shutdown_ctrl(&ctrl->ctrl); + else + nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); - nvme_rdma_destroy_admin_queue(ctrl); + nvme_rdma_destroy_admin_queue(ctrl, shutdown); } -static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) +static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl) { - nvme_stop_ctrl(&ctrl->ctrl); nvme_remove_namespaces(&ctrl->ctrl); - if (shutdown) - nvme_rdma_shutdown_ctrl(ctrl); - + nvme_rdma_shutdown_ctrl(ctrl, true); nvme_uninit_ctrl(&ctrl->ctrl); - if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_rdma_dev_put(ctrl->device); - } - nvme_put_ctrl(&ctrl->ctrl); } @@ -1673,7 +1764,8 @@ static void nvme_rdma_del_ctrl_work(struct work_struct *work) struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, delete_work); - __nvme_rdma_remove_ctrl(ctrl, true); + nvme_stop_ctrl(&ctrl->ctrl); + nvme_rdma_remove_ctrl(ctrl); } static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) @@ -1705,14 +1797,6 @@ static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) return ret; } -static void nvme_rdma_remove_ctrl_work(struct work_struct *work) -{ - struct nvme_rdma_ctrl *ctrl = container_of(work, - struct nvme_rdma_ctrl, delete_work); - - __nvme_rdma_remove_ctrl(ctrl, false); -} - static void nvme_rdma_reset_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = @@ -1721,31 +1805,16 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) bool changed; nvme_stop_ctrl(&ctrl->ctrl); - nvme_rdma_shutdown_ctrl(ctrl); + nvme_rdma_shutdown_ctrl(ctrl, false); - ret = nvme_rdma_configure_admin_queue(ctrl); - if (ret) { - /* ctrl is already shutdown, just remove the ctrl */ - INIT_WORK(&ctrl->delete_work, nvme_rdma_remove_ctrl_work); - goto del_dead_ctrl; - } + ret = nvme_rdma_configure_admin_queue(ctrl, false); + if (ret) + goto out_fail; if (ctrl->ctrl.queue_count > 1) { - ret = blk_mq_reinit_tagset(&ctrl->tag_set, - nvme_rdma_reinit_request); - if (ret) - goto del_dead_ctrl; - - ret = nvme_rdma_init_io_queues(ctrl); + ret = nvme_rdma_configure_io_queues(ctrl, false); if (ret) - goto del_dead_ctrl; - - ret = nvme_rdma_connect_io_queues(ctrl); - if (ret) - goto del_dead_ctrl; - - blk_mq_update_nr_hw_queues(&ctrl->tag_set, - ctrl->ctrl.queue_count - 1); + goto out_fail; } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -1755,10 +1824,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) return; -del_dead_ctrl: - /* Deleting this dead controller... */ +out_fail: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); - WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work)); + nvme_rdma_remove_ctrl(ctrl); } static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { @@ -1774,62 +1842,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .get_address = nvmf_get_address, }; -static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) -{ - int ret; - - ret = nvme_rdma_init_io_queues(ctrl); - if (ret) - return ret; - - /* - * We need a reference on the device as long as the tag_set is alive, - * as the MRs in the request structures need a valid ib_device. - */ - ret = -EINVAL; - if (!nvme_rdma_dev_get(ctrl->device)) - goto out_free_io_queues; - - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_rdma_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ - ctrl->tag_set.numa_node = NUMA_NO_NODE; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_rdma_request) + - SG_CHUNK_SIZE * sizeof(struct scatterlist); - ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); - if (ret) - goto out_put_dev; - ctrl->ctrl.tagset = &ctrl->tag_set; - - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); - goto out_free_tag_set; - } - - ret = nvme_rdma_connect_io_queues(ctrl); - if (ret) - goto out_cleanup_connect_q; - - return 0; - -out_cleanup_connect_q: - blk_cleanup_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - blk_mq_free_tag_set(&ctrl->tag_set); -out_put_dev: - nvme_rdma_dev_put(ctrl->device); -out_free_io_queues: - nvme_rdma_free_io_queues(ctrl); - return ret; -} - static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { @@ -1887,7 +1899,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, if (!ctrl->queues) goto out_uninit_ctrl; - ret = nvme_rdma_configure_admin_queue(ctrl); + ret = nvme_rdma_configure_admin_queue(ctrl, true); if (ret) goto out_kfree_queues; @@ -1922,7 +1934,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, } if (opts->nr_io_queues) { - ret = nvme_rdma_create_io_queues(ctrl); + ret = nvme_rdma_configure_io_queues(ctrl, true); if (ret) goto out_remove_admin_queue; } @@ -1944,7 +1956,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, return &ctrl->ctrl; out_remove_admin_queue: - nvme_rdma_destroy_admin_queue(ctrl); + nvme_rdma_destroy_admin_queue(ctrl, true); out_kfree_queues: kfree(ctrl->queues); out_uninit_ctrl: diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index a53bb6635b83..c4a0bf36e752 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -100,7 +100,7 @@ static u16 nvmet_get_smart_log(struct nvmet_req *req, u16 status; WARN_ON(req == NULL || slog == NULL); - if (req->cmd->get_log_page.nsid == cpu_to_le32(0xFFFFFFFF)) + if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL)) status = nvmet_get_smart_log_all(req, slog); else status = nvmet_get_smart_log_nsid(req, slog); @@ -168,15 +168,6 @@ out: nvmet_req_complete(req, status); } -static void copy_and_pad(char *dst, int dst_len, const char *src, int src_len) -{ - int len = min(src_len, dst_len); - - memcpy(dst, src, len); - if (dst_len > len) - memset(dst + len, ' ', dst_len - len); -} - static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -196,8 +187,9 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) bin2hex(id->sn, &ctrl->subsys->serial, min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); - copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1); - copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE)); + memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' '); + memcpy_and_pad(id->fr, sizeof(id->fr), + UTS_RELEASE, strlen(UTS_RELEASE), ' '); id->rab = 6; @@ -451,7 +443,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req) u32 val32; u16 status = 0; - switch (cdw10 & 0xf) { + switch (cdw10 & 0xff) { case NVME_FEAT_NUM_QUEUES: nvmet_set_result(req, (subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16)); @@ -461,6 +453,9 @@ static void nvmet_execute_set_features(struct nvmet_req *req) req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000); nvmet_set_result(req, req->sq->ctrl->kato); break; + case NVME_FEAT_HOST_ID: + status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + break; default: status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; break; @@ -475,7 +470,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]); u16 status = 0; - switch (cdw10 & 0xf) { + switch (cdw10 & 0xff) { /* * These features are mandatory in the spec, but we don't * have a useful way to implement them. We'll eventually @@ -509,6 +504,16 @@ static void nvmet_execute_get_features(struct nvmet_req *req) case NVME_FEAT_KATO: nvmet_set_result(req, req->sq->ctrl->kato * 1000); break; + case NVME_FEAT_HOST_ID: + /* need 128-bit host identifier flag */ + if (!(req->cmd->common.cdw10[1] & cpu_to_le32(1 << 0))) { + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + break; + } + + status = nvmet_copy_to_sgl(req, 0, &req->sq->ctrl->hostid, + sizeof(req->sq->ctrl->hostid)); + break; default: status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; break; diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 0a0067e771f5..b6aeb1d70951 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -444,7 +444,7 @@ static struct config_group *nvmet_ns_make(struct config_group *group, goto out; ret = -EINVAL; - if (nsid == 0 || nsid == 0xffffffff) + if (nsid == 0 || nsid == NVME_NSID_ALL) goto out; ret = -ENOMEM; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index f4b02bb4a1a8..7c23eaf8e563 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -538,37 +538,37 @@ EXPORT_SYMBOL_GPL(nvmet_req_uninit); static inline bool nvmet_cc_en(u32 cc) { - return cc & 0x1; + return (cc >> NVME_CC_EN_SHIFT) & 0x1; } static inline u8 nvmet_cc_css(u32 cc) { - return (cc >> 4) & 0x7; + return (cc >> NVME_CC_CSS_SHIFT) & 0x7; } static inline u8 nvmet_cc_mps(u32 cc) { - return (cc >> 7) & 0xf; + return (cc >> NVME_CC_MPS_SHIFT) & 0xf; } static inline u8 nvmet_cc_ams(u32 cc) { - return (cc >> 11) & 0x7; + return (cc >> NVME_CC_AMS_SHIFT) & 0x7; } static inline u8 nvmet_cc_shn(u32 cc) { - return (cc >> 14) & 0x3; + return (cc >> NVME_CC_SHN_SHIFT) & 0x3; } static inline u8 nvmet_cc_iosqes(u32 cc) { - return (cc >> 16) & 0xf; + return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf; } static inline u8 nvmet_cc_iocqes(u32 cc) { - return (cc >> 20) & 0xf; + return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf; } static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) @@ -749,6 +749,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, hostnqn, subsysnqn); req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn); up_read(&nvmet_config_sem); + status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR; goto out_put_subsystem; } up_read(&nvmet_config_sem); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 3cc17269504b..859a66725291 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -154,6 +154,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) le32_to_cpu(c->kato), &ctrl); if (status) goto out; + uuid_copy(&ctrl->hostid, &d->hostid); status = nvmet_install_queue(ctrl, req); if (status) { diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 309c84aa7595..421e43bf1dd7 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -58,7 +58,8 @@ struct nvmet_fc_ls_iod { struct work_struct work; } __aligned(sizeof(unsigned long long)); -#define NVMET_FC_MAX_KB_PER_XFR 256 +#define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024) +#define NVMET_FC_MAX_XFR_SGENTS (NVMET_FC_MAX_SEQ_LENGTH / PAGE_SIZE) enum nvmet_fcp_datadir { NVMET_FCP_NODATA, @@ -74,9 +75,7 @@ struct nvmet_fc_fcp_iod { struct nvme_fc_ersp_iu rspiubuf; dma_addr_t rspdma; struct scatterlist *data_sg; - struct scatterlist *next_sg; int data_sg_cnt; - u32 next_sg_offset; u32 total_length; u32 offset; enum nvmet_fcp_datadir io_dir; @@ -112,6 +111,7 @@ struct nvmet_fc_tgtport { struct ida assoc_cnt; struct nvmet_port *port; struct kref ref; + u32 max_sg_cnt; }; struct nvmet_fc_defer_fcp_req { @@ -994,6 +994,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, INIT_LIST_HEAD(&newrec->assoc_list); kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); + newrec->max_sg_cnt = min_t(u32, NVMET_FC_MAX_XFR_SGENTS, + template->max_sgl_segments); ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { @@ -1866,51 +1868,23 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod, u8 op) { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; - struct scatterlist *sg, *datasg; unsigned long flags; - u32 tlen, sg_off; + u32 tlen; int ret; fcpreq->op = op; fcpreq->offset = fod->offset; fcpreq->timeout = NVME_FC_TGTOP_TIMEOUT_SEC; - tlen = min_t(u32, (NVMET_FC_MAX_KB_PER_XFR * 1024), + + tlen = min_t(u32, tgtport->max_sg_cnt * PAGE_SIZE, (fod->total_length - fod->offset)); - tlen = min_t(u32, tlen, NVME_FC_MAX_SEGMENTS * PAGE_SIZE); - tlen = min_t(u32, tlen, fod->tgtport->ops->max_sgl_segments - * PAGE_SIZE); fcpreq->transfer_length = tlen; fcpreq->transferred_length = 0; fcpreq->fcp_error = 0; fcpreq->rsplen = 0; - fcpreq->sg_cnt = 0; - - datasg = fod->next_sg; - sg_off = fod->next_sg_offset; - - for (sg = fcpreq->sg ; tlen; sg++) { - *sg = *datasg; - if (sg_off) { - sg->offset += sg_off; - sg->length -= sg_off; - sg->dma_address += sg_off; - sg_off = 0; - } - if (tlen < sg->length) { - sg->length = tlen; - fod->next_sg = datasg; - fod->next_sg_offset += tlen; - } else if (tlen == sg->length) { - fod->next_sg_offset = 0; - fod->next_sg = sg_next(datasg); - } else { - fod->next_sg_offset = 0; - datasg = sg_next(datasg); - } - tlen -= sg->length; - fcpreq->sg_cnt++; - } + fcpreq->sg = &fod->data_sg[fod->offset / PAGE_SIZE]; + fcpreq->sg_cnt = DIV_ROUND_UP(tlen, PAGE_SIZE); /* * If the last READDATA request: check if LLDD supports @@ -2225,8 +2199,6 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.sg = fod->data_sg; fod->req.sg_cnt = fod->data_sg_cnt; fod->offset = 0; - fod->next_sg = fod->data_sg; - fod->next_sg_offset = 0; if (fod->io_dir == NVMET_FCP_WRITE) { /* pull the data over before invoking nvmet layer */ diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 1bb9d5b311b1..1cb9847ec261 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -193,9 +193,6 @@ out_free_options: #define TGTPORT_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN) -#define ALL_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN | NVMF_OPT_ROLES | \ - NVMF_OPT_FCADDR | NVMF_OPT_LPWWNN | NVMF_OPT_LPWWPN) - static DEFINE_SPINLOCK(fcloop_lock); static LIST_HEAD(fcloop_lports); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 717ed7ddb2f6..92628c432926 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -375,6 +375,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (error) goto out_free_sq; + ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); if (IS_ERR(ctrl->ctrl.admin_q)) { diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index e3b244c7e443..7d261ab894f4 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -115,6 +115,7 @@ struct nvmet_ctrl { u32 cc; u32 csts; + uuid_t hostid; u16 cntlid; u32 kato; |