diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-16 07:20:52 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-16 07:20:52 +0300 |
commit | 9637d517347e80ee2fe1c5d8ce45ba1b88d8b5cd (patch) | |
tree | 3cee2a1d8b3c6ea466924517307a1f98ada1e92f /drivers/nvme/host | |
parent | 273cbf61c3ddee9574ef1f4959b9bc6db5b24271 (diff) | |
parent | 787c79d6393fc028887cc1b6066915f0b094e92f (diff) | |
download | linux-9637d517347e80ee2fe1c5d8ce45ba1b88d8b5cd.tar.xz |
Merge tag 'for-linus-20190715' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe:
"A later pull request with some followup items. I had some vacation
coming up to the merge window, so certain things items were delayed a
bit. This pull request also contains fixes that came in within the
last few days of the merge window, which I didn't want to push right
before sending you a pull request.
This contains:
- NVMe pull request, mostly fixes, but also a few minor items on the
feature side that were timing constrained (Christoph et al)
- Report zones fixes (Damien)
- Removal of dead code (Damien)
- Turn on cgroup psi memstall (Josef)
- block cgroup MAINTAINERS entry (Konstantin)
- Flush init fix (Josef)
- blk-throttle low iops timing fix (Konstantin)
- nbd resize fixes (Mike)
- nbd 0 blocksize crash fix (Xiubo)
- block integrity error leak fix (Wenwen)
- blk-cgroup writeback and priority inheritance fixes (Tejun)"
* tag 'for-linus-20190715' of git://git.kernel.dk/linux-block: (42 commits)
MAINTAINERS: add entry for block io cgroup
null_blk: fixup ->report_zones() for !CONFIG_BLK_DEV_ZONED
block: Limit zone array allocation size
sd_zbc: Fix report zones buffer allocation
block: Kill gfp_t argument of blkdev_report_zones()
block: Allow mapping of vmalloc-ed buffers
block/bio-integrity: fix a memory leak bug
nvme: fix NULL deref for fabrics options
nbd: add netlink reconfigure resize support
nbd: fix crash when the blksize is zero
block: Disable write plugging for zoned block devices
block: Fix elevator name declaration
block: Remove unused definitions
nvme: fix regression upon hot device removal and insertion
blk-throttle: fix zero wait time for iops throttled group
block: Fix potential overflow in blk_report_zones()
blkcg: implement REQ_CGROUP_PUNT
blkcg, writeback: Implement wbc_blkcg_css()
blkcg, writeback: Add wbc->no_cgroup_owner
blkcg, writeback: Rename wbc_account_io() to wbc_account_cgroup_owner()
...
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- | drivers/nvme/host/core.c | 43 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 51 | ||||
-rw-r--r-- | drivers/nvme/host/multipath.c | 18 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 1 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 26 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 9 | ||||
-rw-r--r-- | drivers/nvme/host/trace.c | 28 |
7 files changed, 152 insertions, 24 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b2dd4e391f5c..cc09b81fc7f4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -11,6 +11,7 @@ #include <linux/hdreg.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/backing-dev.h> #include <linux/list_sort.h> #include <linux/slab.h> #include <linux/types.h> @@ -1626,6 +1627,7 @@ static void nvme_update_disk_info(struct gendisk *disk, { sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9); unsigned short bs = 1 << ns->lba_shift; + u32 atomic_bs, phys_bs, io_opt; if (ns->lba_shift > PAGE_SHIFT) { /* unsupported block size, set capacity to 0 later */ @@ -1634,9 +1636,37 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); + if (id->nabo == 0) { + /* + * Bit 1 indicates whether NAWUPF is defined for this namespace + * and whether it should be used instead of AWUPF. If NAWUPF == + * 0 then AWUPF must be used instead. + */ + if (id->nsfeat & (1 << 1) && id->nawupf) + atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; + else + atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; + } else { + atomic_bs = bs; + } + phys_bs = bs; + io_opt = bs; + if (id->nsfeat & (1 << 4)) { + /* NPWG = Namespace Preferred Write Granularity */ + phys_bs *= 1 + le16_to_cpu(id->npwg); + /* NOWS = Namespace Optimal Write Size */ + io_opt *= 1 + le16_to_cpu(id->nows); + } + blk_queue_logical_block_size(disk->queue, bs); - blk_queue_physical_block_size(disk->queue, bs); - blk_queue_io_min(disk->queue, bs); + /* + * Linux filesystems assume writing a single physical block is + * an atomic operation. Hence limit the physical block size to the + * value of the Atomic Write Unit Power Fail parameter. + */ + blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs)); + blk_queue_io_min(disk->queue, phys_bs); + blk_queue_io_opt(disk->queue, io_opt); if (ns->ms && !ns->ext && (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) @@ -2386,8 +2416,8 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, lockdep_assert_held(&nvme_subsystems_lock); list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) { - if (ctrl->state == NVME_CTRL_DELETING || - ctrl->state == NVME_CTRL_DEAD) + if (tmp->state == NVME_CTRL_DELETING || + tmp->state == NVME_CTRL_DEAD) continue; if (tmp->cntlid == ctrl->cntlid) { @@ -2433,6 +2463,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; + subsys->awupf = le16_to_cpu(id->awupf); #ifdef CONFIG_NVME_MULTIPATH subsys->iopolicy = NVME_IOPOLICY_NUMA; #endif @@ -3274,6 +3305,10 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) goto out_free_ns; } + if (ctrl->opts && ctrl->opts->data_digest) + ns->queue->backing_dev_info->capabilities + |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue); if (ctrl->ops->flags & NVME_F_PCI_P2PDMA) blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index dcb2b799966f..232d8094091b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -204,6 +204,9 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt); static struct workqueue_struct *nvme_fc_wq; +static bool nvme_fc_waiting_to_unload; +static DECLARE_COMPLETION(nvme_fc_unload_proceed); + /* * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. @@ -229,6 +232,8 @@ nvme_fc_free_lport(struct kref *ref) /* remove from transport list */ spin_lock_irqsave(&nvme_fc_lock, flags); list_del(&lport->port_list); + if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list)) + complete(&nvme_fc_unload_proceed); spin_unlock_irqrestore(&nvme_fc_lock, flags); ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); @@ -3457,11 +3462,51 @@ out_destroy_wq: return ret; } +static void +nvme_fc_delete_controllers(struct nvme_fc_rport *rport) +{ + struct nvme_fc_ctrl *ctrl; + + spin_lock(&rport->lock); + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: transport unloading: deleting ctrl\n", + ctrl->cnum); + nvme_delete_ctrl(&ctrl->ctrl); + } + spin_unlock(&rport->lock); +} + +static void +nvme_fc_cleanup_for_unload(void) +{ + struct nvme_fc_lport *lport; + struct nvme_fc_rport *rport; + + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { + list_for_each_entry(rport, &lport->endp_list, endp_list) { + nvme_fc_delete_controllers(rport); + } + } +} + static void __exit nvme_fc_exit_module(void) { - /* sanity check - all lports should be removed */ - if (!list_empty(&nvme_fc_lport_list)) - pr_warn("%s: localport list not empty\n", __func__); + unsigned long flags; + bool need_cleanup = false; + + spin_lock_irqsave(&nvme_fc_lock, flags); + nvme_fc_waiting_to_unload = true; + if (!list_empty(&nvme_fc_lport_list)) { + need_cleanup = true; + nvme_fc_cleanup_for_unload(); + } + spin_unlock_irqrestore(&nvme_fc_lock, flags); + if (need_cleanup) { + pr_info("%s: waiting for ctlr deletes\n", __func__); + wait_for_completion(&nvme_fc_unload_proceed); + pr_info("%s: ctrl deletes complete\n", __func__); + } nvmf_unregister_transport(&nvme_fc_transport); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 499acf07d61a..a9a927677970 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -123,14 +123,20 @@ void nvme_mpath_clear_current_path(struct nvme_ns *ns) } } +static bool nvme_path_is_disabled(struct nvme_ns *ns) +{ + return ns->ctrl->state != NVME_CTRL_LIVE || + test_bit(NVME_NS_ANA_PENDING, &ns->flags) || + test_bit(NVME_NS_REMOVING, &ns->flags); +} + static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) { int found_distance = INT_MAX, fallback_distance = INT_MAX, distance; struct nvme_ns *found = NULL, *fallback = NULL, *ns; list_for_each_entry_rcu(ns, &head->list, siblings) { - if (ns->ctrl->state != NVME_CTRL_LIVE || - test_bit(NVME_NS_ANA_PENDING, &ns->flags)) + if (nvme_path_is_disabled(ns)) continue; if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA) @@ -178,14 +184,16 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, { struct nvme_ns *ns, *found, *fallback = NULL; - if (list_is_singular(&head->list)) + if (list_is_singular(&head->list)) { + if (nvme_path_is_disabled(old)) + return NULL; return old; + } for (ns = nvme_next_ns(head, old); ns != old; ns = nvme_next_ns(head, ns)) { - if (ns->ctrl->state != NVME_CTRL_LIVE || - test_bit(NVME_NS_ANA_PENDING, &ns->flags)) + if (nvme_path_is_disabled(ns)) continue; if (ns->ana_state == NVME_ANA_OPTIMIZED) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ea45d7d393ad..716a876119c8 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -283,6 +283,7 @@ struct nvme_subsystem { char firmware_rev[8]; u8 cmic; u16 vendor_id; + u16 awupf; /* 0's based awupf value. */ struct ida ns_ida; #ifdef CONFIG_NVME_MULTIPATH enum nvme_iopolicy iopolicy; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 189352081994..bb970ca82517 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1439,11 +1439,15 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth)); - nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, - nvmeq->sq_cmds); - if (nvmeq->sq_dma_addr) { - set_bit(NVMEQ_SQ_CMB, &nvmeq->flags); - return 0; + if (nvmeq->sq_cmds) { + nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, + nvmeq->sq_cmds); + if (nvmeq->sq_dma_addr) { + set_bit(NVMEQ_SQ_CMB, &nvmeq->flags); + return 0; + } + + pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(depth)); } } @@ -2250,7 +2254,9 @@ static int nvme_dev_add(struct nvme_dev *dev) if (!dev->ctrl.tagset) { dev->tagset.ops = &nvme_mq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; - dev->tagset.nr_maps = 2; /* default + read */ + dev->tagset.nr_maps = 1; /* default */ + if (dev->io_queues[HCTX_TYPE_READ]) + dev->tagset.nr_maps++; if (dev->io_queues[HCTX_TYPE_POLL]) dev->tagset.nr_maps++; dev->tagset.timeout = NVME_IO_TIMEOUT; @@ -2289,8 +2295,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) pci_set_master(pdev); - if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && - dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32))) + if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64))) goto disable; if (readl(dev->bar + NVME_REG_CSTS) == -1) { @@ -2498,7 +2503,8 @@ static void nvme_reset_work(struct work_struct *work) * Limit the max command size to prevent iod->sg allocations going * over a single page. */ - dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; + dev->ctrl.max_hw_sectors = min_t(u32, + NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9); dev->ctrl.max_segments = NVME_MAX_SEGS; /* @@ -2923,7 +2929,7 @@ static int nvme_simple_resume(struct device *dev) return 0; } -const struct dev_pm_ops nvme_dev_pm_ops = { +static const struct dev_pm_ops nvme_dev_pm_ops = { .suspend = nvme_suspend, .resume = nvme_resume, .freeze = nvme_simple_suspend, diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 08a2501b9357..606b13d35d16 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -860,7 +860,14 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) else flags |= MSG_MORE; - ret = kernel_sendpage(queue->sock, page, offset, len, flags); + /* can't zcopy slab pages */ + if (unlikely(PageSlab(page))) { + ret = sock_no_sendpage(queue->sock, page, offset, len, + flags); + } else { + ret = kernel_sendpage(queue->sock, page, offset, len, + flags); + } if (ret <= 0) return ret; diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index f01ad0fd60bb..9778eb0406b3 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -7,6 +7,17 @@ #include <asm/unaligned.h> #include "trace.h" +static const char *nvme_trace_delete_sq(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u16 sqid = get_unaligned_le16(cdw10); + + trace_seq_printf(p, "sqid=%u", sqid); + trace_seq_putc(p, 0); + + return ret; +} + static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10) { const char *ret = trace_seq_buffer_ptr(p); @@ -23,6 +34,17 @@ static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10) return ret; } +static const char *nvme_trace_delete_cq(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u16 cqid = get_unaligned_le16(cdw10); + + trace_seq_printf(p, "cqid=%u", cqid); + trace_seq_putc(p, 0); + + return ret; +} + static const char *nvme_trace_create_cq(struct trace_seq *p, u8 *cdw10) { const char *ret = trace_seq_buffer_ptr(p); @@ -107,8 +129,12 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode, u8 *cdw10) { switch (opcode) { + case nvme_admin_delete_sq: + return nvme_trace_delete_sq(p, cdw10); case nvme_admin_create_sq: return nvme_trace_create_sq(p, cdw10); + case nvme_admin_delete_cq: + return nvme_trace_delete_cq(p, cdw10); case nvme_admin_create_cq: return nvme_trace_create_cq(p, cdw10); case nvme_admin_identify: @@ -178,7 +204,7 @@ static const char *nvme_trace_fabrics_common(struct trace_seq *p, u8 *spc) { const char *ret = trace_seq_buffer_ptr(p); - trace_seq_printf(p, "spcecific=%*ph", 24, spc); + trace_seq_printf(p, "specific=%*ph", 24, spc); trace_seq_putc(p, 0); return ret; } |