summaryrefslogtreecommitdiff
path: root/drivers/nvme/host
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-10-25 16:50:48 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2018-10-25 16:50:48 +0300
commitbd6bf7c10484f026505814b690104cdef27ed460 (patch)
tree0ff42aa1dae7f7e28a5de5a74bfea641cae0841c /drivers/nvme/host
parenta41efc2a0f68cea26665ab9e6d991c9bf33b3f59 (diff)
parent663569db6476795c7955289529ea0154e3d768bf (diff)
downloadlinux-bd6bf7c10484f026505814b690104cdef27ed460.tar.xz
Merge tag 'pci-v4.20-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
Pull PCI updates from Bjorn Helgaas: - Fix ASPM link_state teardown on removal (Lukas Wunner) - Fix misleading _OSC ASPM message (Sinan Kaya) - Make _OSC optional for PCI (Sinan Kaya) - Don't initialize ASPM link state when ACPI_FADT_NO_ASPM is set (Patrick Talbert) - Remove x86 and arm64 node-local allocation for host bridge structures (Punit Agrawal) - Pay attention to device-specific _PXM node values (Jonathan Cameron) - Support new Immediate Readiness bit (Felipe Balbi) - Differentiate between pciehp surprise and safe removal (Lukas Wunner) - Remove unnecessary pciehp includes (Lukas Wunner) - Drop pciehp hotplug_slot_ops wrappers (Lukas Wunner) - Tolerate PCIe Slot Presence Detect being hardwired to zero to workaround broken hardware, e.g., the Wilocity switch/wireless device (Lukas Wunner) - Unify pciehp controller & slot structs (Lukas Wunner) - Constify hotplug_slot_ops (Lukas Wunner) - Drop hotplug_slot_info (Lukas Wunner) - Embed hotplug_slot struct into users instead of allocating it separately (Lukas Wunner) - Initialize PCIe port service drivers directly instead of relying on initcall ordering (Keith Busch) - Restore PCI config state after a slot reset (Keith Busch) - Save/restore DPC config state along with other PCI config state (Keith Busch) - Reference count devices during AER handling to avoid race issue with concurrent hot removal (Keith Busch) - If an Upstream Port reports ERR_FATAL, don't try to read the Port's config space because it is probably unreachable (Keith Busch) - During error handling, use slot-specific reset instead of secondary bus reset to avoid link up/down issues on hotplug ports (Keith Busch) - Restore previous AER/DPC handling that does not remove and re-enumerate devices on ERR_FATAL (Keith Busch) - Notify all drivers that may be affected by error recovery resets (Keith Busch) - Always generate error recovery uevents, even if a driver doesn't have error callbacks (Keith Busch) - Make PCIe link active reporting detection generic (Keith Busch) - Support D3cold in PCIe hierarchies during system sleep and runtime, including hotplug and Thunderbolt ports (Mika Westerberg) - Handle hpmemsize/hpiosize kernel parameters uniformly, whether slots are empty or occupied (Jon Derrick) - Remove duplicated include from pci/pcie/err.c and unused variable from cpqphp (YueHaibing) - Remove driver pci_cleanup_aer_uncorrect_error_status() calls (Oza Pawandeep) - Uninline PCI bus accessors for better ftracing (Keith Busch) - Remove unused AER Root Port .error_resume method (Keith Busch) - Use kfifo in AER instead of a local version (Keith Busch) - Use threaded IRQ in AER bottom half (Keith Busch) - Use managed resources in AER core (Keith Busch) - Reuse pcie_port_find_device() for AER injection (Keith Busch) - Abstract AER interrupt handling to disconnect error injection (Keith Busch) - Refactor AER injection callbacks to simplify future improvments (Keith Busch) - Remove unused Netronome NFP32xx Device IDs (Jakub Kicinski) - Use bitmap_zalloc() for dma_alias_mask (Andy Shevchenko) - Add switch fall-through annotations (Gustavo A. R. Silva) - Remove unused Switchtec quirk variable (Joshua Abraham) - Fix pci.c kernel-doc warning (Randy Dunlap) - Remove trivial PCI wrappers for DMA APIs (Christoph Hellwig) - Add Intel GPU device IDs to spurious interrupt quirk (Bin Meng) - Run Switchtec DMA aliasing quirk only on NTB endpoints to avoid useless dmesg errors (Logan Gunthorpe) - Update Switchtec NTB documentation (Wesley Yung) - Remove redundant "default n" from Kconfig (Bartlomiej Zolnierkiewicz) - Avoid panic when drivers enable MSI/MSI-X twice (Tonghao Zhang) - Add PCI support for peer-to-peer DMA (Logan Gunthorpe) - Add sysfs group for PCI peer-to-peer memory statistics (Logan Gunthorpe) - Add PCI peer-to-peer DMA scatterlist mapping interface (Logan Gunthorpe) - Add PCI configfs/sysfs helpers for use by peer-to-peer users (Logan Gunthorpe) - Add PCI peer-to-peer DMA driver writer's documentation (Logan Gunthorpe) - Add block layer flag to indicate driver support for PCI peer-to-peer DMA (Logan Gunthorpe) - Map Infiniband scatterlists for peer-to-peer DMA if they contain P2P memory (Logan Gunthorpe) - Register nvme-pci CMB buffer as PCI peer-to-peer memory (Logan Gunthorpe) - Add nvme-pci support for PCI peer-to-peer memory in requests (Logan Gunthorpe) - Use PCI peer-to-peer memory in nvme (Stephen Bates, Steve Wise, Christoph Hellwig, Logan Gunthorpe) - Cache VF config space size to optimize enumeration of many VFs (KarimAllah Ahmed) - Remove unnecessary <linux/pci-ats.h> include (Bjorn Helgaas) - Fix VMD AERSID quirk Device ID matching (Jon Derrick) - Fix Cadence PHY handling during probe (Alan Douglas) - Signal Cadence Endpoint interrupts via AXI region 0 instead of last region (Alan Douglas) - Write Cadence Endpoint MSI interrupts with 32 bits of data (Alan Douglas) - Remove redundant controller tests for "device_type == pci" (Rob Herring) - Document R-Car E3 (R8A77990) bindings (Tho Vu) - Add device tree support for R-Car r8a7744 (Biju Das) - Drop unused mvebu PCIe capability code (Thomas Petazzoni) - Add shared PCI bridge emulation code (Thomas Petazzoni) - Convert mvebu to use shared PCI bridge emulation (Thomas Petazzoni) - Add aardvark Root Port emulation (Thomas Petazzoni) - Support 100MHz/200MHz refclocks for i.MX6 (Lucas Stach) - Add initial power management for i.MX7 (Leonard Crestez) - Add PME_Turn_Off support for i.MX7 (Leonard Crestez) - Fix qcom runtime power management error handling (Bjorn Andersson) - Update TI dra7xx unaligned access errata workaround for host mode as well as endpoint mode (Vignesh R) - Fix kirin section mismatch warning (Nathan Chancellor) - Remove iproc PAXC slot check to allow VF support (Jitendra Bhivare) - Quirk Keystone K2G to limit MRRS to 256 (Kishon Vijay Abraham I) - Update Keystone to use MRRS quirk for host bridge instead of open coding (Kishon Vijay Abraham I) - Refactor Keystone link establishment (Kishon Vijay Abraham I) - Simplify and speed up Keystone link training (Kishon Vijay Abraham I) - Remove unused Keystone host_init argument (Kishon Vijay Abraham I) - Merge Keystone driver files into one (Kishon Vijay Abraham I) - Remove redundant Keystone platform_set_drvdata() (Kishon Vijay Abraham I) - Rename Keystone functions for uniformity (Kishon Vijay Abraham I) - Add Keystone device control module DT binding (Kishon Vijay Abraham I) - Use SYSCON API to get Keystone control module device IDs (Kishon Vijay Abraham I) - Clean up Keystone PHY handling (Kishon Vijay Abraham I) - Use runtime PM APIs to enable Keystone clock (Kishon Vijay Abraham I) - Clean up Keystone config space access checks (Kishon Vijay Abraham I) - Get Keystone outbound window count from DT (Kishon Vijay Abraham I) - Clean up Keystone outbound window configuration (Kishon Vijay Abraham I) - Clean up Keystone DBI setup (Kishon Vijay Abraham I) - Clean up Keystone ks_pcie_link_up() (Kishon Vijay Abraham I) - Fix Keystone IRQ status checking (Kishon Vijay Abraham I) - Add debug messages for all Keystone errors (Kishon Vijay Abraham I) - Clean up Keystone includes and macros (Kishon Vijay Abraham I) - Fix Mediatek unchecked return value from devm_pci_remap_iospace() (Gustavo A. R. Silva) - Fix Mediatek endpoint/port matching logic (Honghui Zhang) - Change Mediatek Root Port Class Code to PCI_CLASS_BRIDGE_PCI (Honghui Zhang) - Remove redundant Mediatek PM domain check (Honghui Zhang) - Convert Mediatek to pci_host_probe() (Honghui Zhang) - Fix Mediatek MSI enablement (Honghui Zhang) - Add Mediatek system PM support for MT2712 and MT7622 (Honghui Zhang) - Add Mediatek loadable module support (Honghui Zhang) - Detach VMD resources after stopping root bus to prevent orphan resources (Jon Derrick) - Convert pcitest build process to that used by other tools (iio, perf, etc) (Gustavo Pimentel) * tag 'pci-v4.20-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci: (140 commits) PCI/AER: Refactor error injection fallbacks PCI/AER: Abstract AER interrupt handling PCI/AER: Reuse existing pcie_port_find_device() interface PCI/AER: Use managed resource allocations PCI: pcie: Remove redundant 'default n' from Kconfig PCI: aardvark: Implement emulated root PCI bridge config space PCI: mvebu: Convert to PCI emulated bridge config space PCI: mvebu: Drop unused PCI express capability code PCI: Introduce PCI bridge emulated config space common logic PCI: vmd: Detach resources after stopping root bus nvmet: Optionally use PCI P2P memory nvmet: Introduce helper functions to allocate and free request SGLs nvme-pci: Add support for P2P memory in requests nvme-pci: Use PCI p2pmem subsystem to manage the CMB IB/core: Ensure we map P2P memory correctly in rdma_rw_ctx_[init|destroy]() block: Add PCI P2P flag for request queue PCI/P2PDMA: Add P2P DMA driver writer's documentation docs-rst: Add a new directory for PCI documentation PCI/P2PDMA: Introduce configfs/sysfs enable attribute helpers PCI/P2PDMA: Add PCI p2pmem DMA mappings to adjust the bus offset ...
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r--drivers/nvme/host/core.c4
-rw-r--r--drivers/nvme/host/nvme.h1
-rw-r--r--drivers/nvme/host/pci.c98
3 files changed, 63 insertions, 40 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 9e4a30b05bd2..2e65be8b1387 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3064,7 +3064,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ns->queue = blk_mq_init_queue(ctrl->tagset);
if (IS_ERR(ns->queue))
goto out_free_ns;
+
blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
+ if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
+ blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
+
ns->queue->queuedata = ns;
ns->ctrl = ctrl;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9fefba039d1e..cee79cb388af 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -343,6 +343,7 @@ struct nvme_ctrl_ops {
unsigned int flags;
#define NVME_F_FABRICS (1 << 0)
#define NVME_F_METADATA_SUPPORTED (1 << 1)
+#define NVME_F_PCI_P2PDMA (1 << 2)
int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4e023cd007e1..f30031945ee4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -30,6 +30,7 @@
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sed-opal.h>
+#include <linux/pci-p2pdma.h>
#include "nvme.h"
@@ -99,9 +100,8 @@ struct nvme_dev {
struct work_struct remove_work;
struct mutex shutdown_lock;
bool subsystem;
- void __iomem *cmb;
- pci_bus_addr_t cmb_bus_addr;
u64 cmb_size;
+ bool cmb_use_sqes;
u32 cmbsz;
u32 cmbloc;
struct nvme_ctrl ctrl;
@@ -158,7 +158,7 @@ struct nvme_queue {
struct nvme_dev *dev;
spinlock_t sq_lock;
struct nvme_command *sq_cmds;
- struct nvme_command __iomem *sq_cmds_io;
+ bool sq_cmds_is_io;
spinlock_t cq_lock ____cacheline_aligned_in_smp;
volatile struct nvme_completion *cqes;
struct blk_mq_tags **tags;
@@ -447,11 +447,8 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
spin_lock(&nvmeq->sq_lock);
- if (nvmeq->sq_cmds_io)
- memcpy_toio(&nvmeq->sq_cmds_io[nvmeq->sq_tail], cmd,
- sizeof(*cmd));
- else
- memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
+
+ memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0;
@@ -748,8 +745,13 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
goto out;
ret = BLK_STS_RESOURCE;
- nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
- DMA_ATTR_NO_WARN);
+
+ if (is_pci_p2pdma_page(sg_page(iod->sg)))
+ nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents,
+ dma_dir);
+ else
+ nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
+ dma_dir, DMA_ATTR_NO_WARN);
if (!nr_mapped)
goto out;
@@ -791,7 +793,10 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
DMA_TO_DEVICE : DMA_FROM_DEVICE;
if (iod->nents) {
- dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+ /* P2PDMA requests do not need to be unmapped */
+ if (!is_pci_p2pdma_page(sg_page(iod->sg)))
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+
if (blk_integrity_rq(req))
dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
}
@@ -1232,9 +1237,18 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
{
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
- if (nvmeq->sq_cmds)
- dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
- nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+
+ if (nvmeq->sq_cmds) {
+ if (nvmeq->sq_cmds_is_io)
+ pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev),
+ nvmeq->sq_cmds,
+ SQ_SIZE(nvmeq->q_depth));
+ else
+ dma_free_coherent(nvmeq->q_dmadev,
+ SQ_SIZE(nvmeq->q_depth),
+ nvmeq->sq_cmds,
+ nvmeq->sq_dma_addr);
+ }
}
static void nvme_free_queues(struct nvme_dev *dev, int lowest)
@@ -1323,12 +1337,21 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
int qid, int depth)
{
- /* CMB SQEs will be mapped before creation */
- if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS))
- return 0;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+ nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
+ nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
+ nvmeq->sq_cmds);
+ nvmeq->sq_cmds_is_io = true;
+ }
+
+ if (!nvmeq->sq_cmds) {
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ &nvmeq->sq_dma_addr, GFP_KERNEL);
+ nvmeq->sq_cmds_is_io = false;
+ }
- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
- &nvmeq->sq_dma_addr, GFP_KERNEL);
if (!nvmeq->sq_cmds)
return -ENOMEM;
return 0;
@@ -1405,13 +1428,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
int result;
s16 vector;
- if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
- unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
- dev->ctrl.page_size);
- nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
- nvmeq->sq_cmds_io = dev->cmb + offset;
- }
-
/*
* A queue's vector matches the queue identifier unless the controller
* has only one vector available.
@@ -1652,9 +1668,6 @@ static void nvme_map_cmb(struct nvme_dev *dev)
return;
dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
- if (!use_cmb_sqes)
- return;
-
size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev);
offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc);
bar = NVME_CMB_BIR(dev->cmbloc);
@@ -1671,11 +1684,18 @@ static void nvme_map_cmb(struct nvme_dev *dev)
if (size > bar_size - offset)
size = bar_size - offset;
- dev->cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
- if (!dev->cmb)
+ if (pci_p2pdma_add_resource(pdev, bar, size, offset)) {
+ dev_warn(dev->ctrl.device,
+ "failed to register the CMB\n");
return;
- dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset;
+ }
+
dev->cmb_size = size;
+ dev->cmb_use_sqes = use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS);
+
+ if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
+ (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
+ pci_p2pmem_publish(pdev, true);
if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
&dev_attr_cmb.attr, NULL))
@@ -1685,12 +1705,10 @@ static void nvme_map_cmb(struct nvme_dev *dev)
static inline void nvme_release_cmb(struct nvme_dev *dev)
{
- if (dev->cmb) {
- iounmap(dev->cmb);
- dev->cmb = NULL;
+ if (dev->cmb_size) {
sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
&dev_attr_cmb.attr, NULL);
- dev->cmbsz = 0;
+ dev->cmb_size = 0;
}
}
@@ -1889,13 +1907,13 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (nr_io_queues == 0)
return 0;
- if (dev->cmb && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+ if (dev->cmb_use_sqes) {
result = nvme_cmb_qdepth(dev, nr_io_queues,
sizeof(struct nvme_command));
if (result > 0)
dev->q_depth = result;
else
- nvme_release_cmb(dev);
+ dev->cmb_use_sqes = false;
}
do {
@@ -2390,7 +2408,8 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
- .flags = NVME_F_METADATA_SUPPORTED,
+ .flags = NVME_F_METADATA_SUPPORTED |
+ NVME_F_PCI_P2PDMA,
.reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64,
@@ -2648,7 +2667,6 @@ static void nvme_error_resume(struct pci_dev *pdev)
struct nvme_dev *dev = pci_get_drvdata(pdev);
flush_work(&dev->ctrl.reset_work);
- pci_cleanup_aer_uncorrect_error_status(pdev);
}
static const struct pci_error_handlers nvme_err_handler = {