From 11eb0e0e8dea8b97cff972b09cf6fb033b729dff Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Mon, 4 Jun 2018 22:16:09 -0400 Subject: PCI: Make early dump functionality generic Move early dump functionality into common code so that it is available for all architectures. No need to carry arch-specific reads around as the read hooks are already initialized by the time pci_setup_device() is getting called during scan. Tested-by: Andy Shevchenko Signed-off-by: Sinan Kaya Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko --- Documentation/admin-guide/kernel-parameters.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index efc7aa7a0670..f11b9485ed7f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2995,7 +2995,7 @@ See also Documentation/blockdev/paride.txt. pci=option[,option...] [PCI] various PCI subsystem options: - earlydump [X86] dump PCI config space before the kernel + earlydump dump PCI config space before the kernel changes anything off [X86] don't probe for the PCI bus bios [X86-32] force use of PCI BIOS, don't access -- cgit v1.2.3 From 82dfbd27c837b5a7c2a7a13b54a4f0b16c51222f Mon Sep 17 00:00:00 2001 From: Alan Douglas Date: Mon, 25 Jun 2018 09:30:51 +0100 Subject: dt-bindings: PCI: cadence: Add DT bindings for optional PHYs Update DT documentation to include optional PHYs for cadence PCIe host and endpoint controllers. 
Signed-off-by: Alan Douglas Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- Documentation/devicetree/bindings/pci/cdns,cdns-pcie-ep.txt | 5 +++++ Documentation/devicetree/bindings/pci/cdns,cdns-pcie-host.txt | 6 ++++++ 2 files changed, 11 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/pci/cdns,cdns-pcie-ep.txt b/Documentation/devicetree/bindings/pci/cdns,cdns-pcie-ep.txt index 9a305237fa6e..4a0475e2ba7e 100644 --- a/Documentation/devicetree/bindings/pci/cdns,cdns-pcie-ep.txt +++ b/Documentation/devicetree/bindings/pci/cdns,cdns-pcie-ep.txt @@ -9,6 +9,9 @@ Required properties: Optional properties: - max-functions: Maximum number of functions that can be configured (default 1). +- phys: From PHY bindings: List of Generic PHY phandles. One per lane if more + than one in the list. If only one PHY listed it must manage all lanes. +- phy-names: List of names to identify the PHY. Example: @@ -19,4 +22,6 @@ pcie@fc000000 { reg-names = "reg", "mem"; cdns,max-outbound-regions = <16>; max-functions = /bits/ 8 <8>; + phys = <&ep_phy0 &ep_phy1>; + phy-names = "pcie-lane0","pcie-lane1"; }; diff --git a/Documentation/devicetree/bindings/pci/cdns,cdns-pcie-host.txt b/Documentation/devicetree/bindings/pci/cdns,cdns-pcie-host.txt index 20a33f38f69d..91de69c713a9 100644 --- a/Documentation/devicetree/bindings/pci/cdns,cdns-pcie-host.txt +++ b/Documentation/devicetree/bindings/pci/cdns,cdns-pcie-host.txt @@ -24,6 +24,9 @@ Optional properties: translations (default 32) - vendor-id: The PCI vendor ID (16 bits, default is design dependent) - device-id: The PCI device ID (16 bits, default is design dependent) +- phys: From PHY bindings: List of Generic PHY phandles. One per lane if more + than one in the list. If only one PHY listed it must manage all lanes. +- phy-names: List of names to identify the PHY. 
Example: @@ -57,4 +60,7 @@ pcie@fb000000 { interrupt-map-mask = <0x0 0x0 0x0 0x7>; msi-parent = <&its_pci>; + + phys = <&pcie_phy0>; + phy-names = "pcie-phy"; }; -- cgit v1.2.3 From 8963106eabdc56911e9b65258eb5e9a6b7b3dfda Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Thu, 19 Jul 2018 10:32:12 +0200 Subject: PCI: endpoint: Add MSI-X interfaces Add PCI_EPC_IRQ_MSIX type. Add MSI-X callbacks signatures to the ops structure. Add sysfs interface for set/get MSI-X capability maximum number. Update documentation accordingly. Signed-off-by: Gustavo Pimentel Signed-off-by: Lorenzo Pieralisi Acked-by: Kishon Vijay Abraham I --- .../PCI/endpoint/function/binding/pci-test.txt | 2 + drivers/pci/endpoint/pci-ep-cfs.c | 24 +++++++++ drivers/pci/endpoint/pci-epc-core.c | 57 ++++++++++++++++++++++ include/linux/pci-epc.h | 9 ++++ include/linux/pci-epf.h | 1 + 5 files changed, 93 insertions(+) (limited to 'Documentation') diff --git a/Documentation/PCI/endpoint/function/binding/pci-test.txt b/Documentation/PCI/endpoint/function/binding/pci-test.txt index 3b68b955fb50..cd76ba47394b 100644 --- a/Documentation/PCI/endpoint/function/binding/pci-test.txt +++ b/Documentation/PCI/endpoint/function/binding/pci-test.txt @@ -15,3 +15,5 @@ subsys_id : don't care interrupt_pin : Should be 1 - INTA, 2 - INTB, 3 - INTC, 4 -INTD msi_interrupts : Should be 1 to 32 depending on the number of MSI interrupts to test +msix_interrupts : Should be 1 to 2048 depending on the number of MSI-X + interrupts to test diff --git a/drivers/pci/endpoint/pci-ep-cfs.c b/drivers/pci/endpoint/pci-ep-cfs.c index 018ea3433cb5..d1288a0bd530 100644 --- a/drivers/pci/endpoint/pci-ep-cfs.c +++ b/drivers/pci/endpoint/pci-ep-cfs.c @@ -286,6 +286,28 @@ static ssize_t pci_epf_msi_interrupts_show(struct config_item *item, to_pci_epf_group(item)->epf->msi_interrupts); } +static ssize_t pci_epf_msix_interrupts_store(struct config_item *item, + const char *page, size_t len) +{ + u16 val; + int ret; + + ret = 
kstrtou16(page, 0, &val); + if (ret) + return ret; + + to_pci_epf_group(item)->epf->msix_interrupts = val; + + return len; +} + +static ssize_t pci_epf_msix_interrupts_show(struct config_item *item, + char *page) +{ + return sprintf(page, "%d\n", + to_pci_epf_group(item)->epf->msix_interrupts); +} + PCI_EPF_HEADER_R(vendorid) PCI_EPF_HEADER_W_u16(vendorid) @@ -327,6 +349,7 @@ CONFIGFS_ATTR(pci_epf_, subsys_vendor_id); CONFIGFS_ATTR(pci_epf_, subsys_id); CONFIGFS_ATTR(pci_epf_, interrupt_pin); CONFIGFS_ATTR(pci_epf_, msi_interrupts); +CONFIGFS_ATTR(pci_epf_, msix_interrupts); static struct configfs_attribute *pci_epf_attrs[] = { &pci_epf_attr_vendorid, @@ -340,6 +363,7 @@ static struct configfs_attribute *pci_epf_attrs[] = { &pci_epf_attr_subsys_id, &pci_epf_attr_interrupt_pin, &pci_epf_attr_msi_interrupts, + &pci_epf_attr_msix_interrupts, NULL, }; diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index b0ee42739c3c..7d77bd0e5d4a 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -217,6 +217,63 @@ int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts) } EXPORT_SYMBOL_GPL(pci_epc_set_msi); +/** + * pci_epc_get_msix() - get the number of MSI-X interrupt numbers allocated + * @epc: the EPC device to which MSI-X interrupts was requested + * @func_no: the endpoint function number in the EPC device + * + * Invoke to get the number of MSI-X interrupts allocated by the RC + */ +int pci_epc_get_msix(struct pci_epc *epc, u8 func_no) +{ + int interrupt; + unsigned long flags; + + if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions) + return 0; + + if (!epc->ops->get_msix) + return 0; + + spin_lock_irqsave(&epc->lock, flags); + interrupt = epc->ops->get_msix(epc, func_no); + spin_unlock_irqrestore(&epc->lock, flags); + + if (interrupt < 0) + return 0; + + return interrupt + 1; +} +EXPORT_SYMBOL_GPL(pci_epc_get_msix); + +/** + * pci_epc_set_msix() - set the number of MSI-X interrupt 
numbers required + * @epc: the EPC device on which MSI-X has to be configured + * @func_no: the endpoint function number in the EPC device + * @interrupts: number of MSI-X interrupts required by the EPF + * + * Invoke to set the required number of MSI-X interrupts. + */ +int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts) +{ + int ret; + unsigned long flags; + + if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions || + interrupts < 1 || interrupts > 2048) + return -EINVAL; + + if (!epc->ops->set_msix) + return 0; + + spin_lock_irqsave(&epc->lock, flags); + ret = epc->ops->set_msix(epc, func_no, interrupts - 1); + spin_unlock_irqrestore(&epc->lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(pci_epc_set_msix); + /** * pci_epc_unmap_addr() - unmap CPU address from PCI address * @epc: the EPC device on which address is allocated diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 243eaa5a66ff..89f079f582df 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -17,6 +17,7 @@ enum pci_epc_irq_type { PCI_EPC_IRQ_UNKNOWN, PCI_EPC_IRQ_LEGACY, PCI_EPC_IRQ_MSI, + PCI_EPC_IRQ_MSIX, }; /** @@ -30,6 +31,10 @@ enum pci_epc_irq_type { * capability register * @get_msi: ops to get the number of MSI interrupts allocated by the RC from * the MSI capability register + * @set_msix: ops to set the requested number of MSI-X interrupts in the + * MSI-X capability register + * @get_msix: ops to get the number of MSI-X interrupts allocated by the RC + * from the MSI-X capability register * @raise_irq: ops to raise a legacy or MSI interrupt * @start: ops to start the PCI link * @stop: ops to stop the PCI link @@ -48,6 +53,8 @@ struct pci_epc_ops { phys_addr_t addr); int (*set_msi)(struct pci_epc *epc, u8 func_no, u8 interrupts); int (*get_msi)(struct pci_epc *epc, u8 func_no); + int (*set_msix)(struct pci_epc *epc, u8 func_no, u16 interrupts); + int (*get_msix)(struct pci_epc *epc, u8 func_no); int (*raise_irq)(struct pci_epc *epc, u8 
func_no, enum pci_epc_irq_type type, u8 interrupt_num); int (*start)(struct pci_epc *epc); @@ -144,6 +151,8 @@ void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, phys_addr_t phys_addr); int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts); int pci_epc_get_msi(struct pci_epc *epc, u8 func_no); +int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts); +int pci_epc_get_msix(struct pci_epc *epc, u8 func_no); int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, enum pci_epc_irq_type type, u8 interrupt_num); int pci_epc_start(struct pci_epc *epc); diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 4e7764935fa8..ec02f58758c8 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -119,6 +119,7 @@ struct pci_epf { struct pci_epf_header *header; struct pci_epf_bar bar[6]; u8 msi_interrupts; + u16 msix_interrupts; u8 func_no; struct pci_epc *epc; -- cgit v1.2.3 From e8817de7fbfca407f4f47da050d12b10fece5706 Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Thu, 19 Jul 2018 10:32:17 +0200 Subject: pci-epf-test/pci_endpoint_test: Cleanup PCI_ENDPOINT_TEST memspace Cleanup PCI_ENDPOINT_TEST memspace (by moving the interrupt number away from command section). Add IRQ_TYPE register to identify the triggered ID interrupt required for the READ/WRITE/COPY tests and raise IRQ test commands. Update documentation accordingly. 
Signed-off-by: Gustavo Pimentel Signed-off-by: Lorenzo Pieralisi Acked-by: Kishon Vijay Abraham I --- Documentation/PCI/endpoint/pci-test-function.txt | 27 ++++++-- drivers/misc/pci_endpoint_test.c | 81 +++++++++++++++--------- drivers/pci/endpoint/functions/pci-epf-test.c | 61 ++++++++++++------ 3 files changed, 114 insertions(+), 55 deletions(-) (limited to 'Documentation') diff --git a/Documentation/PCI/endpoint/pci-test-function.txt b/Documentation/PCI/endpoint/pci-test-function.txt index 0c519c9bf94a..bf4b5cf6fee6 100644 --- a/Documentation/PCI/endpoint/pci-test-function.txt +++ b/Documentation/PCI/endpoint/pci-test-function.txt @@ -20,6 +20,8 @@ The PCI endpoint test device has the following registers: 5) PCI_ENDPOINT_TEST_DST_ADDR 6) PCI_ENDPOINT_TEST_SIZE 7) PCI_ENDPOINT_TEST_CHECKSUM + 8) PCI_ENDPOINT_TEST_IRQ_TYPE + 9) PCI_ENDPOINT_TEST_IRQ_NUMBER *) PCI_ENDPOINT_TEST_MAGIC @@ -34,10 +36,10 @@ that the endpoint device must perform. Bitfield Description: Bit 0 : raise legacy IRQ Bit 1 : raise MSI IRQ - Bit 2 - 7 : MSI interrupt number - Bit 8 : read command (read data from RC buffer) - Bit 9 : write command (write data to RC buffer) - Bit 10 : copy command (copy data from one RC buffer to another + Bit 2 : raise MSI-X IRQ (reserved for future implementation) + Bit 3 : read command (read data from RC buffer) + Bit 4 : write command (write data to RC buffer) + Bit 5 : copy command (copy data from one RC buffer to another RC buffer) *) PCI_ENDPOINT_TEST_STATUS @@ -64,3 +66,20 @@ COPY/READ command. This register contains the destination address (RC buffer address) for the COPY/WRITE command. + +*) PCI_ENDPOINT_TEST_IRQ_TYPE + +This register contains the interrupt type (Legacy/MSI) triggered +for the READ/WRITE/COPY and raise IRQ (Legacy/MSI) commands. + +Possible types: + - Legacy : 0 + - MSI : 1 + +*) PCI_ENDPOINT_TEST_IRQ_NUMBER + +This register contains the triggered ID interrupt. + +Admissible values: + - Legacy : 0 + - MSI : [1 .. 
32] diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 7b370466a227..35fbfbd73a6d 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -35,38 +35,43 @@ #include -#define DRV_MODULE_NAME "pci-endpoint-test" - -#define PCI_ENDPOINT_TEST_MAGIC 0x0 - -#define PCI_ENDPOINT_TEST_COMMAND 0x4 -#define COMMAND_RAISE_LEGACY_IRQ BIT(0) -#define COMMAND_RAISE_MSI_IRQ BIT(1) -#define MSI_NUMBER_SHIFT 2 -/* 6 bits for MSI number */ -#define COMMAND_READ BIT(8) -#define COMMAND_WRITE BIT(9) -#define COMMAND_COPY BIT(10) - -#define PCI_ENDPOINT_TEST_STATUS 0x8 -#define STATUS_READ_SUCCESS BIT(0) -#define STATUS_READ_FAIL BIT(1) -#define STATUS_WRITE_SUCCESS BIT(2) -#define STATUS_WRITE_FAIL BIT(3) -#define STATUS_COPY_SUCCESS BIT(4) -#define STATUS_COPY_FAIL BIT(5) -#define STATUS_IRQ_RAISED BIT(6) -#define STATUS_SRC_ADDR_INVALID BIT(7) -#define STATUS_DST_ADDR_INVALID BIT(8) - -#define PCI_ENDPOINT_TEST_LOWER_SRC_ADDR 0xc +#define DRV_MODULE_NAME "pci-endpoint-test" + +#define IRQ_TYPE_LEGACY 0 +#define IRQ_TYPE_MSI 1 + +#define PCI_ENDPOINT_TEST_MAGIC 0x0 + +#define PCI_ENDPOINT_TEST_COMMAND 0x4 +#define COMMAND_RAISE_LEGACY_IRQ BIT(0) +#define COMMAND_RAISE_MSI_IRQ BIT(1) +/* BIT(2) is reserved for raising MSI-X IRQ command */ +#define COMMAND_READ BIT(3) +#define COMMAND_WRITE BIT(4) +#define COMMAND_COPY BIT(5) + +#define PCI_ENDPOINT_TEST_STATUS 0x8 +#define STATUS_READ_SUCCESS BIT(0) +#define STATUS_READ_FAIL BIT(1) +#define STATUS_WRITE_SUCCESS BIT(2) +#define STATUS_WRITE_FAIL BIT(3) +#define STATUS_COPY_SUCCESS BIT(4) +#define STATUS_COPY_FAIL BIT(5) +#define STATUS_IRQ_RAISED BIT(6) +#define STATUS_SRC_ADDR_INVALID BIT(7) +#define STATUS_DST_ADDR_INVALID BIT(8) + +#define PCI_ENDPOINT_TEST_LOWER_SRC_ADDR 0x0c #define PCI_ENDPOINT_TEST_UPPER_SRC_ADDR 0x10 #define PCI_ENDPOINT_TEST_LOWER_DST_ADDR 0x14 #define PCI_ENDPOINT_TEST_UPPER_DST_ADDR 0x18 -#define PCI_ENDPOINT_TEST_SIZE 0x1c -#define 
PCI_ENDPOINT_TEST_CHECKSUM 0x20 +#define PCI_ENDPOINT_TEST_SIZE 0x1c +#define PCI_ENDPOINT_TEST_CHECKSUM 0x20 + +#define PCI_ENDPOINT_TEST_IRQ_TYPE 0x24 +#define PCI_ENDPOINT_TEST_IRQ_NUMBER 0x28 static DEFINE_IDA(pci_endpoint_test_ida); @@ -179,6 +184,9 @@ static bool pci_endpoint_test_legacy_irq(struct pci_endpoint_test *test) { u32 val; + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, + IRQ_TYPE_LEGACY); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 0); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, COMMAND_RAISE_LEGACY_IRQ); val = wait_for_completion_timeout(&test->irq_raised, @@ -195,8 +203,10 @@ static bool pci_endpoint_test_msi_irq(struct pci_endpoint_test *test, u32 val; struct pci_dev *pdev = test->pdev; + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, + IRQ_TYPE_MSI); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, msi_num); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, - msi_num << MSI_NUMBER_SHIFT | COMMAND_RAISE_MSI_IRQ); val = wait_for_completion_timeout(&test->irq_raised, msecs_to_jiffies(1000)); @@ -281,8 +291,11 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, + no_msi ? IRQ_TYPE_LEGACY : IRQ_TYPE_MSI); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, - 1 << MSI_NUMBER_SHIFT | COMMAND_COPY); + COMMAND_COPY); wait_for_completion(&test->irq_raised); @@ -348,8 +361,11 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, + no_msi ? 
IRQ_TYPE_LEGACY : IRQ_TYPE_MSI); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, - 1 << MSI_NUMBER_SHIFT | COMMAND_READ); + COMMAND_READ); wait_for_completion(&test->irq_raised); @@ -403,8 +419,11 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size) pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, + no_msi ? IRQ_TYPE_LEGACY : IRQ_TYPE_MSI); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, - 1 << MSI_NUMBER_SHIFT | COMMAND_WRITE); + COMMAND_WRITE); wait_for_completion(&test->irq_raised); diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index 63ed706445b9..db4b23672004 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -18,13 +18,15 @@ #include #include +#define IRQ_TYPE_LEGACY 0 +#define IRQ_TYPE_MSI 1 + #define COMMAND_RAISE_LEGACY_IRQ BIT(0) #define COMMAND_RAISE_MSI_IRQ BIT(1) -#define MSI_NUMBER_SHIFT 2 -#define MSI_NUMBER_MASK (0x3f << MSI_NUMBER_SHIFT) -#define COMMAND_READ BIT(8) -#define COMMAND_WRITE BIT(9) -#define COMMAND_COPY BIT(10) +/* BIT(2) is reserved for raising MSI-X IRQ command */ +#define COMMAND_READ BIT(3) +#define COMMAND_WRITE BIT(4) +#define COMMAND_COPY BIT(5) #define STATUS_READ_SUCCESS BIT(0) #define STATUS_READ_FAIL BIT(1) @@ -56,6 +58,8 @@ struct pci_epf_test_reg { u64 dst_addr; u32 size; u32 checksum; + u32 irq_type; + u32 irq_number; } __packed; static struct pci_epf_header test_header = { @@ -244,31 +248,39 @@ err: return ret; } -static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test, u8 irq) +static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test, u8 irq_type, + u16 irq) { - u8 msi_count; struct pci_epf *epf = epf_test->epf; + struct device 
*dev = &epf->dev; struct pci_epc *epc = epf->epc; enum pci_barno test_reg_bar = epf_test->test_reg_bar; struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar]; reg->status |= STATUS_IRQ_RAISED; - msi_count = pci_epc_get_msi(epc, epf->func_no); - if (irq > msi_count || msi_count <= 0) + + switch (irq_type) { + case IRQ_TYPE_LEGACY: pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_LEGACY, 0); - else + break; + case IRQ_TYPE_MSI: pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSI, irq); + break; + default: + dev_err(dev, "Failed to raise IRQ, unknown type\n"); + break; + } } static void pci_epf_test_cmd_handler(struct work_struct *work) { int ret; - u8 irq; - u8 msi_count; + int count; u32 command; struct pci_epf_test *epf_test = container_of(work, struct pci_epf_test, cmd_handler.work); struct pci_epf *epf = epf_test->epf; + struct device *dev = &epf->dev; struct pci_epc *epc = epf->epc; enum pci_barno test_reg_bar = epf_test->test_reg_bar; struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar]; @@ -280,7 +292,10 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) reg->command = 0; reg->status = 0; - irq = (command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT; + if (reg->irq_type > IRQ_TYPE_MSI) { + dev_err(dev, "Failed to detect IRQ type\n"); + goto reset_handler; + } if (command & COMMAND_RAISE_LEGACY_IRQ) { reg->status = STATUS_IRQ_RAISED; @@ -294,7 +309,8 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) reg->status |= STATUS_WRITE_FAIL; else reg->status |= STATUS_WRITE_SUCCESS; - pci_epf_test_raise_irq(epf_test, irq); + pci_epf_test_raise_irq(epf_test, reg->irq_type, + reg->irq_number); goto reset_handler; } @@ -304,7 +320,8 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) reg->status |= STATUS_READ_SUCCESS; else reg->status |= STATUS_READ_FAIL; - pci_epf_test_raise_irq(epf_test, irq); + pci_epf_test_raise_irq(epf_test, reg->irq_type, + reg->irq_number); goto reset_handler; } @@ -314,16 +331,18 @@ static void 
pci_epf_test_cmd_handler(struct work_struct *work) reg->status |= STATUS_COPY_SUCCESS; else reg->status |= STATUS_COPY_FAIL; - pci_epf_test_raise_irq(epf_test, irq); + pci_epf_test_raise_irq(epf_test, reg->irq_type, + reg->irq_number); goto reset_handler; } if (command & COMMAND_RAISE_MSI_IRQ) { - msi_count = pci_epc_get_msi(epc, epf->func_no); - if (irq > msi_count || msi_count <= 0) + count = pci_epc_get_msi(epc, epf->func_no); + if (reg->irq_number > count || count <= 0) goto reset_handler; reg->status = STATUS_IRQ_RAISED; - pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSI, irq); + pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSI, + reg->irq_number); goto reset_handler; } @@ -457,8 +476,10 @@ static int pci_epf_test_bind(struct pci_epf *epf) return ret; ret = pci_epc_set_msi(epc, epf->func_no, epf->msi_interrupts); - if (ret) + if (ret) { + dev_err(dev, "MSI configuration failed\n"); return ret; + } if (!epf_test->linkup_notifier) queue_work(kpcitest_workqueue, &epf_test->cmd_handler.work); -- cgit v1.2.3 From c2e00e31087e58f6c49b90b4702fc3df4fad6a83 Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Thu, 19 Jul 2018 10:32:19 +0200 Subject: pci-epf-test/pci_endpoint_test: Add MSI-X support Add MSI-X support and update driver documentation accordingly. 
Signed-off-by: Gustavo Pimentel Signed-off-by: Lorenzo Pieralisi Acked-by: Kishon Vijay Abraham I --- Documentation/PCI/endpoint/pci-endpoint.txt | 4 ++-- Documentation/PCI/endpoint/pci-test-function.txt | 4 +++- Documentation/PCI/endpoint/pci-test-howto.txt | 22 ++++++++++++++--- Documentation/ioctl/ioctl-number.txt | 1 + Documentation/misc-devices/pci-endpoint-test.txt | 3 +++ drivers/misc/pci_endpoint_test.c | 29 ++++++++++++++++------- drivers/pci/controller/dwc/pcie-designware-plat.c | 1 + drivers/pci/endpoint/functions/pci-epf-test.c | 29 +++++++++++++++++++++-- include/linux/pci-epc.h | 1 + include/uapi/linux/pcitest.h | 1 + 10 files changed, 79 insertions(+), 16 deletions(-) (limited to 'Documentation') diff --git a/Documentation/PCI/endpoint/pci-endpoint.txt b/Documentation/PCI/endpoint/pci-endpoint.txt index 9b1d66829290..e86a96b66a6a 100644 --- a/Documentation/PCI/endpoint/pci-endpoint.txt +++ b/Documentation/PCI/endpoint/pci-endpoint.txt @@ -44,7 +44,7 @@ by the PCI controller driver. * clear_bar: ops to reset the BAR * alloc_addr_space: ops to allocate in PCI controller address space * free_addr_space: ops to free the allocated address space - * raise_irq: ops to raise a legacy or MSI interrupt + * raise_irq: ops to raise a legacy, MSI or MSI-X interrupt * start: ops to start the PCI link * stop: ops to stop the PCI link @@ -96,7 +96,7 @@ by the PCI endpoint function driver. *) pci_epc_raise_irq() The PCI endpoint function driver should use pci_epc_raise_irq() to raise - Legacy Interrupt or MSI Interrupt. + Legacy Interrupt, MSI or MSI-X Interrupt. *) pci_epc_mem_alloc_addr() diff --git a/Documentation/PCI/endpoint/pci-test-function.txt b/Documentation/PCI/endpoint/pci-test-function.txt index bf4b5cf6fee6..5916f1f592bb 100644 --- a/Documentation/PCI/endpoint/pci-test-function.txt +++ b/Documentation/PCI/endpoint/pci-test-function.txt @@ -36,7 +36,7 @@ that the endpoint device must perform. 
Bitfield Description: Bit 0 : raise legacy IRQ Bit 1 : raise MSI IRQ - Bit 2 : raise MSI-X IRQ (reserved for future implementation) + Bit 2 : raise MSI-X IRQ Bit 3 : read command (read data from RC buffer) Bit 4 : write command (write data to RC buffer) Bit 5 : copy command (copy data from one RC buffer to another @@ -75,6 +75,7 @@ for the READ/WRITE/COPY and raise IRQ (Legacy/MSI) commands. Possible types: - Legacy : 0 - MSI : 1 + - MSI-X : 2 *) PCI_ENDPOINT_TEST_IRQ_NUMBER @@ -83,3 +84,4 @@ This register contains the triggered ID interrupt. Admissible values: - Legacy : 0 - MSI : [1 .. 32] + - MSI-X : [1 .. 2048] diff --git a/Documentation/PCI/endpoint/pci-test-howto.txt b/Documentation/PCI/endpoint/pci-test-howto.txt index 75f48c3bb191..65f1a137e35c 100644 --- a/Documentation/PCI/endpoint/pci-test-howto.txt +++ b/Documentation/PCI/endpoint/pci-test-howto.txt @@ -45,9 +45,9 @@ The PCI endpoint framework populates the directory with the following configurable fields. # ls functions/pci_epf_test/func1 - baseclass_code interrupt_pin revid subsys_vendor_id - cache_line_size msi_interrupts subclass_code vendorid - deviceid progif_code subsys_id + baseclass_code interrupt_pin progif_code subsys_id + cache_line_size msi_interrupts revid subsys_vendor_id + deviceid msix_interrupts subclass_code vendorid The PCI endpoint function driver populates these entries with default values when the device is bound to the driver. The pci-epf-test driver populates @@ -67,6 +67,7 @@ device, the following commands can be used. # echo 0x104c > functions/pci_epf_test/func1/vendorid # echo 0xb500 > functions/pci_epf_test/func1/deviceid # echo 16 > functions/pci_epf_test/func1/msi_interrupts + # echo 8 > functions/pci_epf_test/func1/msix_interrupts 1.5 Binding pci-epf-test Device to EP Controller @@ -153,6 +154,21 @@ following commands. 
MSI30: NOT OKAY MSI31: NOT OKAY MSI32: NOT OKAY + MSIX1: OKAY + MSIX2: OKAY + MSIX3: OKAY + MSIX4: OKAY + MSIX5: OKAY + MSIX6: OKAY + MSIX7: OKAY + MSIX8: OKAY + MSIX9: NOT OKAY + MSIX10: NOT OKAY + MSIX11: NOT OKAY + MSIX12: NOT OKAY + MSIX13: NOT OKAY + [...] + MSIX2048: NOT OKAY Read Tests diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 480c8609dc58..65259d459fd1 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -166,6 +166,7 @@ Code Seq#(hex) Include File Comments 'P' all linux/soundcard.h conflict! 'P' 60-6F sound/sscape_ioctl.h conflict! 'P' 00-0F drivers/usb/class/usblp.c conflict! +'P' 01-07 drivers/misc/pci_endpoint_test.c conflict! 'Q' all linux/soundcard.h 'R' 00-1F linux/random.h conflict! 'R' 01 linux/rfkill.h conflict! diff --git a/Documentation/misc-devices/pci-endpoint-test.txt b/Documentation/misc-devices/pci-endpoint-test.txt index 4ebc3594b32c..fdfa0f66d3d0 100644 --- a/Documentation/misc-devices/pci-endpoint-test.txt +++ b/Documentation/misc-devices/pci-endpoint-test.txt @@ -10,6 +10,7 @@ The PCI driver for the test device performs the following tests *) verifying addresses programmed in BAR *) raise legacy IRQ *) raise MSI IRQ + *) raise MSI-X IRQ *) read data *) write data *) copy data @@ -25,6 +26,8 @@ ioctl PCITEST_LEGACY_IRQ: Tests legacy IRQ PCITEST_MSI: Tests message signalled interrupts. The MSI number to be tested should be passed as argument. + PCITEST_MSIX: Tests MSI-X interrupts. The MSI-X number + to be tested should be passed as argument. PCITEST_WRITE: Perform write tests. The size of the buffer should be passed as argument. PCITEST_READ: Perform read tests. 
The size of the buffer should be passed diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 349794cbe1f3..f4fef108caff 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -39,13 +39,14 @@ #define IRQ_TYPE_LEGACY 0 #define IRQ_TYPE_MSI 1 +#define IRQ_TYPE_MSIX 2 #define PCI_ENDPOINT_TEST_MAGIC 0x0 #define PCI_ENDPOINT_TEST_COMMAND 0x4 #define COMMAND_RAISE_LEGACY_IRQ BIT(0) #define COMMAND_RAISE_MSI_IRQ BIT(1) -/* BIT(2) is reserved for raising MSI-X IRQ command */ +#define COMMAND_RAISE_MSIX_IRQ BIT(2) #define COMMAND_READ BIT(3) #define COMMAND_WRITE BIT(4) #define COMMAND_COPY BIT(5) @@ -84,7 +85,7 @@ MODULE_PARM_DESC(no_msi, "Disable MSI interrupt in pci_endpoint_test"); static int irq_type = IRQ_TYPE_MSI; module_param(irq_type, int, 0444); -MODULE_PARM_DESC(irq_type, "IRQ mode selection in pci_endpoint_test (0 - Legacy, 1 - MSI)"); +MODULE_PARM_DESC(irq_type, "IRQ mode selection in pci_endpoint_test (0 - Legacy, 1 - MSI, 2 - MSI-X)"); enum pci_barno { BAR_0, @@ -202,16 +203,18 @@ static bool pci_endpoint_test_legacy_irq(struct pci_endpoint_test *test) } static bool pci_endpoint_test_msi_irq(struct pci_endpoint_test *test, - u8 msi_num) + u16 msi_num, bool msix) { u32 val; struct pci_dev *pdev = test->pdev; pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, - IRQ_TYPE_MSI); + msix == false ? IRQ_TYPE_MSI : + IRQ_TYPE_MSIX); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, msi_num); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, - COMMAND_RAISE_MSI_IRQ); + msix == false ? 
COMMAND_RAISE_MSI_IRQ : + COMMAND_RAISE_MSIX_IRQ); val = wait_for_completion_timeout(&test->irq_raised, msecs_to_jiffies(1000)); if (!val) @@ -456,7 +459,8 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, ret = pci_endpoint_test_legacy_irq(test); break; case PCITEST_MSI: - ret = pci_endpoint_test_msi_irq(test, arg); + case PCITEST_MSIX: + ret = pci_endpoint_test_msi_irq(test, arg, cmd == PCITEST_MSIX); break; case PCITEST_WRITE: ret = pci_endpoint_test_write(test, arg); @@ -542,6 +546,12 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, dev_err(dev, "Failed to get MSI interrupts\n"); test->num_irqs = irq; break; + case IRQ_TYPE_MSIX: + irq = pci_alloc_irq_vectors(pdev, 1, 2048, PCI_IRQ_MSIX); + if (irq < 0) + dev_err(dev, "Failed to get MSI-X interrupts\n"); + test->num_irqs = irq; + break; default: dev_err(dev, "Invalid IRQ type selected\n"); } @@ -558,8 +568,9 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, pci_endpoint_test_irqhandler, IRQF_SHARED, DRV_MODULE_NAME, test); if (err) - dev_err(dev, "failed to request IRQ %d for MSI %d\n", - pci_irq_vector(pdev, i), i + 1); + dev_err(dev, "Failed to request IRQ %d for MSI%s %d\n", + pci_irq_vector(pdev, i), + irq_type == IRQ_TYPE_MSIX ? 
"-X" : "", i + 1); } for (bar = BAR_0; bar <= BAR_5; bar++) { @@ -625,6 +636,7 @@ err_iounmap: err_disable_msi: pci_disable_msi(pdev); + pci_disable_msix(pdev); pci_release_regions(pdev); err_disable_pdev: @@ -656,6 +668,7 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) for (i = 0; i < test->num_irqs; i++) devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), test); pci_disable_msi(pdev); + pci_disable_msix(pdev); pci_release_regions(pdev); pci_disable_device(pdev); } diff --git a/drivers/pci/controller/dwc/pcie-designware-plat.c b/drivers/pci/controller/dwc/pcie-designware-plat.c index 3f8a3aa3a91e..c12bf794d69c 100644 --- a/drivers/pci/controller/dwc/pcie-designware-plat.c +++ b/drivers/pci/controller/dwc/pcie-designware-plat.c @@ -77,6 +77,7 @@ static void dw_plat_pcie_ep_init(struct dw_pcie_ep *ep) dw_pcie_ep_reset_bar(pci, bar); epc->features |= EPC_FEATURE_NO_LINKUP_NOTIFIER; + epc->features |= EPC_FEATURE_MSIX_AVAILABLE; } static int dw_plat_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no, diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index db4b23672004..3e86fa3c7da3 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -20,10 +20,11 @@ #define IRQ_TYPE_LEGACY 0 #define IRQ_TYPE_MSI 1 +#define IRQ_TYPE_MSIX 2 #define COMMAND_RAISE_LEGACY_IRQ BIT(0) #define COMMAND_RAISE_MSI_IRQ BIT(1) -/* BIT(2) is reserved for raising MSI-X IRQ command */ +#define COMMAND_RAISE_MSIX_IRQ BIT(2) #define COMMAND_READ BIT(3) #define COMMAND_WRITE BIT(4) #define COMMAND_COPY BIT(5) @@ -47,6 +48,7 @@ struct pci_epf_test { struct pci_epf *epf; enum pci_barno test_reg_bar; bool linkup_notifier; + bool msix_available; struct delayed_work cmd_handler; }; @@ -266,6 +268,9 @@ static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test, u8 irq_type, case IRQ_TYPE_MSI: pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSI, irq); break; + case 
IRQ_TYPE_MSIX: + pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSIX, irq); + break; default: dev_err(dev, "Failed to raise IRQ, unknown type\n"); break; @@ -292,7 +297,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) reg->command = 0; reg->status = 0; - if (reg->irq_type > IRQ_TYPE_MSI) { + if (reg->irq_type > IRQ_TYPE_MSIX) { dev_err(dev, "Failed to detect IRQ type\n"); goto reset_handler; } @@ -346,6 +351,16 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) goto reset_handler; } + if (command & COMMAND_RAISE_MSIX_IRQ) { + count = pci_epc_get_msix(epc, epf->func_no); + if (reg->irq_number > count || count <= 0) + goto reset_handler; + reg->status = STATUS_IRQ_RAISED; + pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSIX, + reg->irq_number); + goto reset_handler; + } + reset_handler: queue_delayed_work(kpcitest_workqueue, &epf_test->cmd_handler, msecs_to_jiffies(1)); @@ -459,6 +474,8 @@ static int pci_epf_test_bind(struct pci_epf *epf) else epf_test->linkup_notifier = true; + epf_test->msix_available = epc->features & EPC_FEATURE_MSIX_AVAILABLE; + epf_test->test_reg_bar = EPC_FEATURE_GET_BAR(epc->features); ret = pci_epc_write_header(epc, epf->func_no, header); @@ -481,6 +498,14 @@ static int pci_epf_test_bind(struct pci_epf *epf) return ret; } + if (epf_test->msix_available) { + ret = pci_epc_set_msix(epc, epf->func_no, epf->msix_interrupts); + if (ret) { + dev_err(dev, "MSI-X configuration failed\n"); + return ret; + } + } + if (!epf_test->linkup_notifier) queue_work(kpcitest_workqueue, &epf_test->cmd_handler.work); diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index bb2395b56f13..37dab8116901 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -102,6 +102,7 @@ struct pci_epc { #define EPC_FEATURE_NO_LINKUP_NOTIFIER BIT(0) #define EPC_FEATURE_BAR_MASK (BIT(1) | BIT(2) | BIT(3)) +#define EPC_FEATURE_MSIX_AVAILABLE BIT(4) #define EPC_FEATURE_SET_BAR(features, bar) \ (features |= 
(EPC_FEATURE_BAR_MASK & (bar << 1))) #define EPC_FEATURE_GET_BAR(features) \ diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index 953cf036cb26..d746fb159dcd 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -16,5 +16,6 @@ #define PCITEST_WRITE _IOW('P', 0x4, unsigned long) #define PCITEST_READ _IOW('P', 0x5, unsigned long) #define PCITEST_COPY _IOW('P', 0x6, unsigned long) +#define PCITEST_MSIX _IOW('P', 0x7, int) #endif /* __UAPI_LINUX_PCITEST_H */ -- cgit v1.2.3 From e03327122e2c8e6ae4565ef5b3d3cbe4364546a1 Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Thu, 19 Jul 2018 10:32:20 +0200 Subject: pci_endpoint_test: Add 2 ioctl commands Add MSI-X support and update driver documentation accordingly. Add 2 new IOCTL commands: - Allow to reconfigure driver IRQ type in runtime. - Allow to retrieve current driver IRQ type configured. Add IRQ type validation before executing the READ/WRITE/COPY tests. Signed-off-by: Gustavo Pimentel Signed-off-by: Lorenzo Pieralisi Acked-by: Kishon Vijay Abraham I --- Documentation/ioctl/ioctl-number.txt | 2 +- Documentation/misc-devices/pci-endpoint-test.txt | 3 + drivers/misc/pci_endpoint_test.c | 206 +++++++++++++++++------ include/uapi/linux/pcitest.h | 2 + 4 files changed, 165 insertions(+), 48 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 65259d459fd1..c15c4f3bdd82 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -166,7 +166,7 @@ Code Seq#(hex) Include File Comments 'P' all linux/soundcard.h conflict! 'P' 60-6F sound/sscape_ioctl.h conflict! 'P' 00-0F drivers/usb/class/usblp.c conflict! -'P' 01-07 drivers/misc/pci_endpoint_test.c conflict! +'P' 01-09 drivers/misc/pci_endpoint_test.c conflict! 'Q' all linux/soundcard.h 'R' 00-1F linux/random.h conflict! 'R' 01 linux/rfkill.h conflict! 
diff --git a/Documentation/misc-devices/pci-endpoint-test.txt b/Documentation/misc-devices/pci-endpoint-test.txt index fdfa0f66d3d0..58ccca4416b1 100644 --- a/Documentation/misc-devices/pci-endpoint-test.txt +++ b/Documentation/misc-devices/pci-endpoint-test.txt @@ -28,6 +28,9 @@ ioctl to be tested should be passed as argument. PCITEST_MSIX: Tests message signalled interrupts. The MSI-X number to be tested should be passed as argument. + PCITEST_SET_IRQTYPE: Changes driver IRQ type configuration. The IRQ type + should be passed as argument (0: Legacy, 1:MSI, 2:MSI-X). + PCITEST_GET_IRQTYPE: Gets driver IRQ type configuration. PCITEST_WRITE: Perform write tests. The size of the buffer should be passed as argument. PCITEST_READ: Perform read tests. The size of the buffer should be passed diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index f4fef108caff..896e2df9400f 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -37,6 +37,7 @@ #define DRV_MODULE_NAME "pci-endpoint-test" +#define IRQ_TYPE_UNDEFINED -1 #define IRQ_TYPE_LEGACY 0 #define IRQ_TYPE_MSI 1 #define IRQ_TYPE_MSIX 2 @@ -157,6 +158,100 @@ static irqreturn_t pci_endpoint_test_irqhandler(int irq, void *dev_id) return IRQ_HANDLED; } +static void pci_endpoint_test_free_irq_vectors(struct pci_endpoint_test *test) +{ + struct pci_dev *pdev = test->pdev; + + pci_free_irq_vectors(pdev); +} + +static bool pci_endpoint_test_alloc_irq_vectors(struct pci_endpoint_test *test, + int type) +{ + int irq = -1; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + bool res = true; + + switch (type) { + case IRQ_TYPE_LEGACY: + irq = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_LEGACY); + if (irq < 0) + dev_err(dev, "Failed to get Legacy interrupt\n"); + break; + case IRQ_TYPE_MSI: + irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI); + if (irq < 0) + dev_err(dev, "Failed to get MSI interrupts\n"); + break; + case IRQ_TYPE_MSIX: + irq = 
pci_alloc_irq_vectors(pdev, 1, 2048, PCI_IRQ_MSIX); + if (irq < 0) + dev_err(dev, "Failed to get MSI-X interrupts\n"); + break; + default: + dev_err(dev, "Invalid IRQ type selected\n"); + } + + if (irq < 0) { + irq = 0; + res = false; + } + test->num_irqs = irq; + + return res; +} + +static void pci_endpoint_test_release_irq(struct pci_endpoint_test *test) +{ + int i; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + + for (i = 0; i < test->num_irqs; i++) + devm_free_irq(dev, pci_irq_vector(pdev, i), test); + + test->num_irqs = 0; +} + +static bool pci_endpoint_test_request_irq(struct pci_endpoint_test *test) +{ + int i; + int err; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + + for (i = 0; i < test->num_irqs; i++) { + err = devm_request_irq(dev, pci_irq_vector(pdev, i), + pci_endpoint_test_irqhandler, + IRQF_SHARED, DRV_MODULE_NAME, test); + if (err) + goto fail; + } + + return true; + +fail: + switch (irq_type) { + case IRQ_TYPE_LEGACY: + dev_err(dev, "Failed to request IRQ %d for Legacy\n", + pci_irq_vector(pdev, i)); + break; + case IRQ_TYPE_MSI: + dev_err(dev, "Failed to request IRQ %d for MSI %d\n", + pci_irq_vector(pdev, i), + i + 1); + break; + case IRQ_TYPE_MSIX: + dev_err(dev, "Failed to request IRQ %d for MSI-X %d\n", + pci_irq_vector(pdev, i), + i + 1); + break; + } + + return false; +} + static bool pci_endpoint_test_bar(struct pci_endpoint_test *test, enum pci_barno barno) { @@ -247,6 +342,11 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) if (size > SIZE_MAX - alignment) goto err; + if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type option\n"); + goto err; + } + orig_src_addr = dma_alloc_coherent(dev, size + alignment, &orig_src_phys_addr, GFP_KERNEL); if (!orig_src_addr) { @@ -337,6 +437,11 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) if (size > SIZE_MAX - alignment) goto err; + if 
(irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type option\n"); + goto err; + } + orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr, GFP_KERNEL); if (!orig_addr) { @@ -400,6 +505,11 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size) if (size > SIZE_MAX - alignment) goto err; + if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type option\n"); + goto err; + } + orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr, GFP_KERNEL); if (!orig_addr) { @@ -440,6 +550,38 @@ err: return ret; } +static bool pci_endpoint_test_set_irq(struct pci_endpoint_test *test, + int req_irq_type) +{ + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + + if (req_irq_type < IRQ_TYPE_LEGACY || req_irq_type > IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type option\n"); + return false; + } + + if (irq_type == req_irq_type) + return true; + + pci_endpoint_test_release_irq(test); + pci_endpoint_test_free_irq_vectors(test); + + if (!pci_endpoint_test_alloc_irq_vectors(test, req_irq_type)) + goto err; + + if (!pci_endpoint_test_request_irq(test)) + goto err; + + irq_type = req_irq_type; + return true; + +err: + pci_endpoint_test_free_irq_vectors(test); + irq_type = IRQ_TYPE_UNDEFINED; + return false; +} + static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -471,6 +613,12 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, case PCITEST_COPY: ret = pci_endpoint_test_copy(test, arg); break; + case PCITEST_SET_IRQTYPE: + ret = pci_endpoint_test_set_irq(test, arg); + break; + case PCITEST_GET_IRQTYPE: + ret = irq_type; + break; } ret: @@ -486,9 +634,7 @@ static const struct file_operations pci_endpoint_test_fops = { static int pci_endpoint_test_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { - int i; int err; - int irq = 0; int id; char name[20]; enum 
pci_barno bar; @@ -537,41 +683,11 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, pci_set_master(pdev); - switch (irq_type) { - case IRQ_TYPE_LEGACY: - break; - case IRQ_TYPE_MSI: - irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI); - if (irq < 0) - dev_err(dev, "Failed to get MSI interrupts\n"); - test->num_irqs = irq; - break; - case IRQ_TYPE_MSIX: - irq = pci_alloc_irq_vectors(pdev, 1, 2048, PCI_IRQ_MSIX); - if (irq < 0) - dev_err(dev, "Failed to get MSI-X interrupts\n"); - test->num_irqs = irq; - break; - default: - dev_err(dev, "Invalid IRQ type selected\n"); - } + if (!pci_endpoint_test_alloc_irq_vectors(test, irq_type)) + goto err_disable_irq; - err = devm_request_irq(dev, pdev->irq, pci_endpoint_test_irqhandler, - IRQF_SHARED, DRV_MODULE_NAME, test); - if (err) { - dev_err(dev, "Failed to request IRQ %d\n", pdev->irq); - goto err_disable_msi; - } - - for (i = 1; i < irq; i++) { - err = devm_request_irq(dev, pci_irq_vector(pdev, i), - pci_endpoint_test_irqhandler, - IRQF_SHARED, DRV_MODULE_NAME, test); - if (err) - dev_err(dev, "Failed to request IRQ %d for MSI%s %d\n", - pci_irq_vector(pdev, i), - irq_type == IRQ_TYPE_MSIX ? 
"-X" : "", i + 1); - } + if (!pci_endpoint_test_request_irq(test)) + goto err_disable_irq; for (bar = BAR_0; bar <= BAR_5; bar++) { if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { @@ -630,13 +746,10 @@ err_iounmap: if (test->bar[bar]) pci_iounmap(pdev, test->bar[bar]); } + pci_endpoint_test_release_irq(test); - for (i = 0; i < irq; i++) - devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), test); - -err_disable_msi: - pci_disable_msi(pdev); - pci_disable_msix(pdev); +err_disable_irq: + pci_endpoint_test_free_irq_vectors(test); pci_release_regions(pdev); err_disable_pdev: @@ -648,7 +761,6 @@ err_disable_pdev: static void pci_endpoint_test_remove(struct pci_dev *pdev) { int id; - int i; enum pci_barno bar; struct pci_endpoint_test *test = pci_get_drvdata(pdev); struct miscdevice *misc_device = &test->miscdev; @@ -665,10 +777,10 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) if (test->bar[bar]) pci_iounmap(pdev, test->bar[bar]); } - for (i = 0; i < test->num_irqs; i++) - devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), test); - pci_disable_msi(pdev); - pci_disable_msix(pdev); + + pci_endpoint_test_release_irq(test); + pci_endpoint_test_free_irq_vectors(test); + pci_release_regions(pdev); pci_disable_device(pdev); } diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index d746fb159dcd..cbf422e56696 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -17,5 +17,7 @@ #define PCITEST_READ _IOW('P', 0x5, unsigned long) #define PCITEST_COPY _IOW('P', 0x6, unsigned long) #define PCITEST_MSIX _IOW('P', 0x7, int) +#define PCITEST_SET_IRQTYPE _IOW('P', 0x8, int) +#define PCITEST_GET_IRQTYPE _IO('P', 0x9) #endif /* __UAPI_LINUX_PCITEST_H */ -- cgit v1.2.3 From 0653217c180f0e5332ef75f7299220e644091b20 Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Thu, 19 Jul 2018 10:32:21 +0200 Subject: tools: PCI: Add MSI-X support Add MSI-X support to pcitest tool. 
Modify pcitest.sh script to accommodate MSI-X interrupt tests. Update documentation accordingly. Signed-off-by: Gustavo Pimentel Signed-off-by: Lorenzo Pieralisi Acked-by: Kishon Vijay Abraham I --- Documentation/PCI/endpoint/pci-test-howto.txt | 36 +++++++++++-------- tools/pci/pcitest.c | 51 ++++++++++++++++++++++++++- tools/pci/pcitest.sh | 15 ++++++++ 3 files changed, 87 insertions(+), 15 deletions(-) (limited to 'Documentation') diff --git a/Documentation/PCI/endpoint/pci-test-howto.txt b/Documentation/PCI/endpoint/pci-test-howto.txt index 65f1a137e35c..e40cf0fb58d7 100644 --- a/Documentation/PCI/endpoint/pci-test-howto.txt +++ b/Documentation/PCI/endpoint/pci-test-howto.txt @@ -121,7 +121,9 @@ following commands. Interrupt tests + SET IRQ TYPE TO LEGACY: OKAY LEGACY IRQ: NOT OKAY + SET IRQ TYPE TO MSI: OKAY MSI1: OKAY MSI2: OKAY MSI3: OKAY @@ -154,24 +156,30 @@ following commands. MSI30: NOT OKAY MSI31: NOT OKAY MSI32: NOT OKAY - MSIX1: OKAY - MSIX2: OKAY - MSIX3: OKAY - MSIX4: OKAY - MSIX5: OKAY - MSIX6: OKAY - MSIX7: OKAY - MSIX8: OKAY - MSIX9: NOT OKAY - MSIX10: NOT OKAY - MSIX11: NOT OKAY - MSIX12: NOT OKAY - MSIX13: NOT OKAY + SET IRQ TYPE TO MSI-X: OKAY + MSI-X1: OKAY + MSI-X2: OKAY + MSI-X3: OKAY + MSI-X4: OKAY + MSI-X5: OKAY + MSI-X6: OKAY + MSI-X7: OKAY + MSI-X8: OKAY + MSI-X9: NOT OKAY + MSI-X10: NOT OKAY + MSI-X11: NOT OKAY + MSI-X12: NOT OKAY + MSI-X13: NOT OKAY + MSI-X14: NOT OKAY + MSI-X15: NOT OKAY + MSI-X16: NOT OKAY [...] 
- MSIX2048: NOT OKAY + MSI-X2047: NOT OKAY + MSI-X2048: NOT OKAY Read Tests + SET IRQ TYPE TO MSI: OKAY READ ( 1 bytes): OKAY READ ( 1024 bytes): OKAY READ ( 1025 bytes): OKAY diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c index 9074b477bff0..af146bb03b4d 100644 --- a/tools/pci/pcitest.c +++ b/tools/pci/pcitest.c @@ -31,12 +31,17 @@ #define BILLION 1E9 static char *result[] = { "NOT OKAY", "OKAY" }; +static char *irq[] = { "LEGACY", "MSI", "MSI-X" }; struct pci_test { char *device; char barnum; bool legacyirq; unsigned int msinum; + unsigned int msixnum; + int irqtype; + bool set_irqtype; + bool get_irqtype; bool read; bool write; bool copy; @@ -65,6 +70,24 @@ static int run_test(struct pci_test *test) fprintf(stdout, "%s\n", result[ret]); } + if (test->set_irqtype) { + ret = ioctl(fd, PCITEST_SET_IRQTYPE, test->irqtype); + fprintf(stdout, "SET IRQ TYPE TO %s:\t\t", irq[test->irqtype]); + if (ret < 0) + fprintf(stdout, "FAILED\n"); + else + fprintf(stdout, "%s\n", result[ret]); + } + + if (test->get_irqtype) { + ret = ioctl(fd, PCITEST_GET_IRQTYPE); + fprintf(stdout, "GET IRQ TYPE:\t\t"); + if (ret < 0) + fprintf(stdout, "FAILED\n"); + else + fprintf(stdout, "%s\n", irq[ret]); + } + if (test->legacyirq) { ret = ioctl(fd, PCITEST_LEGACY_IRQ, 0); fprintf(stdout, "LEGACY IRQ:\t"); @@ -83,6 +106,15 @@ static int run_test(struct pci_test *test) fprintf(stdout, "%s\n", result[ret]); } + if (test->msixnum > 0 && test->msixnum <= 2048) { + ret = ioctl(fd, PCITEST_MSIX, test->msixnum); + fprintf(stdout, "MSI-X%d:\t\t", test->msixnum); + if (ret < 0) + fprintf(stdout, "TEST FAILED\n"); + else + fprintf(stdout, "%s\n", result[ret]); + } + if (test->write) { ret = ioctl(fd, PCITEST_WRITE, test->size); fprintf(stdout, "WRITE (%7ld bytes):\t\t", test->size); @@ -133,7 +165,7 @@ int main(int argc, char **argv) /* set default endpoint device */ test->device = "/dev/pci-endpoint-test.0"; - while ((c = getopt(argc, argv, "D:b:m:lrwcs:")) != EOF) + while ((c = getopt(argc, 
argv, "D:b:m:x:i:Ilrwcs:")) != EOF) switch (c) { case 'D': test->device = optarg; @@ -151,6 +183,20 @@ int main(int argc, char **argv) if (test->msinum < 1 || test->msinum > 32) goto usage; continue; + case 'x': + test->msixnum = atoi(optarg); + if (test->msixnum < 1 || test->msixnum > 2048) + goto usage; + continue; + case 'i': + test->irqtype = atoi(optarg); + if (test->irqtype < 0 || test->irqtype > 2) + goto usage; + test->set_irqtype = true; + continue; + case 'I': + test->get_irqtype = true; + continue; case 'r': test->read = true; continue; @@ -173,6 +219,9 @@ usage: "\t-D PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n" "\t-b BAR test (bar number between 0..5)\n" "\t-m MSI test (msi number between 1..32)\n" + "\t-x \tMSI-X test (msix number between 1..2048)\n" + "\t-i \tSet IRQ type (0 - Legacy, 1 - MSI, 2 - MSI-X)\n" + "\t-I Get current IRQ type configured\n" "\t-l Legacy IRQ test\n" "\t-r Read buffer test\n" "\t-w Write buffer test\n" diff --git a/tools/pci/pcitest.sh b/tools/pci/pcitest.sh index 77e8c85ef744..75ed48ff2990 100644 --- a/tools/pci/pcitest.sh +++ b/tools/pci/pcitest.sh @@ -16,7 +16,10 @@ echo echo "Interrupt tests" echo +pcitest -i 0 pcitest -l + +pcitest -i 1 msi=1 while [ $msi -lt 33 ] @@ -26,9 +29,21 @@ do done echo +pcitest -i 2 +msix=1 + +while [ $msix -lt 2049 ] +do + pcitest -x $msix + msix=`expr $msix + 1` +done +echo + echo "Read Tests" echo +pcitest -i 1 + pcitest -r -s 1 pcitest -r -s 1024 pcitest -r -s 1025 -- cgit v1.2.3 From 81aa5206f9a7c9793e2f7971400351664e40b04f Mon Sep 17 00:00:00 2001 From: Rajat Jain Date: Thu, 21 Jun 2018 16:48:28 -0700 Subject: PCI/AER: Add sysfs attributes to provide AER stats and breakdown Add sysfs attributes to provide total and breakdown of the AERs seen, into different type of correctable, fatal and nonfatal errors: /sys/bus/pci/devices//aer_dev_correctable /sys/bus/pci/devices//aer_dev_fatal /sys/bus/pci/devices//aer_dev_nonfatal Signed-off-by: Rajat Jain Signed-off-by: Bjorn 
Helgaas --- .../ABI/testing/sysfs-bus-pci-devices-aer_stats | 94 ++++++++++++++++++++++ Documentation/PCI/pcieaer-howto.txt | 5 ++ drivers/pci/pci-sysfs.c | 3 + drivers/pci/pci.h | 1 + drivers/pci/pcie/aer.c | 94 ++++++++++++++++++++++ 5 files changed, 197 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats new file mode 100644 index 000000000000..3a784297cfed --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats @@ -0,0 +1,94 @@ +========================== +PCIe Device AER statistics +========================== +These attributes show up under all the devices that are AER capable. These +statistical counters indicate the errors "as seen/reported by the device". +Note that this may mean that if an endpoint is causing problems, the AER +counters may increment at its link partner (e.g. root port) because the +errors may be "seen" / reported by the link partner and not the +problematic endpoint itself (which may report all counters as 0 as it never +saw any problems). + +Where: /sys/bus/pci/devices//aer_dev_correctable +Date: July 2018 +Kernel Version: 4.19.0 +Contact: linux-pci@vger.kernel.org, rajatja@google.com +Description: List of correctable errors seen and reported by this + PCI device using ERR_COR. Note that since multiple errors may + be reported using a single ERR_COR message, thus + TOTAL_ERR_COR at the end of the file may not match the actual + total of all the errors in the file. 
Sample output: +------------------------------------------------------------------------- +localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_correctable +Receiver Error 2 +Bad TLP 0 +Bad DLLP 0 +RELAY_NUM Rollover 0 +Replay Timer Timeout 0 +Advisory Non-Fatal 0 +Corrected Internal Error 0 +Header Log Overflow 0 +TOTAL_ERR_COR 2 +------------------------------------------------------------------------- + +Where: /sys/bus/pci/devices//aer_dev_fatal +Date: July 2018 +Kernel Version: 4.19.0 +Contact: linux-pci@vger.kernel.org, rajatja@google.com +Description: List of uncorrectable fatal errors seen and reported by this + PCI device using ERR_FATAL. Note that since multiple errors may + be reported using a single ERR_FATAL message, thus + TOTAL_ERR_FATAL at the end of the file may not match the actual + total of all the errors in the file. Sample output: +------------------------------------------------------------------------- +localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_fatal +Undefined 0 +Data Link Protocol 0 +Surprise Down Error 0 +Poisoned TLP 0 +Flow Control Protocol 0 +Completion Timeout 0 +Completer Abort 0 +Unexpected Completion 0 +Receiver Overflow 0 +Malformed TLP 0 +ECRC 0 +Unsupported Request 0 +ACS Violation 0 +Uncorrectable Internal Error 0 +MC Blocked TLP 0 +AtomicOp Egress Blocked 0 +TLP Prefix Blocked Error 0 +TOTAL_ERR_FATAL 0 +------------------------------------------------------------------------- + +Where: /sys/bus/pci/devices//aer_dev_nonfatal +Date: July 2018 +Kernel Version: 4.19.0 +Contact: linux-pci@vger.kernel.org, rajatja@google.com +Description: List of uncorrectable nonfatal errors seen and reported by this + PCI device using ERR_NONFATAL. Note that since multiple errors + may be reported using a single ERR_NONFATAL message, thus + TOTAL_ERR_NONFATAL at the end of the file may not match the + actual total of all the errors in the file.
Sample output: +------------------------------------------------------------------------- +localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_nonfatal +Undefined 0 +Data Link Protocol 0 +Surprise Down Error 0 +Poisoned TLP 0 +Flow Control Protocol 0 +Completion Timeout 0 +Completer Abort 0 +Unexpected Completion 0 +Receiver Overflow 0 +Malformed TLP 0 +ECRC 0 +Unsupported Request 0 +ACS Violation 0 +Uncorrectable Internal Error 0 +MC Blocked TLP 0 +AtomicOp Egress Blocked 0 +TLP Prefix Blocked Error 0 +TOTAL_ERR_NONFATAL 0 +------------------------------------------------------------------------- diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt index acd0dddd6bb8..48ce7903e3c6 100644 --- a/Documentation/PCI/pcieaer-howto.txt +++ b/Documentation/PCI/pcieaer-howto.txt @@ -73,6 +73,11 @@ In the example, 'Requester ID' means the ID of the device who sends the error message to root port. Pls. refer to pci express specs for other fields. +2.4 AER Statistics / Counters + +When PCIe AER errors are captured, the counters / statistics are also exposed +in the form of sysfs attributes which are documented at +Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats 3. 
Developer Guide diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 0c4653c1d2ce..9f1cb9051d7d 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -1746,6 +1746,9 @@ static const struct attribute_group *pci_dev_attr_groups[] = { #endif &pci_bridge_attr_group, &pcie_dev_attr_group, +#ifdef CONFIG_PCIEAER + &aer_stats_attr_group, +#endif NULL, }; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 1877a14e06a9..b1ce0dcad1dc 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -484,6 +484,7 @@ static inline int devm_of_pci_get_host_bridge_resources(struct device *dev, void pci_no_aer(void); void pci_aer_init(struct pci_dev *dev); void pci_aer_exit(struct pci_dev *dev); +extern const struct attribute_group aer_stats_attr_group; #else static inline void pci_no_aer(void) { } static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; } diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index fe1b9d22a331..b18c5aca30bd 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -542,6 +542,99 @@ static const char *aer_agent_string[] = { "Transmitter ID" }; +#define aer_stats_dev_attr(name, stats_array, strings_array, \ + total_string, total_field) \ + static ssize_t \ + name##_show(struct device *dev, struct device_attribute *attr, \ + char *buf) \ +{ \ + unsigned int i; \ + char *str = buf; \ + struct pci_dev *pdev = to_pci_dev(dev); \ + u64 *stats = pdev->aer_stats->stats_array; \ + \ + for (i = 0; i < ARRAY_SIZE(strings_array); i++) { \ + if (strings_array[i]) \ + str += sprintf(str, "%s %llu\n", \ + strings_array[i], stats[i]); \ + else if (stats[i]) \ + str += sprintf(str, #stats_array "_bit[%d] %llu\n",\ + i, stats[i]); \ + } \ + str += sprintf(str, "TOTAL_%s %llu\n", total_string, \ + pdev->aer_stats->total_field); \ + return str-buf; \ +} \ +static DEVICE_ATTR_RO(name) + +aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs, + aer_correctable_error_string, "ERR_COR", + dev_total_cor_errs); 
+aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs, + aer_uncorrectable_error_string, "ERR_FATAL", + dev_total_fatal_errs); +aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs, + aer_uncorrectable_error_string, "ERR_NONFATAL", + dev_total_nonfatal_errs); + +static struct attribute *aer_stats_attrs[] __ro_after_init = { + &dev_attr_aer_dev_correctable.attr, + &dev_attr_aer_dev_fatal.attr, + &dev_attr_aer_dev_nonfatal.attr, + NULL +}; + +static umode_t aer_stats_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct pci_dev *pdev = to_pci_dev(dev); + + if (!pdev->aer_stats) + return 0; + + return a->mode; +} + +const struct attribute_group aer_stats_attr_group = { + .attrs = aer_stats_attrs, + .is_visible = aer_stats_attrs_are_visible, +}; + +static void pci_dev_aer_stats_incr(struct pci_dev *pdev, + struct aer_err_info *info) +{ + int status, i, max = -1; + u64 *counter = NULL; + struct aer_stats *aer_stats = pdev->aer_stats; + + if (!aer_stats) + return; + + switch (info->severity) { + case AER_CORRECTABLE: + aer_stats->dev_total_cor_errs++; + counter = &aer_stats->dev_cor_errs[0]; + max = AER_MAX_TYPEOF_COR_ERRS; + break; + case AER_NONFATAL: + aer_stats->dev_total_nonfatal_errs++; + counter = &aer_stats->dev_nonfatal_errs[0]; + max = AER_MAX_TYPEOF_UNCOR_ERRS; + break; + case AER_FATAL: + aer_stats->dev_total_fatal_errs++; + counter = &aer_stats->dev_fatal_errs[0]; + max = AER_MAX_TYPEOF_UNCOR_ERRS; + break; + } + + status = (info->status & ~info->mask); + for (i = 0; i < max; i++) + if (status & (1 << i)) + counter[i]++; +} + static void __print_tlp_header(struct pci_dev *dev, struct aer_header_log_regs *t) { @@ -574,6 +667,7 @@ static void __aer_print_error(struct pci_dev *dev, pci_err(dev, " [%2d] Unknown Error Bit%s\n", i, info->first_error == i ? 
" (First)" : ""); } + pci_dev_aer_stats_incr(dev, info); } void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) -- cgit v1.2.3 From 12833017e581c384afa35fb85ce540082b2d59fc Mon Sep 17 00:00:00 2001 From: Rajat Jain Date: Thu, 21 Jun 2018 16:48:29 -0700 Subject: PCI/AER: Add sysfs attributes for rootport cumulative stats Add sysfs attributes for rootport statistics (that are cumulative of all the ERR_* messages seen on this PCI hierarchy). Signed-off-by: Rajat Jain Signed-off-by: Bjorn Helgaas --- .../ABI/testing/sysfs-bus-pci-devices-aer_stats | 28 +++++++++++++ drivers/pci/pcie/aer.c | 47 ++++++++++++++++++++++ 2 files changed, 75 insertions(+) (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats index 3a784297cfed..4b0318c99507 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats +++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats @@ -92,3 +92,31 @@ AtomicOp Egress Blocked 0 TLP Prefix Blocked Error 0 TOTAL_ERR_NONFATAL 0 ------------------------------------------------------------------------- + +============================ +PCIe Rootport AER statistics +============================ +These attributes show up under only the rootports (or root complex event +collectors) that are AER capable. These indicate the number of error messages as +"reported to" the rootport. Please note that the rootports also transmit +(internally) the ERR_* messages for errors seen by the internal rootport PCI +device, so these counters include them and are thus cumulative of all the error +messages on the PCI hierarchy originating at that root port. + +Where: /sys/bus/pci/devices//aer_stats/aer_rootport_total_err_cor +Date: July 2018 +Kernel Version: 4.19.0 +Contact: linux-pci@vger.kernel.org, rajatja@google.com +Description: Total number of ERR_COR messages reported to rootport. 
+ +Where: /sys/bus/pci/devices//aer_stats/aer_rootport_total_err_fatal +Date: July 2018 +Kernel Version: 4.19.0 +Contact: linux-pci@vger.kernel.org, rajatja@google.com +Description: Total number of ERR_FATAL messages reported to rootport. + +Where: /sys/bus/pci/devices//aer_stats/aer_rootport_total_err_nonfatal +Date: July 2018 +Kernel Version: 4.19.0 +Contact: linux-pci@vger.kernel.org, rajatja@google.com +Description: Total number of ERR_NONFATAL messages reported to rootport. diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index b18c5aca30bd..47c67de1ccf1 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -577,10 +577,30 @@ aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs, aer_uncorrectable_error_string, "ERR_NONFATAL", dev_total_nonfatal_errs); +#define aer_stats_rootport_attr(name, field) \ + static ssize_t \ + name##_show(struct device *dev, struct device_attribute *attr, \ + char *buf) \ +{ \ + struct pci_dev *pdev = to_pci_dev(dev); \ + return sprintf(buf, "%llu\n", pdev->aer_stats->field); \ +} \ +static DEVICE_ATTR_RO(name) + +aer_stats_rootport_attr(aer_rootport_total_err_cor, + rootport_total_cor_errs); +aer_stats_rootport_attr(aer_rootport_total_err_fatal, + rootport_total_fatal_errs); +aer_stats_rootport_attr(aer_rootport_total_err_nonfatal, + rootport_total_nonfatal_errs); + static struct attribute *aer_stats_attrs[] __ro_after_init = { &dev_attr_aer_dev_correctable.attr, &dev_attr_aer_dev_fatal.attr, &dev_attr_aer_dev_nonfatal.attr, + &dev_attr_aer_rootport_total_err_cor.attr, + &dev_attr_aer_rootport_total_err_fatal.attr, + &dev_attr_aer_rootport_total_err_nonfatal.attr, NULL }; @@ -593,6 +613,12 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj, if (!pdev->aer_stats) return 0; + if ((a == &dev_attr_aer_rootport_total_err_cor.attr || + a == &dev_attr_aer_rootport_total_err_fatal.attr || + a == &dev_attr_aer_rootport_total_err_nonfatal.attr) && + pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) + 
return 0; + return a->mode; } @@ -635,6 +661,25 @@ static void pci_dev_aer_stats_incr(struct pci_dev *pdev, counter[i]++; } +static void pci_rootport_aer_stats_incr(struct pci_dev *pdev, + struct aer_err_source *e_src) +{ + struct aer_stats *aer_stats = pdev->aer_stats; + + if (!aer_stats) + return; + + if (e_src->status & PCI_ERR_ROOT_COR_RCV) + aer_stats->rootport_total_cor_errs++; + + if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { + if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) + aer_stats->rootport_total_fatal_errs++; + else + aer_stats->rootport_total_nonfatal_errs++; + } +} + static void __print_tlp_header(struct pci_dev *dev, struct aer_header_log_regs *t) { @@ -1085,6 +1130,8 @@ static void aer_isr_one_error(struct aer_rpc *rpc, struct pci_dev *pdev = rpc->rpd; struct aer_err_info *e_info = &rpc->e_info; + pci_rootport_aer_stats_incr(pdev, e_src); + /* * There is a possibility that both correctable error and * uncorrectable error being logged. Report correctable error first. -- cgit v1.2.3 From 39a212ad15f34cebf6d785114c0776a318024ba3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 26 Jun 2018 17:45:34 -0500 Subject: PCI: Document ACPI description of PCI host bridges Add a writeup about how PCI host bridges should be described in ACPI using PNP0A03/PNP0A08 devices, PNP0C02 devices, and the MCFG table. Signed-off-by: Bjorn Helgaas Reviewed-by: Sinan Kaya Reviewed-by: Rafael J. 
Wysocki --- Documentation/PCI/00-INDEX | 2 + Documentation/PCI/acpi-info.txt | 187 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 Documentation/PCI/acpi-info.txt (limited to 'Documentation') diff --git a/Documentation/PCI/00-INDEX b/Documentation/PCI/00-INDEX index 00c9a90b6f38..206b1d5c1e71 100644 --- a/Documentation/PCI/00-INDEX +++ b/Documentation/PCI/00-INDEX @@ -1,5 +1,7 @@ 00-INDEX - this file +acpi-info.txt + - info on how PCI host bridges are represented in ACPI MSI-HOWTO.txt - the Message Signaled Interrupts (MSI) Driver Guide HOWTO and FAQ. PCIEBUS-HOWTO.txt diff --git a/Documentation/PCI/acpi-info.txt b/Documentation/PCI/acpi-info.txt new file mode 100644 index 000000000000..3ffa3b03970e --- /dev/null +++ b/Documentation/PCI/acpi-info.txt @@ -0,0 +1,187 @@ + ACPI considerations for PCI host bridges + +The general rule is that the ACPI namespace should describe everything the +OS might use unless there's another way for the OS to find it [1, 2]. + +For example, there's no standard hardware mechanism for enumerating PCI +host bridges, so the ACPI namespace must describe each host bridge, the +method for accessing PCI config space below it, the address space windows +the host bridge forwards to PCI (using _CRS), and the routing of legacy +INTx interrupts (using _PRT). + +PCI devices, which are below the host bridge, generally do not need to be +described via ACPI. The OS can discover them via the standard PCI +enumeration mechanism, using config accesses to discover and identify +devices and read and size their BARs. However, ACPI may describe PCI +devices if it provides power management or hotplug functionality for them +or if the device has INTx interrupts connected by platform interrupt +controllers and a _PRT is needed to describe those connections. + +ACPI resource description is done via _CRS objects of devices in the ACPI +namespace [2].   
The _CRS is like a generalized PCI BAR: the OS can read +_CRS and figure out what resource is being consumed even if it doesn't have +a driver for the device [3].  That's important because it means an old OS +can work correctly even on a system with new devices unknown to the OS. +The new devices might not do anything, but the OS can at least make sure no +resources conflict with them. + +Static tables like MCFG, HPET, ECDT, etc., are *not* mechanisms for +reserving address space. The static tables are for things the OS needs to +know early in boot, before it can parse the ACPI namespace. If a new table +is defined, an old OS needs to operate correctly even though it ignores the +table. _CRS allows that because it is generic and understood by the old +OS; a static table does not. + +If the OS is expected to manage a non-discoverable device described via +ACPI, that device will have a specific _HID/_CID that tells the OS what +driver to bind to it, and the _CRS tells the OS and the driver where the +device's registers are. + +PCI host bridges are PNP0A03 or PNP0A08 devices.  Their _CRS should +describe all the address space they consume.  This includes all the windows +they forward down to the PCI bus, as well as registers of the host bridge +itself that are not forwarded to PCI.  The host bridge registers include +things like secondary/subordinate bus registers that determine the bus +range below the bridge, window registers that describe the apertures, etc. +These are all device-specific, non-architected things, so the only way a +PNP0A03/PNP0A08 driver can manage them is via _PRS/_CRS/_SRS, which contain +the device-specific details.  The host bridge registers also include ECAM +space, since it is consumed by the host bridge. + +ACPI defines a Consumer/Producer bit to distinguish the bridge registers +("Consumer") from the bridge apertures ("Producer") [4, 5], but early +BIOSes didn't use that bit correctly. 
The result is that the current ACPI +spec defines Consumer/Producer only for the Extended Address Space +descriptors; the bit should be ignored in the older QWord/DWord/Word +Address Space descriptors. Consequently, OSes have to assume all +QWord/DWord/Word descriptors are windows. + +Prior to the addition of Extended Address Space descriptors, the failure of +Consumer/Producer meant there was no way to describe bridge registers in +the PNP0A03/PNP0A08 device itself. The workaround was to describe the +bridge registers (including ECAM space) in PNP0C02 catch-all devices [6]. +With the exception of ECAM, the bridge register space is device-specific +anyway, so the generic PNP0A03/PNP0A08 driver (pci_root.c) has no need to +know about it.   + +New architectures should be able to use "Consumer" Extended Address Space +descriptors in the PNP0A03 device for bridge registers, including ECAM, +although a strict interpretation of [6] might prohibit this. Old x86 and +ia64 kernels assume all address space descriptors, including "Consumer" +Extended Address Space ones, are windows, so it would not be safe to +describe bridge registers this way on those architectures. + +PNP0C02 "motherboard" devices are basically a catch-all.  There's no +programming model for them other than "don't use these resources for +anything else."  So a PNP0C02 _CRS should claim any address space that is +(1) not claimed by _CRS under any other device object in the ACPI namespace +and (2) should not be assigned by the OS to something else. + +The PCIe spec requires the Enhanced Configuration Access Method (ECAM) +unless there's a standard firmware interface for config access, e.g., the +ia64 SAL interface [7]. A host bridge consumes ECAM memory address space +and converts memory accesses into PCI configuration accesses. The spec +defines the ECAM address space layout and functionality; only the base of +the address space is device-specific. 
An ACPI OS learns the base address +from either the static MCFG table or a _CBA method in the PNP0A03 device. + +The MCFG table must describe the ECAM space of non-hot pluggable host +bridges [8]. Since MCFG is a static table and can't be updated by hotplug, +a _CBA method in the PNP0A03 device describes the ECAM space of a +hot-pluggable host bridge [9]. Note that for both MCFG and _CBA, the base +address always corresponds to bus 0, even if the bus range below the bridge +(which is reported via _CRS) doesn't start at 0. + + +[1] ACPI 6.2, sec 6.1: + For any device that is on a non-enumerable type of bus (for example, an + ISA bus), OSPM enumerates the devices' identifier(s) and the ACPI + system firmware must supply an _HID object ... for each device to + enable OSPM to do that. + +[2] ACPI 6.2, sec 3.7: + The OS enumerates motherboard devices simply by reading through the + ACPI Namespace looking for devices with hardware IDs. + + Each device enumerated by ACPI includes ACPI-defined objects in the + ACPI Namespace that report the hardware resources the device could + occupy [_PRS], an object that reports the resources that are currently + used by the device [_CRS], and objects for configuring those resources + [_SRS]. The information is used by the Plug and Play OS (OSPM) to + configure the devices. + +[3] ACPI 6.2, sec 6.2: + OSPM uses device configuration objects to configure hardware resources + for devices enumerated via ACPI. Device configuration objects provide + information about current and possible resource requirements, the + relationship between shared resources, and methods for configuring + hardware resources. + + When OSPM enumerates a device, it calls _PRS to determine the resource + requirements of the device. It may also call _CRS to find the current + resource settings for the device. 
Using this information, the Plug and + Play system determines what resources the device should consume and + sets those resources by calling the device’s _SRS control method. + + In ACPI, devices can consume resources (for example, legacy keyboards), + provide resources (for example, a proprietary PCI bridge), or do both. + Unless otherwise specified, resources for a device are assumed to be + taken from the nearest matching resource above the device in the device + hierarchy. + +[4] ACPI 6.2, sec 6.4.3.5.1, 2, 3, 4: + QWord/DWord/Word Address Space Descriptor (.1, .2, .3) + General Flags: Bit [0] Ignored + + Extended Address Space Descriptor (.4) + General Flags: Bit [0] Consumer/Producer: + 1–This device consumes this resource + 0–This device produces and consumes this resource + +[5] ACPI 6.2, sec 19.6.43: + ResourceUsage specifies whether the Memory range is consumed by + this device (ResourceConsumer) or passed on to child devices + (ResourceProducer). If nothing is specified, then + ResourceConsumer is assumed. + +[6] PCI Firmware 3.2, sec 4.1.2: + If the operating system does not natively comprehend reserving the + MMCFG region, the MMCFG region must be reserved by firmware. The + address range reported in the MCFG table or by _CBA method (see Section + 4.1.3) must be reserved by declaring a motherboard resource. For most + systems, the motherboard resource would appear at the root of the ACPI + namespace (under \_SB) in a node with a _HID of EISAID (PNP0C02), and + the resources in this case should not be claimed in the root PCI bus’s + _CRS. The resources can optionally be returned in Int15 E820 or + EFIGetMemoryMap as reserved memory but must always be reported through + ACPI as a motherboard resource. 
+ +[7] PCI Express 4.0, sec 7.2.2: + For systems that are PC-compatible, or that do not implement a + processor-architecture-specific firmware interface standard that allows + access to the Configuration Space, the ECAM is required as defined in + this section. + +[8] PCI Firmware 3.2, sec 4.1.2: + The MCFG table is an ACPI table that is used to communicate the base + addresses corresponding to the non-hot removable PCI Segment Groups + range within a PCI Segment Group available to the operating system at + boot. This is required for the PC-compatible systems. + + The MCFG table is only used to communicate the base addresses + corresponding to the PCI Segment Groups available to the system at + boot. + +[9] PCI Firmware 3.2, sec 4.1.3: + The _CBA (Memory mapped Configuration Base Address) control method is + an optional ACPI object that returns the 64-bit memory mapped + configuration base address for the hot plug capable host bridge. The + base address returned by _CBA is processor-relative address. The _CBA + control method evaluates to an Integer. + + This control method appears under a host bridge object. When the _CBA + method appears under an active host bridge object, the operating system + evaluates this structure to identify the memory mapped configuration + base address corresponding to the PCI Segment Group for the bus number + range specified in _CRS method. An ACPI name space object that contains + the _CBA method must also contain a corresponding _SEG method. -- cgit v1.2.3 From 07d8d7e57c28ca9a07dab4efd75dad3a654aeb85 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 30 Jul 2018 10:18:37 -0600 Subject: PCI: Make specifying PCI devices in kernel parameters reusable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separate out the code to match a PCI device with a string (typically originating from a kernel parameter) from the pci_specified_resource_alignment() function into its own helper function. 
While we are at it, this change fixes the kernel style of the function (fixing a number of long lines and extra parentheses). Additionally, make the analogous change to the kernel parameter documentation: Separate the description of how to specify a PCI device into its own section at the head of the "pci=" parameter. This patch should have no functional alterations. Signed-off-by: Logan Gunthorpe [bhelgaas: use "device" instead of "slot" in documentation since that's the usual language in the PCI specs] Signed-off-by: Bjorn Helgaas Reviewed-by: Stephen Bates Reviewed-by: Alex Williamson Acked-by: Christian König --- Documentation/admin-guide/kernel-parameters.txt | 28 ++++- drivers/pci/pci.c | 157 ++++++++++++++++-------- 2 files changed, 126 insertions(+), 59 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index efc7aa7a0670..ab36fb34ed01 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2994,7 +2994,26 @@ See header of drivers/block/paride/pcd.c. See also Documentation/blockdev/paride.txt. - pci=option[,option...] [PCI] various PCI subsystem options: + pci=option[,option...] [PCI] various PCI subsystem options. + + Some options herein operate on a specific device + or a set of devices (<pci_dev>). These are + specified in one of the following formats: + + [<domain>:]<bus>:<dev>.<func> + pci:<vendor>:<device>[:<subvendor>:<subdevice>] + + Note: the first format specifies a PCI + bus/device/function address which may change + if new hardware is inserted, if motherboard + firmware changes, or due to changes caused + by other kernel parameters. If the + domain is left unspecified, it is + taken to be zero. The second format + selects devices using IDs from the + configuration space which may match multiple + devices in the system. 
+ earlydump [X86] dump PCI config space before the kernel changes anything off [X86] don't probe for the PCI bus @@ -3123,11 +3142,10 @@ window. The default value is 64 megabytes. resource_alignment= Format: - [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...] - [<order of align>@]pci:<vendor>:<device>\ - [:<subvendor>:<subdevice>][; ...] + [<order of align>@]<pci_dev>[; ...] Specifies alignment and device to reassign - aligned memory resources. + aligned memory resources. How to + specify the device is described above. If <order of align> is not specified, PAGE_SIZE is used as alignment. PCI-PCI bridge can be specified, if resource diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 97acba712e4e..1574b2da25e7 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -191,6 +191,92 @@ void __iomem *pci_ioremap_wc_bar(struct pci_dev *pdev, int bar) EXPORT_SYMBOL_GPL(pci_ioremap_wc_bar); #endif +/** + * pci_dev_str_match - test if a string matches a device + * @dev: the PCI device to test + * @p: string to match the device against + * @endptr: pointer to the string after the match + * + * Test if a string (typically from a kernel parameter) matches a specified + * PCI device. The string may be of one of the following formats: + * + * [<domain>:]<bus>:<device>.<func> + * pci:<vendor>:<device>[:<subvendor>:<subdevice>] + * + * The first format specifies a PCI bus/device/function address which + * may change if new hardware is inserted, if motherboard firmware changes, + * or due to changes caused in kernel parameters. If the domain is + * left unspecified, it is taken to be 0. + * + * The second format matches devices using IDs in the configuration + * space which may match multiple devices in the system. A value of 0 + * for any field will match all devices. (Note: this differs from + * in-kernel code that uses PCI_ANY_ID which is ~0; this is for + * legacy reasons and convenience so users don't have to specify + * FFFFFFFFs on the command line.) + * + * Returns 1 if the string matches the device, 0 if it does not and + * a negative error code if the string cannot be parsed. 
+ */ +static int pci_dev_str_match(struct pci_dev *dev, const char *p, + const char **endptr) +{ + int ret; + int seg, bus, slot, func, count; + unsigned short vendor, device, subsystem_vendor, subsystem_device; + + if (strncmp(p, "pci:", 4) == 0) { + /* PCI vendor/device (subvendor/subdevice) IDs are specified */ + p += 4; + ret = sscanf(p, "%hx:%hx:%hx:%hx%n", &vendor, &device, + &subsystem_vendor, &subsystem_device, &count); + if (ret != 4) { + ret = sscanf(p, "%hx:%hx%n", &vendor, &device, &count); + if (ret != 2) + return -EINVAL; + + subsystem_vendor = 0; + subsystem_device = 0; + } + + p += count; + + if ((!vendor || vendor == dev->vendor) && + (!device || device == dev->device) && + (!subsystem_vendor || + subsystem_vendor == dev->subsystem_vendor) && + (!subsystem_device || + subsystem_device == dev->subsystem_device)) + goto found; + + } else { + /* PCI Bus, Device, Function IDs are specified */ + ret = sscanf(p, "%x:%x:%x.%x%n", &seg, &bus, &slot, + &func, &count); + if (ret != 4) { + seg = 0; + ret = sscanf(p, "%x:%x.%x%n", &bus, &slot, + &func, &count); + if (ret != 3) + return -EINVAL; + } + + p += count; + + if (seg == pci_domain_nr(dev->bus) && + bus == dev->bus->number && + slot == PCI_SLOT(dev->devfn) && + func == PCI_FUNC(dev->devfn)) + goto found; + } + + *endptr = p; + return 0; + +found: + *endptr = p; + return 1; +} static int __pci_find_next_cap_ttl(struct pci_bus *bus, unsigned int devfn, u8 pos, int cap, int *ttl) @@ -5454,10 +5540,10 @@ static DEFINE_SPINLOCK(resource_alignment_lock); static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev, bool *resize) { - int seg, bus, slot, func, align_order, count; - unsigned short vendor, device, subsystem_vendor, subsystem_device; + int align_order, count; resource_size_t align = pcibios_default_alignment(); - char *p; + const char *p; + int ret; spin_lock(&resource_alignment_lock); p = resource_alignment_param; @@ -5477,58 +5563,21 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev, } else { align_order = -1; } - if (strncmp(p, "pci:", 4) == 0) { - /* PCI vendor/device (subvendor/subdevice) ids are specified */ - p += 4; - if (sscanf(p, "%hx:%hx:%hx:%hx%n", - &vendor, &device, &subsystem_vendor, &subsystem_device, &count) != 4) { - if (sscanf(p, "%hx:%hx%n", &vendor, &device, &count) != 2) { - printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: pci:%s\n", - p); - break; - } - subsystem_vendor = subsystem_device = 0; - } - p += count; - if ((!vendor || (vendor == dev->vendor)) && - (!device || (device == dev->device)) && - (!subsystem_vendor || (subsystem_vendor == dev->subsystem_vendor)) && - (!subsystem_device || (subsystem_device == dev->subsystem_device))) { - *resize = true; - if (align_order == -1) - align = PAGE_SIZE; - else - align = 1 << align_order; - /* Found */ - break; - } - } - else { - if (sscanf(p, "%x:%x:%x.%x%n", - &seg, &bus, &slot, &func, &count) != 4) { - seg = 0; - if (sscanf(p, "%x:%x.%x%n", - &bus, &slot, &func, &count) != 3) { - /* Invalid format */ - printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n", - p); - break; - } - } - p += count; - if (seg == pci_domain_nr(dev->bus) && - bus == dev->bus->number && - slot == PCI_SLOT(dev->devfn) && - func == PCI_FUNC(dev->devfn)) { - *resize = true; - if (align_order == -1) - align = PAGE_SIZE; - else - align = 1 << align_order; - /* Found */ - break; - } + + ret = pci_dev_str_match(dev, p, &p); + if (ret == 1) { + *resize = true; + if (align_order == -1) + align = PAGE_SIZE; + else + align = 1 << align_order; + break; + } else if (ret < 0) { + pr_err("PCI: Can't parse resource_alignment parameter: %s\n", + p); + break; } + if (*p != ';' && *p != ',') { /* End of param or invalid format */ break; -- cgit v1.2.3 From 45db33709ccc7330c55fc6751c96468de407f2ac Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 30 Jul 2018 10:18:38 -0600 Subject: PCI: Allow specifying devices using a base bus 
and path of devfns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When specifying PCI devices on the kernel command line using a bus/device/function address, bus numbers can change when adding or replacing a device, changing motherboard firmware, or applying kernel parameters like "pci=assign-buses". When bus numbers change, it's likely the command line tweak will be applied to the wrong device. Therefore, it is useful to be able to specify devices with a base bus number and the path of devfns needed to get to it, similar to the "device scope" structure in the Intel VT-d spec, Section 8.3.1. Thus, we add an option to specify devices in the following format: [<domain>:]<bus>:<dev>.<func>[/<dev>.<func>]* The path can be any segment within the PCI hierarchy of any length and determined through the use of 'lspci -t'. When specified this way, it is less likely that a renumbered bus will result in a valid device specification and the tweak won't be applied to the wrong device. Signed-off-by: Logan Gunthorpe [bhelgaas: use "device" instead of "slot" in documentation since that's the usual language in the PCI specs] Signed-off-by: Bjorn Helgaas Reviewed-by: Stephen Bates Reviewed-by: Alex Williamson Acked-by: Christian König --- Documentation/admin-guide/kernel-parameters.txt | 8 +- drivers/pci/pci.c | 118 +++++++++++++++++++----- 2 files changed, 103 insertions(+), 23 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ab36fb34ed01..4fa4c9ff04ae 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3000,7 +3000,7 @@ or a set of devices (<pci_dev>). These are specified in one of the following formats: - [<domain>:]<bus>:<dev>.<func> + [<domain>:]<bus>:<dev>.<func>[/<dev>.<func>]* pci:<vendor>:<device>[:<subvendor>:<subdevice>] Note: the first format specifies a PCI @@ -3009,7 +3009,11 @@ firmware changes, or due to changes caused by other kernel parameters. 
If the domain is left unspecified, it is - taken to be zero. The second format + taken to be zero. Optionally, a path + to a device through multiple device/function + addresses can be specified after the base + address (this is more robust against + renumbering issues). The second format selects devices using IDs from the configuration space which may match multiple devices in the system. diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1574b2da25e7..a6c38b15ac33 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -191,6 +191,89 @@ void __iomem *pci_ioremap_wc_bar(struct pci_dev *pdev, int bar) EXPORT_SYMBOL_GPL(pci_ioremap_wc_bar); #endif +/** + * pci_dev_str_match_path - test if a path string matches a device + * @dev: the PCI device to test + * @p: string to match the device against + * @endptr: pointer to the string after the match + * + * Test if a string (typically from a kernel parameter) formatted as a + * path of device/function addresses matches a PCI device. The string must + * be of the form: + * + * [<domain>:]<bus>:<device>.<func>[/<device>.<func>]* + * + * A path for a device can be obtained using 'lspci -t'. Using a path + * is more robust against bus renumbering than using only a single bus, + * device and function address. + * + * Returns 1 if the string matches the device, 0 if it does not and + * a negative error code if it fails to parse the string. 
+ */ +static int pci_dev_str_match_path(struct pci_dev *dev, const char *path, + const char **endptr) +{ + int ret; + int seg, bus, slot, func; + char *wpath, *p; + char end; + + *endptr = strchrnul(path, ';'); + + wpath = kmemdup_nul(path, *endptr - path, GFP_KERNEL); + if (!wpath) + return -ENOMEM; + + while (1) { + p = strrchr(wpath, '/'); + if (!p) + break; + ret = sscanf(p, "/%x.%x%c", &slot, &func, &end); + if (ret != 2) { + ret = -EINVAL; + goto free_and_exit; + } + + if (dev->devfn != PCI_DEVFN(slot, func)) { + ret = 0; + goto free_and_exit; + } + + /* + * Note: we don't need to get a reference to the upstream + * bridge because we hold a reference to the top level + * device which should hold a reference to the bridge, + * and so on. + */ + dev = pci_upstream_bridge(dev); + if (!dev) { + ret = 0; + goto free_and_exit; + } + + *p = 0; + } + + ret = sscanf(wpath, "%x:%x:%x.%x%c", &seg, &bus, &slot, + &func, &end); + if (ret != 4) { + seg = 0; + ret = sscanf(wpath, "%x:%x.%x%c", &bus, &slot, &func, &end); + if (ret != 3) { + ret = -EINVAL; + goto free_and_exit; + } + } + + ret = (seg == pci_domain_nr(dev->bus) && + bus == dev->bus->number && + dev->devfn == PCI_DEVFN(slot, func)); + +free_and_exit: + kfree(wpath); + return ret; +} + /** * pci_dev_str_match - test if a string matches a device * @dev: the PCI device to test @@ -200,13 +283,16 @@ EXPORT_SYMBOL_GPL(pci_ioremap_wc_bar); * Test if a string (typically from a kernel parameter) matches a specified * PCI device. The string may be of one of the following formats: * - * [:]:. + * [:]:.[/.]* * pci::[::] * * The first format specifies a PCI bus/device/function address which * may change if new hardware is inserted, if motherboard firmware changes, * or due to changes caused in kernel parameters. If the domain is - * left unspecified, it is taken to be 0. + * left unspecified, it is taken to be 0. 
In order to be robust against + * bus renumbering issues, a path of PCI device/function numbers may be used + * to address the specific device. The path for a device can be determined + * through the use of 'lspci -t'. * * The second format matches devices using IDs in the configuration * space which may match multiple devices in the system. A value of 0 @@ -222,7 +308,7 @@ static int pci_dev_str_match(struct pci_dev *dev, const char *p, const char **endptr) { int ret; - int seg, bus, slot, func, count; + int count; unsigned short vendor, device, subsystem_vendor, subsystem_device; if (strncmp(p, "pci:", 4) == 0) { @@ -248,25 +334,15 @@ static int pci_dev_str_match(struct pci_dev *dev, const char *p, (!subsystem_device || subsystem_device == dev->subsystem_device)) goto found; - } else { - /* PCI Bus, Device, Function IDs are specified */ - ret = sscanf(p, "%x:%x:%x.%x%n", &seg, &bus, &slot, - &func, &count); - if (ret != 4) { - seg = 0; - ret = sscanf(p, "%x:%x.%x%n", &bus, &slot, - &func, &count); - if (ret != 3) - return -EINVAL; - } - - p += count; - - if (seg == pci_domain_nr(dev->bus) && - bus == dev->bus->number && - slot == PCI_SLOT(dev->devfn) && - func == PCI_FUNC(dev->devfn)) + /* + * PCI Bus, Device, Function IDs are specified + * (optionally, may include a path of devfns following it) + */ + ret = pci_dev_str_match_path(dev, p, &p); + if (ret < 0) + return ret; + else if (ret) goto found; } -- cgit v1.2.3 From aaca43fda742223e4f62bd73e13055f5364e9a9b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 30 Jul 2018 10:18:40 -0600 Subject: PCI: Add "pci=disable_acs_redir=" parameter for peer-to-peer support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support peer-to-peer traffic on a segment of the PCI hierarchy, we must disable the ACS redirect bits for select PCI bridges. The bridges must be selected before the devices are discovered by the kernel and the IOMMU groups created. 
Therefore, add a kernel command line parameter to specify devices which must have their ACS bits disabled. The new parameter takes a list of devices separated by a semicolon. Each device specified will have its ACS redirect bits disabled. This is similar to the existing 'resource_alignment' parameter. The ACS Request P2P Request Redirect, P2P Completion Redirect and P2P Egress Control bits are disabled, which is sufficient to always allow passing P2P traffic uninterrupted. The bits are set after the kernel (optionally) enables the ACS bits itself. It is also done regardless of whether the kernel or platform firmware sets the bits. If the user tries to disable the ACS redirect for a device without the ACS capability, print a warning to dmesg. Signed-off-by: Logan Gunthorpe [bhelgaas: reorder to add the generic code first and move the device-specific quirk to subsequent patches] Signed-off-by: Bjorn Helgaas Reviewed-by: Stephen Bates Reviewed-by: Alex Williamson Acked-by: Christian König --- Documentation/admin-guide/kernel-parameters.txt | 9 +++ drivers/pci/pci.c | 73 ++++++++++++++++++++++++- 2 files changed, 80 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4fa4c9ff04ae..d5c27d947c2e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3192,6 +3192,15 @@ Adding the window is slightly risky (it may conflict with unreported devices), so this taints the kernel. + disable_acs_redir=<pci_dev>[; ...] + Specify one or more PCI devices (in the format + specified above) separated by semicolons. + Each device specified will have the PCI ACS + redirect capabilities forced off which will + allow P2P traffic between devices through + bridges without forcing it upstream. Note: + this removes isolation between devices and + may put more devices in an IOMMU group. 
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a6c38b15ac33..822577d9b39e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2982,6 +2982,63 @@ void pci_request_acs(void) pci_acs_enable = 1; } +static const char *disable_acs_redir_param; + +/** + * pci_disable_acs_redir - disable ACS redirect capabilities + * @dev: the PCI device + * + * For only devices specified in the disable_acs_redir parameter. + */ +static void pci_disable_acs_redir(struct pci_dev *dev) +{ + int ret = 0; + const char *p; + int pos; + u16 ctrl; + + if (!disable_acs_redir_param) + return; + + p = disable_acs_redir_param; + while (*p) { + ret = pci_dev_str_match(dev, p, &p); + if (ret < 0) { + pr_info_once("PCI: Can't parse disable_acs_redir parameter: %s\n", + disable_acs_redir_param); + + break; + } else if (ret == 1) { + /* Found a match */ + break; + } + + if (*p != ';' && *p != ',') { + /* End of param or invalid format */ + break; + } + p++; + } + + if (ret != 1) + return; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS); + if (!pos) { + pci_warn(dev, "cannot disable ACS redirect for this hardware as it does not have ACS capabilities\n"); + return; + } + + pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl); + + /* P2P Request & Completion Redirect */ + ctrl &= ~(PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC); + + pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl); + + pci_info(dev, "disabled ACS redirect\n"); +} + /** * pci_std_enable_acs - enable ACS on devices using standard ACS capabilites * @dev: the PCI device @@ -3021,12 +3078,22 @@ static void pci_std_enable_acs(struct pci_dev *dev) void pci_enable_acs(struct pci_dev *dev) { if (!pci_acs_enable) - return; + goto disable_acs_redir; if (!pci_dev_specific_enable_acs(dev)) - return; + goto disable_acs_redir; pci_std_enable_acs(dev); + +disable_acs_redir: + /* + * Note: pci_disable_acs_redir() must be called even if ACS was not + * 
enabled by the kernel because it may have been enabled by + * platform firmware. So if we are told to disable it, we should + * always disable it after setting the kernel's default + * preferences. + */ + pci_disable_acs_redir(dev); } static bool pci_acs_flags_enabled(struct pci_dev *pdev, u16 acs_flags) @@ -5966,6 +6033,8 @@ static int __init pci_setup(char *str) pcie_bus_config = PCIE_BUS_PEER2PEER; } else if (!strncmp(str, "pcie_scan_all", 13)) { pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS); + } else if (!strncmp(str, "disable_acs_redir=", 18)) { + disable_acs_redir_param = str + 18; } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); -- cgit v1.2.3