From 07d8d7e57c28ca9a07dab4efd75dad3a654aeb85 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 30 Jul 2018 10:18:37 -0600 Subject: PCI: Make specifying PCI devices in kernel parameters reusable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separate out the code to match a PCI device with a string (typically originating from a kernel parameter) from the pci_specified_resource_alignment() function into its own helper function. While we are at it, this change fixes the kernel style of the function (fixing a number of long lines and extra parentheses). Additionally, make the analogous change to the kernel parameter documentation: Separate the description of how to specify a PCI device into its own section at the head of the "pci=" parameter. This patch should have no functional alterations. Signed-off-by: Logan Gunthorpe [bhelgaas: use "device" instead of "slot" in documentation since that's the usual language in the PCI specs] Signed-off-by: Bjorn Helgaas Reviewed-by: Stephen Bates Reviewed-by: Alex Williamson Acked-by: Christian König --- Documentation/admin-guide/kernel-parameters.txt | 28 ++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index efc7aa7a0670..ab36fb34ed01 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2994,7 +2994,26 @@ See header of drivers/block/paride/pcd.c. See also Documentation/blockdev/paride.txt. - pci=option[,option...] [PCI] various PCI subsystem options: + pci=option[,option...] [PCI] various PCI subsystem options. + + Some options herein operate on a specific device + or a set of devices (). These are + specified in one of the following formats: + + [:]:. + pci::[::] + + Note: the first format specifies a PCI + bus/device/function address which may change + if new hardware is inserted, if motherboard + firmware changes, or due to changes caused + by other kernel parameters. If the + domain is left unspecified, it is + taken to be zero. The second format + selects devices using IDs from the + configuration space which may match multiple + devices in the system. + earlydump [X86] dump PCI config space before the kernel changes anything off [X86] don't probe for the PCI bus @@ -3123,11 +3142,10 @@ window. The default value is 64 megabytes. resource_alignment= Format: - [@][:]:.[; ...] - [@]pci::\ - [::][; ...] + [@][; ...] Specifies alignment and device to reassign - aligned memory resources. + aligned memory resources. How to + specify the device is described above. If is not specified, PAGE_SIZE is used as alignment. PCI-PCI bridge can be specified, if resource -- cgit v1.2.3 From 45db33709ccc7330c55fc6751c96468de407f2ac Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 30 Jul 2018 10:18:38 -0600 Subject: PCI: Allow specifying devices using a base bus and path of devfns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When specifying PCI devices on the kernel command line using a bus/device/function address, bus numbers can change when adding or replacing a device, changing motherboard firmware, or applying kernel parameters like "pci=assign-buses". When bus numbers change, it's likely the command line tweak will be applied to the wrong device. Therefore, it is useful to be able to specify devices with a base bus number and the path of devfns needed to get to it, similar to the "device scope" structure in the Intel VT-d spec, Section 8.3.1. Thus, we add an option to specify devices in the following format: [:]:.[/.]* The path can be any segment within the PCI hierarchy of any length and determined through the use of 'lspci -t'. When specified this way, it is less likely that a renumbered bus will result in a valid device specification and the tweak won't be applied to the wrong device. Signed-off-by: Logan Gunthorpe [bhelgaas: use "device" instead of "slot" in documentation since that's the usual language in the PCI specs] Signed-off-by: Bjorn Helgaas Reviewed-by: Stephen Bates Reviewed-by: Alex Williamson Acked-by: Christian König --- Documentation/admin-guide/kernel-parameters.txt | 8 +- drivers/pci/pci.c | 118 +++++++++++++++++++----- 2 files changed, 103 insertions(+), 23 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ab36fb34ed01..4fa4c9ff04ae 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3000,7 +3000,7 @@ or a set of devices (). These are specified in one of the following formats: - [:]:. + [:]:.[/.]* pci::[::] Note: the first format specifies a PCI @@ -3009,7 +3009,11 @@ firmware changes, or due to changes caused by other kernel parameters. If the domain is left unspecified, it is - taken to be zero. The second format + taken to be zero. Optionally, a path + to a device through multiple device/function + addresses can be specified after the base + address (this is more robust against + renumbering issues). The second format selects devices using IDs from the configuration space which may match multiple devices in the system. diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1574b2da25e7..a6c38b15ac33 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -191,6 +191,89 @@ void __iomem *pci_ioremap_wc_bar(struct pci_dev *pdev, int bar) EXPORT_SYMBOL_GPL(pci_ioremap_wc_bar); #endif +/** + * pci_dev_str_match_path - test if a path string matches a device + * @dev: the PCI device to test + * @p: string to match the device against + * @endptr: pointer to the string after the match + * + * Test if a string (typically from a kernel parameter) formatted as a + * path of device/function addresses matches a PCI device. The string must + * be of the form: + * + * [:]:.[/.]* + * + * A path for a device can be obtained using 'lspci -t'. Using a path + * is more robust against bus renumbering than using only a single bus, + * device and function address. + * + * Returns 1 if the string matches the device, 0 if it does not and + * a negative error code if it fails to parse the string. + */ +static int pci_dev_str_match_path(struct pci_dev *dev, const char *path, + const char **endptr) +{ + int ret; + int seg, bus, slot, func; + char *wpath, *p; + char end; + + *endptr = strchrnul(path, ';'); + + wpath = kmemdup_nul(path, *endptr - path, GFP_KERNEL); + if (!wpath) + return -ENOMEM; + + while (1) { + p = strrchr(wpath, '/'); + if (!p) + break; + ret = sscanf(p, "/%x.%x%c", &slot, &func, &end); + if (ret != 2) { + ret = -EINVAL; + goto free_and_exit; + } + + if (dev->devfn != PCI_DEVFN(slot, func)) { + ret = 0; + goto free_and_exit; + } + + /* + * Note: we don't need to get a reference to the upstream + * bridge because we hold a reference to the top level + * device which should hold a reference to the bridge, + * and so on. + */ + dev = pci_upstream_bridge(dev); + if (!dev) { + ret = 0; + goto free_and_exit; + } + + *p = 0; + } + + ret = sscanf(wpath, "%x:%x:%x.%x%c", &seg, &bus, &slot, + &func, &end); + if (ret != 4) { + seg = 0; + ret = sscanf(wpath, "%x:%x.%x%c", &bus, &slot, &func, &end); + if (ret != 3) { + ret = -EINVAL; + goto free_and_exit; + } + } + + ret = (seg == pci_domain_nr(dev->bus) && + bus == dev->bus->number && + dev->devfn == PCI_DEVFN(slot, func)); + +free_and_exit: + kfree(wpath); + return ret; +} + /** * pci_dev_str_match - test if a string matches a device * @dev: the PCI device to test @@ -200,13 +283,16 @@ EXPORT_SYMBOL_GPL(pci_ioremap_wc_bar); * Test if a string (typically from a kernel parameter) matches a specified * PCI device. The string may be of one of the following formats: * - * [:]:. + * [:]:.[/.]* * pci::[::] * * The first format specifies a PCI bus/device/function address which * may change if new hardware is inserted, if motherboard firmware changes, * or due to changes caused in kernel parameters. If the domain is - * left unspecified, it is taken to be 0. + * left unspecified, it is taken to be 0. In order to be robust against + * bus renumbering issues, a path of PCI device/function numbers may be used + * to address the specific device. The path for a device can be determined + * through the use of 'lspci -t'. * * The second format matches devices using IDs in the configuration * space which may match multiple devices in the system. A value of 0 @@ -222,7 +308,7 @@ static int pci_dev_str_match(struct pci_dev *dev, const char *p, const char **endptr) { int ret; - int seg, bus, slot, func, count; + int count; unsigned short vendor, device, subsystem_vendor, subsystem_device; if (strncmp(p, "pci:", 4) == 0) { @@ -248,25 +334,15 @@ static int pci_dev_str_match(struct pci_dev *dev, const char *p, (!subsystem_device || subsystem_device == dev->subsystem_device)) goto found; - } else { - /* PCI Bus, Device, Function IDs are specified */ - ret = sscanf(p, "%x:%x:%x.%x%n", &seg, &bus, &slot, - &func, &count); - if (ret != 4) { - seg = 0; - ret = sscanf(p, "%x:%x.%x%n", &bus, &slot, - &func, &count); - if (ret != 3) - return -EINVAL; - } - - p += count; - - if (seg == pci_domain_nr(dev->bus) && - bus == dev->bus->number && - slot == PCI_SLOT(dev->devfn) && - func == PCI_FUNC(dev->devfn)) + /* + * PCI Bus, Device, Function IDs are specified + * (optionally, may include a path of devfns following it) + */ + ret = pci_dev_str_match_path(dev, p, &p); + if (ret < 0) + return ret; + else if (ret) goto found; } -- cgit v1.2.3 From aaca43fda742223e4f62bd73e13055f5364e9a9b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 30 Jul 2018 10:18:40 -0600 Subject: PCI: Add "pci=disable_acs_redir=" parameter for peer-to-peer support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support peer-to-peer traffic on a segment of the PCI hierarchy, we must disable the ACS redirect bits for select PCI bridges. The bridges must be selected before the devices are discovered by the kernel and the IOMMU groups created. Therefore, add a kernel command line parameter to specify devices which must have their ACS bits disabled. The new parameter takes a list of devices separated by a semicolon. Each device specified will have its ACS redirect bits disabled. This is similar to the existing 'resource_alignment' parameter. The ACS Request P2P Request Redirect, P2P Completion Redirect and P2P Egress Control bits are disabled, which is sufficient to always allow passing P2P traffic uninterrupted. The bits are set after the kernel (optionally) enables the ACS bits itself. It is also done regardless of whether the kernel or platform firmware sets the bits. If the user tries to disable the ACS redirect for a device without the ACS capability, print a warning to dmesg. Signed-off-by: Logan Gunthorpe [bhelgaas: reorder to add the generic code first and move the device-specific quirk to subsequent patches] Signed-off-by: Bjorn Helgaas Reviewed-by: Stephen Bates Reviewed-by: Alex Williamson Acked-by: Christian König --- Documentation/admin-guide/kernel-parameters.txt | 9 +++ drivers/pci/pci.c | 73 ++++++++++++++++++++++++- 2 files changed, 80 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4fa4c9ff04ae..d5c27d947c2e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3192,6 +3192,15 @@ Adding the window is slightly risky (it may conflict with unreported devices), so this taints the kernel. + disable_acs_redir=[; ...] + Specify one or more PCI devices (in the format + specified above) separated by semicolons. + Each device specified will have the PCI ACS + redirect capabilities forced off which will + allow P2P traffic between devices through + bridges without forcing it upstream. Note: + this removes isolation between devices and + may put more devices in an IOMMU group. pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a6c38b15ac33..822577d9b39e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2982,6 +2982,63 @@ void pci_request_acs(void) pci_acs_enable = 1; } +static const char *disable_acs_redir_param; + +/** + * pci_disable_acs_redir - disable ACS redirect capabilities + * @dev: the PCI device + * + * For only devices specified in the disable_acs_redir parameter. + */ +static void pci_disable_acs_redir(struct pci_dev *dev) +{ + int ret = 0; + const char *p; + int pos; + u16 ctrl; + + if (!disable_acs_redir_param) + return; + + p = disable_acs_redir_param; + while (*p) { + ret = pci_dev_str_match(dev, p, &p); + if (ret < 0) { + pr_info_once("PCI: Can't parse disable_acs_redir parameter: %s\n", + disable_acs_redir_param); + + break; + } else if (ret == 1) { + /* Found a match */ + break; + } + + if (*p != ';' && *p != ',') { + /* End of param or invalid format */ + break; + } + p++; + } + + if (ret != 1) + return; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS); + if (!pos) { + pci_warn(dev, "cannot disable ACS redirect for this hardware as it does not have ACS capabilities\n"); + return; + } + + pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl); + + /* P2P Request & Completion Redirect */ + ctrl &= ~(PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC); + + pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl); + + pci_info(dev, "disabled ACS redirect\n"); +} + /** * pci_std_enable_acs - enable ACS on devices using standard ACS capabilites * @dev: the PCI device @@ -3021,12 +3078,22 @@ static void pci_std_enable_acs(struct pci_dev *dev) void pci_enable_acs(struct pci_dev *dev) { if (!pci_acs_enable) - return; + goto disable_acs_redir; if (!pci_dev_specific_enable_acs(dev)) - return; + goto disable_acs_redir; pci_std_enable_acs(dev); + +disable_acs_redir: + /* + * Note: pci_disable_acs_redir() must be called even if ACS was not + * enabled by the kernel because it may have been enabled by + * platform firmware. So if we are told to disable it, we should + * always disable it after setting the kernel's default + * preferences. + */ + pci_disable_acs_redir(dev); } static bool pci_acs_flags_enabled(struct pci_dev *pdev, u16 acs_flags) @@ -5966,6 +6033,8 @@ static int __init pci_setup(char *str) pcie_bus_config = PCIE_BUS_PEER2PEER; } else if (!strncmp(str, "pcie_scan_all", 13)) { pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS); + } else if (!strncmp(str, "disable_acs_redir=", 18)) { + disable_acs_redir_param = str + 18; } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); -- cgit v1.2.3