summaryrefslogtreecommitdiff
path: root/drivers/pci/tph.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-27 05:05:44 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-27 05:05:44 +0300
commit1746db26f85e4f4b3dd11d7b55f4eff4b0423884 (patch)
tree7a4f43c90c531566086ee274d59c5b536f4c7225 /drivers/pci/tph.c
parent1dc707e647bc919834eff9636c8d00b78c782545 (diff)
parent10099266dec8275a6899e6a27dcdfebbcc726cc7 (diff)
downloadlinux-1746db26f85e4f4b3dd11d7b55f4eff4b0423884.tar.xz
Merge tag 'pci-v6.13-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci
Pull PCI updates from Bjorn Helgaas: "Enumeration: - Make pci_stop_dev() and pci_destroy_dev() safe so concurrent callers can't stop a device multiple times, even as we migrate from the global pci_rescan_remove_lock to finer-grained locking (Keith Busch) - Improve pci_walk_bus() implementation by making it recursive and moving locking up to avoid need for a 'locked' parameter (Keith Busch) - Unexport pci_walk_bus_locked(), which is only used internally by the PCI core (Keith Busch) - Detect some Thunderbolt chips that are built-in and hence 'trustworthy' by a heuristic since the 'ExternalFacingPort' and 'usb4-host-interface' ACPI properties are not quite enough (Esther Shimanovich) Resource management: - Use PCI bus addresses (not CPU addresses) in 'ranges' properties when building dynamic DT nodes so systems where PCI and CPU addresses differ work correctly (Andrea della Porta) - Tidy resource sizing and assignment with helpers to reduce redundancy (Ilpo Järvinen) - Improve pdev_sort_resources() 'bogus alignment' warning to be more specific (Ilpo Järvinen) Driver binding: - Convert driver .remove_new() callbacks to .remove() again to finish the conversion from returning 'int' to being 'void' (Sergio Paracuellos) - Export pcim_request_all_regions(), a managed interface to request all BARs (Philipp Stanner) - Replace pcim_iomap_regions_request_all() with pcim_request_all_regions(), and pcim_iomap_table()[n] with pcim_iomap(n), in the following drivers: ahci, crypto qat, crypto octeontx2, intel_th, iwlwifi, ntb idt, serial rp2, ALSA korg1212 (Philipp Stanner) - Remove the now unused pcim_iomap_regions_request_all() (Philipp Stanner) - Export pcim_iounmap_region(), a managed interface to unmap and release a PCI BAR (Philipp Stanner) - Replace pcim_iomap_regions(mask) with pcim_iomap_region(n), and pcim_iounmap_regions(mask) with pcim_iounmap_region(n), in the following drivers: fpga dfl-pci, block mtip32xx, gpio-merrifield, cavium (Philipp Stanner) Error handling: - Add sysfs 'reset_subordinate' to reset the entire hierarchy below a bridge; previously Secondary Bus Reset could only be used when there was a single device below a bridge (Keith Busch) - Warn if we reset a running device where the driver didn't register pci_error_handlers notification callbacks (Keith Busch) ASPM: - Disable ASPM L1 before touching L1 PM Substates to follow the spec closer and avoid a CPU load timeout on some platforms (Ajay Agarwal) - Set devices below Intel VMD to D0 before enabling ASPM L1 Substates as required per spec for all L1 Substates changes (Jian-Hong Pan) Power management: - Enable starfive controller runtime PM before probing host bridge (Mayank Rana) - Enable runtime power management for host bridges (Krishna chaitanya chundru) Power control: - Use of_platform_device_create() instead of of_platform_populate() to create pwrctl platform devices so we can control it based on the child nodes (Manivannan Sadhasivam) - Create pwrctrl platform devices only if there's a relevant power supply property (Manivannan Sadhasivam) - Add device link from the pwrctl supplier to the PCI dev to ensure pwrctl drivers are probed before the PCI dev driver; this avoids a race where pwrctl could change device power state while the PCI driver was active (Manivannan Sadhasivam) - Find pwrctl device for removal with of_find_device_by_node() instead of searching all children of the parent (Manivannan Sadhasivam) - Rename 'pwrctl' to 'pwrctrl' to match new bandwidth controller ('bwctrl') and hotplug files (Bjorn Helgaas) Bandwidth control: - Add read/modify/write locking for Link Control 2, which is used to manage Link speed (Ilpo Järvinen) - Extract Link Bandwidth Management Status check into pcie_lbms_seen(), where it can be shared between the bandwidth controller and quirks that use it to help retrain failed links (Ilpo Järvinen) - Re-add Link Bandwidth notification support with updates to address the reasons it was previously reverted (Alexandru Gagniuc, Ilpo Järvinen) - Add pcie_set_target_speed() and related functionality so drivers can manage PCIe Link speed based on thermal or other constraints (Ilpo Järvinen) - Add a thermal cooling driver to throttle PCIe Links via the existing thermal management framework (Ilpo Järvinen) - Add a userspace selftest for the PCIe bandwidth controller (Ilpo Järvinen) PCI device hotplug: - Add hotplug controller driver for Marvell OCTEON multi-function device where function 0 has a management console interface to enable/disable and provision various personalities for the other functions (Shijith Thotton) - Retain a reference to the pci_bus for the lifetime of a pci_slot to avoid a use-after-free when the thunderbolt driver resets USB4 host routers on boot, causing hotplug remove/add of downstream docks or other devices (Lukas Wunner) - Remove unused cpcihp struct cpci_hp_controller_ops.hardware_test (Guilherme Giacomo Simoes) - Remove unused cpqphp struct ctrl_dbg.ctrl (Christophe JAILLET) - Use pci_bus_read_dev_vendor_id() instead of hand-coded presence detection in cpqphp (Ilpo Järvinen) - Simplify cpqphp enumeration, which is already simple-minded and doesn't handle devices below hot-added bridges (Ilpo Järvinen) Virtualization: - Add ACS quirk for Wangxun FF5xxx NICs, which don't advertise an ACS capability but do isolate functions as though PCI_ACS_RR and PCI_ACS_CR were set, so the functions can be in independent IOMMU groups (Mengyuan Lou) TLP Processing Hints (TPH): - Add and document TLP Processing Hints (TPH) support so drivers can enable and disable TPH and the kernel can save/restore TPH configuration (Wei Huang) - Add TPH Steering Tag support so drivers can retrieve Steering Tag values associated with specific CPUs via an ACPI _DSM to improve performance by directing DMA writes closer to their consumers (Wei Huang) Data Object Exchange (DOE): - Wait up to 1 second for DOE Busy bit to clear before writing a request to the mailbox to avoid failures if the mailbox is still busy from a previous transfer (Gregory Price) Endpoint framework: - Skip attempts to allocate from endpoint controller memory window if the requested size is larger than the window (Damien Le Moal) - Add and document pci_epc_mem_map() and pci_epc_mem_unmap() to handle controller-specific size and alignment constraints, and add test cases to the endpoint test driver (Damien Le Moal) - Implement dwc pci_epc_ops.align_addr() so pci_epc_mem_map() can observe DWC-specific alignment requirements (Damien Le Moal) - Synchronously cancel command handler work in endpoint test before cleaning up DMA and BARs (Damien Le Moal) - Respect endpoint page size in dw_pcie_ep_align_addr() (Niklas Cassel) - Use dw_pcie_ep_align_addr() in dw_pcie_ep_raise_msi_irq() and dw_pcie_ep_raise_msix_irq() instead of open coding the equivalent (Niklas Cassel) - Avoid NULL dereference if Modem Host Interface Endpoint lacks 'mmio' DT property (Zhongqiu Han) - Release PCI domain ID of Endpoint controller parent (not controller itself) and before unregistering the controller, to avoid use-after-free (Zijun Hu) - Clear secondary (not primary) EPC in pci_epc_remove_epf() when removing the secondary controller associated with an NTB (Zijun Hu) Cadence PCIe controller driver: - Lower severity of 'phy-names' message (Bartosz Wawrzyniak) Freescale i.MX6 PCIe controller driver: - Fix suspend/resume support on i.MX6QDL, which has a hardware erratum that prevents use of L2 (Stefan Eichenberger) Intel VMD host bridge driver: - Add 0xb60b and 0xb06f Device IDs for client SKUs (Nirmal Patel) MediaTek PCIe Gen3 controller driver: - Update mediatek-gen3 DT binding to require the exact number of clocks for each SoC (Fei Shao) - Add support for DT 'max-link-speed' and 'num-lanes' properties to restrict the link speed and width (AngeloGioacchino Del Regno) Microchip PolarFlare PCIe controller driver: - Add DT and driver support for using either of the two PolarFire Root Ports (Conor Dooley) NVIDIA Tegra194 PCIe controller driver: - Move endpoint controller cleanups that depend on refclk from the host to the notifier that tells us the host has deasserted PERST#, when refclk should be valid (Manivannan Sadhasivam) Qualcomm PCIe controller driver: - Add qcom SAR2130P DT binding with an additional clock (Dmitry Baryshkov) - Enable MSI interrupts if 'global' IRQ is supported, since a previous commit unintentionally masked them (Manivannan Sadhasivam) - Move endpoint controller cleanups that depend on refclk from the host to the notifier that tells us the host has deasserted PERST#, when refclk should be valid (Manivannan Sadhasivam) - Add DT binding and driver support for IPQ9574, with Synopsys IP v5.80a and Qcom IP 1.27.0 (devi priya) - Move the OPP "operating-points-v2" table from the qcom,pcie-sm8450.yaml DT binding to qcom,pcie-common.yaml, where it can be used by other Qcom platforms (Qiang Yu) - Add 'global' SPI interrupt for events like link-up, link-down to qcom,pcie-x1e80100 DT binding so we can start enumeration when the link comes up (Qiang Yu) - Disable ASPM L0s for qcom,pcie-x1e80100 since the PHY is not tuned to support this (Qiang Yu) - Add ops_1_21_0 for SC8280X family SoC, which doesn't use the 'iommu-map' DT property and doesn't need BDF-to-SID translation (Qiang Yu) Rockchip PCIe controller driver: - Define ROCKCHIP_PCIE_AT_SIZE_ALIGN to replace magic 256 endpoint .align value (Damien Le Moal) - When unmapping an endpoint window, compute the region index instead of searching for it, and verify that the address was mapped (Damien Le Moal) - When mapping an endpoint window, verify that the address hasn't been mapped already (Damien Le Moal) - Implement pci_epc_ops.align_addr() for rockchip-ep (Damien Le Moal) - Fix MSI IRQ data mapping to observe the alignment constraint, which fixes intermittent page faults in memcpy_toio() and memcpy_fromio() (Damien Le Moal) - Rename rockchip_pcie_parse_ep_dt() to rockchip_pcie_ep_get_resources() for consistency with similar DT interfaces (Damien Le Moal) - Skip the unnecessary link train in rockchip_pcie_ep_probe() and do it only in the endpoint start operation (Damien Le Moal) - Implement pci_epc_ops.stop_link() to disable link training and controller configuration (Damien Le Moal) - Attempt link training at 5 GT/s when both partners support it (Damien Le Moal) - Add a handler for PERST# signal so we can detect host-initiated resets and start link training after PERST# is deasserted (Damien Le Moal) Synopsys DesignWare PCIe controller driver: - Clear outbound address on unmap so dw_pcie_find_index() won't match an ATU index that was already unmapped (Damien Le Moal) - Use of_property_present() instead of of_property_read_bool() when testing for presence of non-boolean DT properties (Rob Herring) - Advertise 1MB size if endpoint supports Resizable BARs, which was inadvertently lost in v6.11 (Niklas Cassel) TI J721E PCIe driver: - Add PCIe support for J722S SoC (Siddharth Vadapalli) - Delay PCIE_T_PVPERL_MS (100 ms), not just PCIE_T_PERST_CLK_US (100 us), before deasserting PERST# to ensure power and refclk are stable (Siddharth Vadapalli) TI Keystone PCIe controller driver: - Set the 'ti,keystone-pcie' mode so v3.65a devices work in Root Complex mode (Kishon Vijay Abraham I) - Try to avoid unrecoverable SError for attempts to issue config transactions when the link is down; this is racy but the best we can do (Kishon Vijay Abraham I) Miscellaneous: - Reorganize kerneldoc parameter names to match order in function signature (Julia Lawall) - Fix sysfs reset_method_store() memory leak (Todd Kjos) - Simplify pci_create_slot() (Ilpo Järvinen) - Fix incorrect printf format specifiers in pcitest (Luo Yifan)" * tag 'pci-v6.13-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci: (127 commits) PCI: rockchip-ep: Handle PERST# signal in EP mode PCI: rockchip-ep: Improve link training PCI: rockship-ep: Implement the pci_epc_ops::stop_link() operation PCI: rockchip-ep: Refactor endpoint link training enable PCI: rockchip-ep: Refactor rockchip_pcie_ep_probe() MSI-X hiding PCI: rockchip-ep: Refactor rockchip_pcie_ep_probe() memory allocations PCI: rockchip-ep: Rename rockchip_pcie_parse_ep_dt() PCI: rockchip-ep: Fix MSI IRQ data mapping PCI: rockchip-ep: Implement the pci_epc_ops::align_addr() operation PCI: rockchip-ep: Improve rockchip_pcie_ep_map_addr() PCI: rockchip-ep: Improve rockchip_pcie_ep_unmap_addr() PCI: rockchip-ep: Use a macro to define EP controller .align feature PCI: rockchip-ep: Fix address translation unit programming PCI/pwrctrl: Rename pwrctrl functions and structures PCI/pwrctrl: Rename pwrctl files to pwrctrl PCI/pwrctl: Remove pwrctl device without iterating over all children of pwrctl parent PCI/pwrctl: Ensure that pwrctl drivers are probed before PCI client drivers PCI/pwrctl: Create pwrctl device only if at least one power supply is present PCI/pwrctl: Use of_platform_device_create() to create pwrctl devices tools: PCI: Fix incorrect printf format specifiers ...
Diffstat (limited to 'drivers/pci/tph.c')
-rw-r--r--drivers/pci/tph.c547
1 files changed, 547 insertions, 0 deletions
diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c
new file mode 100644
index 000000000000..1e604fbbda65
--- /dev/null
+++ b/drivers/pci/tph.c
@@ -0,0 +1,547 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * TPH (TLP Processing Hints) support
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ * Eric Van Tassell <Eric.VanTassell@amd.com>
+ * Wei Huang <wei.huang2@amd.com>
+ */
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/msi.h>
+#include <linux/bitfield.h>
+#include <linux/pci-tph.h>
+
+#include "pci.h"
+
+/* System-wide TPH disabled */
+static bool pci_tph_disabled;
+
+#ifdef CONFIG_ACPI
+/*
+ * The st_info struct defines the Steering Tag (ST) info returned by the
+ * firmware PCI ACPI _DSM method (rev=0x7, func=0xF, "_DSM to Query Cache
+ * Locality TPH Features"), as specified in the approved ECN for PCI Firmware
+ * Spec and available at https://members.pcisig.com/wg/PCI-SIG/document/15470.
+ *
+ * @vm_st_valid: 8-bit ST for volatile memory is valid
+ * @vm_xst_valid: 16-bit extended ST for volatile memory is valid
+ * @vm_ph_ignore: 1 => PH was and will be ignored, 0 => PH should be supplied
+ * @vm_st: 8-bit ST for volatile mem
+ * @vm_xst: 16-bit extended ST for volatile mem
+ * @pm_st_valid: 8-bit ST for persistent memory is valid
+ * @pm_xst_valid: 16-bit extended ST for persistent memory is valid
+ * @pm_ph_ignore: 1 => PH was and will be ignored, 0 => PH should be supplied
+ * @pm_st: 8-bit ST for persistent mem
+ * @pm_xst: 16-bit extended ST for persistent mem
+ */
+union st_info {
+ struct {
+ u64 vm_st_valid : 1;
+ u64 vm_xst_valid : 1;
+ u64 vm_ph_ignore : 1;
+ u64 rsvd1 : 5;
+ u64 vm_st : 8;
+ u64 vm_xst : 16;
+ u64 pm_st_valid : 1;
+ u64 pm_xst_valid : 1;
+ u64 pm_ph_ignore : 1;
+ u64 rsvd2 : 5;
+ u64 pm_st : 8;
+ u64 pm_xst : 16;
+ };
+ u64 value;
+};
+
+static u16 tph_extract_tag(enum tph_mem_type mem_type, u8 req_type,
+ union st_info *info)
+{
+ switch (req_type) {
+ case PCI_TPH_REQ_TPH_ONLY: /* 8-bit tag */
+ switch (mem_type) {
+ case TPH_MEM_TYPE_VM:
+ if (info->vm_st_valid)
+ return info->vm_st;
+ break;
+ case TPH_MEM_TYPE_PM:
+ if (info->pm_st_valid)
+ return info->pm_st;
+ break;
+ }
+ break;
+ case PCI_TPH_REQ_EXT_TPH: /* 16-bit tag */
+ switch (mem_type) {
+ case TPH_MEM_TYPE_VM:
+ if (info->vm_xst_valid)
+ return info->vm_xst;
+ break;
+ case TPH_MEM_TYPE_PM:
+ if (info->pm_xst_valid)
+ return info->pm_xst;
+ break;
+ }
+ break;
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+#define TPH_ST_DSM_FUNC_INDEX 0xF
+static acpi_status tph_invoke_dsm(acpi_handle handle, u32 cpu_uid,
+ union st_info *st_out)
+{
+ union acpi_object arg3[3], in_obj, *out_obj;
+
+ if (!acpi_check_dsm(handle, &pci_acpi_dsm_guid, 7,
+ BIT(TPH_ST_DSM_FUNC_INDEX)))
+ return AE_ERROR;
+
+ /* DWORD: feature ID (0 for processor cache ST query) */
+ arg3[0].integer.type = ACPI_TYPE_INTEGER;
+ arg3[0].integer.value = 0;
+
+ /* DWORD: target UID */
+ arg3[1].integer.type = ACPI_TYPE_INTEGER;
+ arg3[1].integer.value = cpu_uid;
+
+ /* QWORD: properties, all 0's */
+ arg3[2].integer.type = ACPI_TYPE_INTEGER;
+ arg3[2].integer.value = 0;
+
+ in_obj.type = ACPI_TYPE_PACKAGE;
+ in_obj.package.count = ARRAY_SIZE(arg3);
+ in_obj.package.elements = arg3;
+
+ out_obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 7,
+ TPH_ST_DSM_FUNC_INDEX, &in_obj);
+ if (!out_obj)
+ return AE_ERROR;
+
+ if (out_obj->type != ACPI_TYPE_BUFFER) {
+ ACPI_FREE(out_obj);
+ return AE_ERROR;
+ }
+
+ st_out->value = *((u64 *)(out_obj->buffer.pointer));
+
+ ACPI_FREE(out_obj);
+
+ return AE_OK;
+}
+#endif
+
+/* Update the TPH Requester Enable field of TPH Control Register */
+static void set_ctrl_reg_req_en(struct pci_dev *pdev, u8 req_type)
+{
+ u32 reg;
+
+ pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, &reg);
+
+ reg &= ~PCI_TPH_CTRL_REQ_EN_MASK;
+ reg |= FIELD_PREP(PCI_TPH_CTRL_REQ_EN_MASK, req_type);
+
+ pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, reg);
+}
+
+static u8 get_st_modes(struct pci_dev *pdev)
+{
+ u32 reg;
+
+ pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &reg);
+ reg &= PCI_TPH_CAP_ST_NS | PCI_TPH_CAP_ST_IV | PCI_TPH_CAP_ST_DS;
+
+ return reg;
+}
+
+static u32 get_st_table_loc(struct pci_dev *pdev)
+{
+ u32 reg;
+
+ pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &reg);
+
+ return FIELD_GET(PCI_TPH_CAP_LOC_MASK, reg);
+}
+
+/*
+ * Return the size of ST table. If ST table is not in TPH Requester Extended
+ * Capability space, return 0. Otherwise return the ST Table Size + 1.
+ */
+static u16 get_st_table_size(struct pci_dev *pdev)
+{
+ u32 reg;
+ u32 loc;
+
+ /* Check ST table location first */
+ loc = get_st_table_loc(pdev);
+
+ /* Convert loc to match with PCI_TPH_LOC_* defined in pci_regs.h */
+ loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc);
+ if (loc != PCI_TPH_LOC_CAP)
+ return 0;
+
+ pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &reg);
+
+ return FIELD_GET(PCI_TPH_CAP_ST_MASK, reg) + 1;
+}
+
+/* Return device's Root Port completer capability */
+static u8 get_rp_completer_type(struct pci_dev *pdev)
+{
+ struct pci_dev *rp;
+ u32 reg;
+ int ret;
+
+ rp = pcie_find_root_port(pdev);
+ if (!rp)
+ return 0;
+
+ ret = pcie_capability_read_dword(rp, PCI_EXP_DEVCAP2, &reg);
+ if (ret)
+ return 0;
+
+ return FIELD_GET(PCI_EXP_DEVCAP2_TPH_COMP_MASK, reg);
+}
+
+/* Write ST to MSI-X vector control reg - Return 0 if OK, otherwise -errno */
+static int write_tag_to_msix(struct pci_dev *pdev, int msix_idx, u16 tag)
+{
+#ifdef CONFIG_PCI_MSI
+ struct msi_desc *msi_desc = NULL;
+ void __iomem *vec_ctrl;
+ u32 val;
+ int err = 0;
+
+ msi_lock_descs(&pdev->dev);
+
+ /* Find the msi_desc entry with matching msix_idx */
+ msi_for_each_desc(msi_desc, &pdev->dev, MSI_DESC_ASSOCIATED) {
+ if (msi_desc->msi_index == msix_idx)
+ break;
+ }
+
+ if (!msi_desc) {
+ err = -ENXIO;
+ goto err_out;
+ }
+
+ /* Get the vector control register (offset 0xc) pointed by msix_idx */
+ vec_ctrl = pdev->msix_base + msix_idx * PCI_MSIX_ENTRY_SIZE;
+ vec_ctrl += PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+ val = readl(vec_ctrl);
+ val &= ~PCI_MSIX_ENTRY_CTRL_ST;
+ val |= FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag);
+ writel(val, vec_ctrl);
+
+ /* Read back to flush the update */
+ val = readl(vec_ctrl);
+
+err_out:
+ msi_unlock_descs(&pdev->dev);
+ return err;
+#else
+ return -ENODEV;
+#endif
+}
+
+/* Write tag to ST table - Return 0 if OK, otherwise -errno */
+static int write_tag_to_st_table(struct pci_dev *pdev, int index, u16 tag)
+{
+ int st_table_size;
+ int offset;
+
+ /* Check if index is out of bound */
+ st_table_size = get_st_table_size(pdev);
+ if (index >= st_table_size)
+ return -ENXIO;
+
+ offset = pdev->tph_cap + PCI_TPH_BASE_SIZEOF + index * sizeof(u16);
+
+ return pci_write_config_word(pdev, offset, tag);
+}
+
+/**
+ * pcie_tph_get_cpu_st() - Retrieve Steering Tag for a target memory associated
+ * with a specific CPU
+ * @pdev: PCI device
+ * @mem_type: target memory type (volatile or persistent RAM)
+ * @cpu_uid: associated CPU id
+ * @tag: Steering Tag to be returned
+ *
+ * Return the Steering Tag for a target memory that is associated with a
+ * specific CPU as indicated by cpu_uid.
+ *
+ * Return: 0 if success, otherwise negative value (-errno)
+ */
+int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type mem_type,
+ unsigned int cpu_uid, u16 *tag)
+{
+#ifdef CONFIG_ACPI
+ struct pci_dev *rp;
+ acpi_handle rp_acpi_handle;
+ union st_info info;
+
+ rp = pcie_find_root_port(pdev);
+ if (!rp || !rp->bus || !rp->bus->bridge)
+ return -ENODEV;
+
+ rp_acpi_handle = ACPI_HANDLE(rp->bus->bridge);
+
+ if (tph_invoke_dsm(rp_acpi_handle, cpu_uid, &info) != AE_OK) {
+ *tag = 0;
+ return -EINVAL;
+ }
+
+ *tag = tph_extract_tag(mem_type, pdev->tph_req_type, &info);
+
+ pci_dbg(pdev, "get steering tag: mem_type=%s, cpu_uid=%d, tag=%#04x\n",
+ (mem_type == TPH_MEM_TYPE_VM) ? "volatile" : "persistent",
+ cpu_uid, *tag);
+
+ return 0;
+#else
+ return -ENODEV;
+#endif
+}
+EXPORT_SYMBOL(pcie_tph_get_cpu_st);
+
+/**
+ * pcie_tph_set_st_entry() - Set Steering Tag in the ST table entry
+ * @pdev: PCI device
+ * @index: ST table entry index
+ * @tag: Steering Tag to be written
+ *
+ * Figure out the proper location of ST table, either in the MSI-X table or
+ * in the TPH Extended Capability space, and write the Steering Tag into
+ * the ST entry pointed by index.
+ *
+ * Return: 0 if success, otherwise negative value (-errno)
+ */
+int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag)
+{
+ u32 loc;
+ int err = 0;
+
+ if (!pdev->tph_cap)
+ return -EINVAL;
+
+ if (!pdev->tph_enabled)
+ return -EINVAL;
+
+ /* No need to write tag if device is in "No ST Mode" */
+ if (pdev->tph_mode == PCI_TPH_ST_NS_MODE)
+ return 0;
+
+ /*
+ * Disable TPH before updating ST to avoid potential instability as
+ * cautioned in PCIe r6.2, sec 6.17.3, "ST Modes of Operation"
+ */
+ set_ctrl_reg_req_en(pdev, PCI_TPH_REQ_DISABLE);
+
+ loc = get_st_table_loc(pdev);
+ /* Convert loc to match with PCI_TPH_LOC_* */
+ loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc);
+
+ switch (loc) {
+ case PCI_TPH_LOC_MSIX:
+ err = write_tag_to_msix(pdev, index, tag);
+ break;
+ case PCI_TPH_LOC_CAP:
+ err = write_tag_to_st_table(pdev, index, tag);
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ if (err) {
+ pcie_disable_tph(pdev);
+ return err;
+ }
+
+ set_ctrl_reg_req_en(pdev, pdev->tph_mode);
+
+ pci_dbg(pdev, "set steering tag: %s table, index=%d, tag=%#04x\n",
+ (loc == PCI_TPH_LOC_MSIX) ? "MSI-X" : "ST", index, tag);
+
+ return 0;
+}
+EXPORT_SYMBOL(pcie_tph_set_st_entry);
+
+/**
+ * pcie_disable_tph - Turn off TPH support for device
+ * @pdev: PCI device
+ *
+ * Return: none
+ */
+void pcie_disable_tph(struct pci_dev *pdev)
+{
+ if (!pdev->tph_cap)
+ return;
+
+ if (!pdev->tph_enabled)
+ return;
+
+ pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, 0);
+
+ pdev->tph_mode = 0;
+ pdev->tph_req_type = 0;
+ pdev->tph_enabled = 0;
+}
+EXPORT_SYMBOL(pcie_disable_tph);
+
+/**
+ * pcie_enable_tph - Enable TPH support for device using a specific ST mode
+ * @pdev: PCI device
+ * @mode: ST mode to enable. Current supported modes include:
+ *
+ * - PCI_TPH_ST_NS_MODE: NO ST Mode
+ * - PCI_TPH_ST_IV_MODE: Interrupt Vector Mode
+ * - PCI_TPH_ST_DS_MODE: Device Specific Mode
+ *
+ * Check whether the mode is actually supported by the device before enabling
+ * and return an error if not. Additionally determine what types of requests,
+ * TPH or extended TPH, can be issued by the device based on its TPH requester
+ * capability and the Root Port's completer capability.
+ *
+ * Return: 0 on success, otherwise negative value (-errno)
+ */
+int pcie_enable_tph(struct pci_dev *pdev, int mode)
+{
+ u32 reg;
+ u8 dev_modes;
+ u8 rp_req_type;
+
+ /* Honor "notph" kernel parameter */
+ if (pci_tph_disabled)
+ return -EINVAL;
+
+ if (!pdev->tph_cap)
+ return -EINVAL;
+
+ if (pdev->tph_enabled)
+ return -EBUSY;
+
+ /* Sanitize and check ST mode compatibility */
+ mode &= PCI_TPH_CTRL_MODE_SEL_MASK;
+ dev_modes = get_st_modes(pdev);
+ if (!((1 << mode) & dev_modes))
+ return -EINVAL;
+
+ pdev->tph_mode = mode;
+
+ /* Get req_type supported by device and its Root Port */
+ pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &reg);
+ if (FIELD_GET(PCI_TPH_CAP_EXT_TPH, reg))
+ pdev->tph_req_type = PCI_TPH_REQ_EXT_TPH;
+ else
+ pdev->tph_req_type = PCI_TPH_REQ_TPH_ONLY;
+
+ rp_req_type = get_rp_completer_type(pdev);
+
+ /* Final req_type is the smallest value of two */
+ pdev->tph_req_type = min(pdev->tph_req_type, rp_req_type);
+
+ if (pdev->tph_req_type == PCI_TPH_REQ_DISABLE)
+ return -EINVAL;
+
+ /* Write them into TPH control register */
+ pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, &reg);
+
+ reg &= ~PCI_TPH_CTRL_MODE_SEL_MASK;
+ reg |= FIELD_PREP(PCI_TPH_CTRL_MODE_SEL_MASK, pdev->tph_mode);
+
+ reg &= ~PCI_TPH_CTRL_REQ_EN_MASK;
+ reg |= FIELD_PREP(PCI_TPH_CTRL_REQ_EN_MASK, pdev->tph_req_type);
+
+ pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, reg);
+
+ pdev->tph_enabled = 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(pcie_enable_tph);
+
+void pci_restore_tph_state(struct pci_dev *pdev)
+{
+ struct pci_cap_saved_state *save_state;
+ int num_entries, i, offset;
+ u16 *st_entry;
+ u32 *cap;
+
+ if (!pdev->tph_cap)
+ return;
+
+ if (!pdev->tph_enabled)
+ return;
+
+ save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_TPH);
+ if (!save_state)
+ return;
+
+ /* Restore control register and all ST entries */
+ cap = &save_state->cap.data[0];
+ pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, *cap++);
+ st_entry = (u16 *)cap;
+ offset = PCI_TPH_BASE_SIZEOF;
+ num_entries = get_st_table_size(pdev);
+ for (i = 0; i < num_entries; i++) {
+ pci_write_config_word(pdev, pdev->tph_cap + offset,
+ *st_entry++);
+ offset += sizeof(u16);
+ }
+}
+
+void pci_save_tph_state(struct pci_dev *pdev)
+{
+ struct pci_cap_saved_state *save_state;
+ int num_entries, i, offset;
+ u16 *st_entry;
+ u32 *cap;
+
+ if (!pdev->tph_cap)
+ return;
+
+ if (!pdev->tph_enabled)
+ return;
+
+ save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_TPH);
+ if (!save_state)
+ return;
+
+ /* Save control register */
+ cap = &save_state->cap.data[0];
+ pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, cap++);
+
+ /* Save all ST entries in extended capability structure */
+ st_entry = (u16 *)cap;
+ offset = PCI_TPH_BASE_SIZEOF;
+ num_entries = get_st_table_size(pdev);
+ for (i = 0; i < num_entries; i++) {
+ pci_read_config_word(pdev, pdev->tph_cap + offset,
+ st_entry++);
+ offset += sizeof(u16);
+ }
+}
+
+void pci_no_tph(void)
+{
+ pci_tph_disabled = true;
+
+ pr_info("PCIe TPH is disabled\n");
+}
+
+void pci_tph_init(struct pci_dev *pdev)
+{
+ int num_entries;
+ u32 save_size;
+
+ pdev->tph_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_TPH);
+ if (!pdev->tph_cap)
+ return;
+
+ num_entries = get_st_table_size(pdev);
+ save_size = sizeof(u32) + num_entries * sizeof(u16);
+ pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_TPH, save_size);
+}