summaryrefslogtreecommitdiff
path: root/drivers/pci
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci')
-rw-r--r--drivers/pci/Kconfig10
-rw-r--r--drivers/pci/Makefile2
-rw-r--r--drivers/pci/access.c3
-rw-r--r--drivers/pci/bus.c8
-rw-r--r--drivers/pci/dmar.c361
-rw-r--r--drivers/pci/hotplug/acpi_pcihp.c58
-rw-r--r--drivers/pci/hotplug/acpiphp_glue.c14
-rw-r--r--drivers/pci/hotplug/fakephp.c444
-rw-r--r--drivers/pci/hotplug/pciehp.h13
-rw-r--r--drivers/pci/hotplug/pciehp_acpi.c21
-rw-r--r--drivers/pci/hotplug/pciehp_core.c18
-rw-r--r--drivers/pci/hotplug/pciehp_hpc.c34
-rw-r--r--drivers/pci/hotplug/shpchp.h10
-rw-r--r--drivers/pci/hotplug/shpchp_pci.c2
-rw-r--r--drivers/pci/intel-iommu.c630
-rw-r--r--drivers/pci/intr_remapping.c190
-rw-r--r--drivers/pci/iov.c681
-rw-r--r--drivers/pci/msi.c426
-rw-r--r--drivers/pci/msi.h6
-rw-r--r--drivers/pci/pci-acpi.c219
-rw-r--r--drivers/pci/pci-driver.c261
-rw-r--r--drivers/pci/pci-sysfs.c122
-rw-r--r--drivers/pci/pci.c341
-rw-r--r--drivers/pci/pci.h66
-rw-r--r--drivers/pci/pcie/aer/aerdrv.c28
-rw-r--r--drivers/pci/pcie/aer/aerdrv_acpi.c2
-rw-r--r--drivers/pci/pcie/aer/aerdrv_core.c10
-rw-r--r--drivers/pci/pcie/portdrv.h14
-rw-r--r--drivers/pci/pcie/portdrv_bus.c18
-rw-r--r--drivers/pci/pcie/portdrv_core.c379
-rw-r--r--drivers/pci/pcie/portdrv_pci.c50
-rw-r--r--drivers/pci/probe.c210
-rw-r--r--drivers/pci/quirks.c273
-rw-r--r--drivers/pci/remove.c4
-rw-r--r--drivers/pci/search.c2
-rw-r--r--drivers/pci/setup-bus.c7
-rw-r--r--drivers/pci/setup-res.c15
-rw-r--r--drivers/pci/slot.c18
38 files changed, 3276 insertions, 1694 deletions
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 2a4501dd2515..fdc864f9cf23 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -59,3 +59,13 @@ config HT_IRQ
This allows native hypertransport devices to use interrupts.
If unsure say Y.
+
+config PCI_IOV
+ bool "PCI IOV support"
+ depends on PCI
+ help
+ I/O Virtualization is a PCI feature supported by some devices
+ which allows them to create virtual devices which share their
+ physical resources.
+
+ If unsure, say N.
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 3d07ce24f6a8..ba6af162fd39 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -29,6 +29,8 @@ obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
+obj-$(CONFIG_PCI_IOV) += iov.o
+
#
# Some architectures use the generic PCI setup functions
#
diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 381444794778..64dd7df90e62 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -356,7 +356,8 @@ int pci_vpd_truncate(struct pci_dev *dev, size_t size)
return -EINVAL;
dev->vpd->len = size;
- dev->vpd->attr->size = size;
+ if (dev->vpd->attr)
+ dev->vpd->attr->size = size;
return 0;
}
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 52b54f053be0..97a8194063b5 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -133,7 +133,7 @@ int pci_bus_add_child(struct pci_bus *bus)
*
* Call hotplug for each new devices.
*/
-void pci_bus_add_devices(struct pci_bus *bus)
+void pci_bus_add_devices(const struct pci_bus *bus)
{
struct pci_dev *dev;
struct pci_bus *child;
@@ -184,8 +184,10 @@ void pci_enable_bridges(struct pci_bus *bus)
list_for_each_entry(dev, &bus->devices, bus_list) {
if (dev->subordinate) {
- retval = pci_enable_device(dev);
- pci_set_master(dev);
+ if (!pci_is_enabled(dev)) {
+ retval = pci_enable_device(dev);
+ pci_set_master(dev);
+ }
pci_enable_bridges(dev->subordinate);
}
}
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 5f333403c2ea..25a00ce4f24d 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -31,6 +31,8 @@
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <linux/timer.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
#undef PREFIX
#define PREFIX "DMAR:"
@@ -178,6 +180,7 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
dmaru->hdr = header;
drhd = (struct acpi_dmar_hardware_unit *)header;
dmaru->reg_base_addr = drhd->address;
+ dmaru->segment = drhd->segment;
dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
ret = alloc_iommu(dmaru);
@@ -509,6 +512,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
return -ENOMEM;
iommu->seq_id = iommu_allocated++;
+ sprintf (iommu->name, "dmar%d", iommu->seq_id);
iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
if (!iommu->reg) {
@@ -751,14 +755,77 @@ int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
}
/*
+ * Disable Queued Invalidation interface.
+ */
+void dmar_disable_qi(struct intel_iommu *iommu)
+{
+ unsigned long flags;
+ u32 sts;
+ cycles_t start_time = get_cycles();
+
+ if (!ecap_qis(iommu->ecap))
+ return;
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+
+ sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
+ if (!(sts & DMA_GSTS_QIES))
+ goto end;
+
+ /*
+ * Give a chance to HW to complete the pending invalidation requests.
+ */
+ while ((readl(iommu->reg + DMAR_IQT_REG) !=
+ readl(iommu->reg + DMAR_IQH_REG)) &&
+ (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
+ cpu_relax();
+
+ iommu->gcmd &= ~DMA_GCMD_QIE;
+
+ writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
+
+ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
+ !(sts & DMA_GSTS_QIES), sts);
+end:
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
+/*
+ * Enable queued invalidation.
+ */
+static void __dmar_enable_qi(struct intel_iommu *iommu)
+{
+ u32 cmd, sts;
+ unsigned long flags;
+ struct q_inval *qi = iommu->qi;
+
+ qi->free_head = qi->free_tail = 0;
+ qi->free_cnt = QI_LENGTH;
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+
+ /* write zero to the tail reg */
+ writel(0, iommu->reg + DMAR_IQT_REG);
+
+ dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
+
+ cmd = iommu->gcmd | DMA_GCMD_QIE;
+ iommu->gcmd |= DMA_GCMD_QIE;
+ writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+ /* Make sure hardware complete it */
+ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
+/*
* Enable Queued Invalidation interface. This is a must to support
* interrupt-remapping. Also used by DMA-remapping, which replaces
* register based IOTLB invalidation.
*/
int dmar_enable_qi(struct intel_iommu *iommu)
{
- u32 cmd, sts;
- unsigned long flags;
struct q_inval *qi;
if (!ecap_qis(iommu->ecap))
@@ -770,20 +837,20 @@ int dmar_enable_qi(struct intel_iommu *iommu)
if (iommu->qi)
return 0;
- iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
+ iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
if (!iommu->qi)
return -ENOMEM;
qi = iommu->qi;
- qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
+ qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
if (!qi->desc) {
kfree(qi);
iommu->qi = 0;
return -ENOMEM;
}
- qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
+ qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
if (!qi->desc_status) {
free_page((unsigned long) qi->desc);
kfree(qi);
@@ -796,19 +863,283 @@ int dmar_enable_qi(struct intel_iommu *iommu)
spin_lock_init(&qi->q_lock);
- spin_lock_irqsave(&iommu->register_lock, flags);
- /* write zero to the tail reg */
- writel(0, iommu->reg + DMAR_IQT_REG);
+ __dmar_enable_qi(iommu);
- dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
+ return 0;
+}
- cmd = iommu->gcmd | DMA_GCMD_QIE;
- iommu->gcmd |= DMA_GCMD_QIE;
- writel(cmd, iommu->reg + DMAR_GCMD_REG);
+/* iommu interrupt handling. Most stuff are MSI-like. */
- /* Make sure hardware complete it */
- IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
- spin_unlock_irqrestore(&iommu->register_lock, flags);
+enum faulttype {
+ DMA_REMAP,
+ INTR_REMAP,
+ UNKNOWN,
+};
+
+static const char *dma_remap_fault_reasons[] =
+{
+ "Software",
+ "Present bit in root entry is clear",
+ "Present bit in context entry is clear",
+ "Invalid context entry",
+ "Access beyond MGAW",
+ "PTE Write access is not set",
+ "PTE Read access is not set",
+ "Next page table ptr is invalid",
+ "Root table address invalid",
+ "Context table ptr is invalid",
+ "non-zero reserved fields in RTP",
+ "non-zero reserved fields in CTP",
+ "non-zero reserved fields in PTE",
+};
+
+static const char *intr_remap_fault_reasons[] =
+{
+ "Detected reserved fields in the decoded interrupt-remapped request",
+ "Interrupt index exceeded the interrupt-remapping table size",
+ "Present field in the IRTE entry is clear",
+ "Error accessing interrupt-remapping table pointed by IRTA_REG",
+ "Detected reserved fields in the IRTE entry",
+ "Blocked a compatibility format interrupt request",
+ "Blocked an interrupt request due to source-id verification failure",
+};
+
+#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
+
+const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
+{
+ if (fault_reason >= 0x20 && (fault_reason <= 0x20 +
+ ARRAY_SIZE(intr_remap_fault_reasons))) {
+ *fault_type = INTR_REMAP;
+ return intr_remap_fault_reasons[fault_reason - 0x20];
+ } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
+ *fault_type = DMA_REMAP;
+ return dma_remap_fault_reasons[fault_reason];
+ } else {
+ *fault_type = UNKNOWN;
+ return "Unknown";
+ }
+}
+
+void dmar_msi_unmask(unsigned int irq)
+{
+ struct intel_iommu *iommu = get_irq_data(irq);
+ unsigned long flag;
+
+ /* unmask it */
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ writel(0, iommu->reg + DMAR_FECTL_REG);
+ /* Read a reg to force flush the post write */
+ readl(iommu->reg + DMAR_FECTL_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+void dmar_msi_mask(unsigned int irq)
+{
+ unsigned long flag;
+ struct intel_iommu *iommu = get_irq_data(irq);
+
+ /* mask it */
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
+ /* Read a reg to force flush the post write */
+ readl(iommu->reg + DMAR_FECTL_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+void dmar_msi_write(int irq, struct msi_msg *msg)
+{
+ struct intel_iommu *iommu = get_irq_data(irq);
+ unsigned long flag;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
+ writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
+ writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+void dmar_msi_read(int irq, struct msi_msg *msg)
+{
+ struct intel_iommu *iommu = get_irq_data(irq);
+ unsigned long flag;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
+ msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
+ msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
+ u8 fault_reason, u16 source_id, unsigned long long addr)
+{
+ const char *reason;
+ int fault_type;
+
+ reason = dmar_get_fault_reason(fault_reason, &fault_type);
+
+ if (fault_type == INTR_REMAP)
+ printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
+ "fault index %llx\n"
+ "INTR-REMAP:[fault reason %02d] %s\n",
+ (source_id >> 8), PCI_SLOT(source_id & 0xFF),
+ PCI_FUNC(source_id & 0xFF), addr >> 48,
+ fault_reason, reason);
+ else
+ printk(KERN_ERR
+ "DMAR:[%s] Request device [%02x:%02x.%d] "
+ "fault addr %llx \n"
+ "DMAR:[fault reason %02d] %s\n",
+ (type ? "DMA Read" : "DMA Write"),
+ (source_id >> 8), PCI_SLOT(source_id & 0xFF),
+ PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
+ return 0;
+}
+
+#define PRIMARY_FAULT_REG_LEN (16)
+irqreturn_t dmar_fault(int irq, void *dev_id)
+{
+ struct intel_iommu *iommu = dev_id;
+ int reg, fault_index;
+ u32 fault_status;
+ unsigned long flag;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ fault_status = readl(iommu->reg + DMAR_FSTS_REG);
+ if (fault_status)
+ printk(KERN_ERR "DRHD: handling fault status reg %x\n",
+ fault_status);
+
+ /* TBD: ignore advanced fault log currently */
+ if (!(fault_status & DMA_FSTS_PPF))
+ goto clear_rest;
+
+ fault_index = dma_fsts_fault_record_index(fault_status);
+ reg = cap_fault_reg_offset(iommu->cap);
+ while (1) {
+ u8 fault_reason;
+ u16 source_id;
+ u64 guest_addr;
+ int type;
+ u32 data;
+
+ /* highest 32 bits */
+ data = readl(iommu->reg + reg +
+ fault_index * PRIMARY_FAULT_REG_LEN + 12);
+ if (!(data & DMA_FRCD_F))
+ break;
+
+ fault_reason = dma_frcd_fault_reason(data);
+ type = dma_frcd_type(data);
+
+ data = readl(iommu->reg + reg +
+ fault_index * PRIMARY_FAULT_REG_LEN + 8);
+ source_id = dma_frcd_source_id(data);
+
+ guest_addr = dmar_readq(iommu->reg + reg +
+ fault_index * PRIMARY_FAULT_REG_LEN);
+ guest_addr = dma_frcd_page_addr(guest_addr);
+ /* clear the fault */
+ writel(DMA_FRCD_F, iommu->reg + reg +
+ fault_index * PRIMARY_FAULT_REG_LEN + 12);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+
+ dmar_fault_do_one(iommu, type, fault_reason,
+ source_id, guest_addr);
+
+ fault_index++;
+ if (fault_index > cap_num_fault_regs(iommu->cap))
+ fault_index = 0;
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ }
+clear_rest:
+ /* clear all the other faults */
+ fault_status = readl(iommu->reg + DMAR_FSTS_REG);
+ writel(fault_status, iommu->reg + DMAR_FSTS_REG);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+ return IRQ_HANDLED;
+}
+
+int dmar_set_interrupt(struct intel_iommu *iommu)
+{
+ int irq, ret;
+
+ /*
+ * Check if the fault interrupt is already initialized.
+ */
+ if (iommu->irq)
+ return 0;
+
+ irq = create_irq();
+ if (!irq) {
+ printk(KERN_ERR "IOMMU: no free vectors\n");
+ return -EINVAL;
+ }
+
+ set_irq_data(irq, iommu);
+ iommu->irq = irq;
+
+ ret = arch_setup_dmar_msi(irq);
+ if (ret) {
+ set_irq_data(irq, NULL);
+ iommu->irq = 0;
+ destroy_irq(irq);
+ return 0;
+ }
+
+ ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
+ if (ret)
+ printk(KERN_ERR "IOMMU: can't request irq\n");
+ return ret;
+}
+
+int __init enable_drhd_fault_handling(void)
+{
+ struct dmar_drhd_unit *drhd;
+
+ /*
+ * Enable fault control interrupt.
+ */
+ for_each_drhd_unit(drhd) {
+ int ret;
+ struct intel_iommu *iommu = drhd->iommu;
+ ret = dmar_set_interrupt(iommu);
+
+ if (ret) {
+ printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
+ " interrupt, ret %d\n",
+ (unsigned long long)drhd->reg_base_addr, ret);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Re-enable Queued Invalidation interface.
+ */
+int dmar_reenable_qi(struct intel_iommu *iommu)
+{
+ if (!ecap_qis(iommu->ecap))
+ return -ENOENT;
+
+ if (!iommu->qi)
+ return -ENOENT;
+
+ /*
+ * First disable queued invalidation.
+ */
+ dmar_disable_qi(iommu);
+ /*
+ * Then enable queued invalidation again. Since there is no pending
+ * invalidation requests now, it's safe to re-enable queued
+ * invalidation.
+ */
+ __dmar_enable_qi(iommu);
return 0;
}
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index 1c1141801060..fbc63d5e459f 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -30,9 +30,8 @@
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
+#include <linux/acpi.h>
#include <linux/pci-acpi.h>
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
#define MY_NAME "acpi_pcihp"
@@ -333,19 +332,14 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
{
acpi_status status = AE_NOT_FOUND;
acpi_handle handle, phandle;
- struct pci_bus *pbus = bus;
- struct pci_dev *pdev;
-
- do {
- pdev = pbus->self;
- if (!pdev) {
- handle = acpi_get_pci_rootbridge_handle(
- pci_domain_nr(pbus), pbus->number);
+ struct pci_bus *pbus;
+
+ handle = NULL;
+ for (pbus = bus; pbus; pbus = pbus->parent) {
+ handle = acpi_pci_get_bridge_handle(pbus);
+ if (handle)
break;
- }
- handle = DEVICE_ACPI_HANDLE(&(pdev->dev));
- pbus = pbus->parent;
- } while (!handle);
+ }
/*
* _HPP settings apply to all child buses, until another _HPP is
@@ -378,12 +372,10 @@ EXPORT_SYMBOL_GPL(acpi_get_hp_params_from_firmware);
*
* Attempt to take hotplug control from firmware.
*/
-int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
+int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
{
acpi_status status;
acpi_handle chandle, handle;
- struct pci_dev *pdev = dev;
- struct pci_bus *parent;
struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
flags &= (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL |
@@ -408,33 +400,25 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
acpi_get_name(handle, ACPI_FULL_PATHNAME, &string);
dbg("Trying to get hotplug control for %s\n",
(char *)string.pointer);
- status = pci_osc_control_set(handle, flags);
+ status = acpi_pci_osc_control_set(handle, flags);
if (ACPI_SUCCESS(status))
goto got_one;
kfree(string.pointer);
string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL };
}
- pdev = dev;
- handle = DEVICE_ACPI_HANDLE(&dev->dev);
- while (!handle) {
+ handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+ if (!handle) {
/*
* This hotplug controller was not listed in the ACPI name
* space at all. Try to get acpi handle of parent pci bus.
*/
- if (!pdev || !pdev->bus->parent)
- break;
- parent = pdev->bus->parent;
- dbg("Could not find %s in acpi namespace, trying parent\n",
- pci_name(pdev));
- if (!parent->self)
- /* Parent must be a host bridge */
- handle = acpi_get_pci_rootbridge_handle(
- pci_domain_nr(parent),
- parent->number);
- else
- handle = DEVICE_ACPI_HANDLE(&(parent->self->dev));
- pdev = parent->self;
+ struct pci_bus *pbus;
+ for (pbus = pdev->bus; pbus; pbus = pbus->parent) {
+ handle = acpi_pci_get_bridge_handle(pbus);
+ if (handle)
+ break;
+ }
}
while (handle) {
@@ -453,13 +437,13 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
}
dbg("Cannot get control of hotplug hardware for pci %s\n",
- pci_name(dev));
+ pci_name(pdev));
kfree(string.pointer);
return -ENODEV;
got_one:
- dbg("Gained control for hotplug HW for pci %s (%s)\n", pci_name(dev),
- (char *)string.pointer);
+ dbg("Gained control for hotplug HW for pci %s (%s)\n",
+ pci_name(pdev), (char *)string.pointer);
kfree(string.pointer);
return 0;
}
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 803d9ddd6e75..a33794d9e0dc 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -38,6 +38,8 @@
* - The one in acpiphp_bridge has its refcount elevated by pci_get_slot()
* when the bridge is scanned and it loses a refcount when the bridge
* is removed.
+ * - When a P2P bridge is present, we elevate the refcount on the subordinate
+ * bus. It loses the refcount when the the driver unloads.
*/
#include <linux/init.h>
@@ -440,6 +442,12 @@ static void add_p2p_bridge(acpi_handle *handle, struct pci_dev *pci_dev)
goto err;
}
+ /*
+ * Grab a ref to the subordinate PCI bus in case the bus is
+ * removed via PCI core logical hotplug. The ref pins the bus
+ * (which we access during module unload).
+ */
+ get_device(&bridge->pci_bus->dev);
spin_lock_init(&bridge->res_lock);
init_bridge_misc(bridge);
@@ -619,6 +627,12 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge)
slot = next;
}
+ /*
+ * Only P2P bridges have a pci_dev
+ */
+ if (bridge->pci_dev)
+ put_device(&bridge->pci_bus->dev);
+
pci_dev_put(bridge->pci_dev);
list_del(&bridge->list);
kfree(bridge);
diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c
index d8649e127298..6151389fd903 100644
--- a/drivers/pci/hotplug/fakephp.c
+++ b/drivers/pci/hotplug/fakephp.c
@@ -1,395 +1,163 @@
-/*
- * Fake PCI Hot Plug Controller Driver
+/* Works like the fakephp driver used to, except a little better.
*
- * Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com>
- * Copyright (C) 2003 IBM Corp.
- * Copyright (C) 2003 Rolf Eike Beer <eike-kernel@sf-tec.de>
+ * - It's possible to remove devices with subordinate busses.
+ * - New PCI devices that appear via any method, not just a fakephp triggered
+ * rescan, will be noticed.
+ * - Devices that are removed via any method, not just a fakephp triggered
+ * removal, will also be noticed.
*
- * Based on ideas and code from:
- * Vladimir Kondratiev <vladimir.kondratiev@intel.com>
- * Rolf Eike Beer <eike-kernel@sf-tec.de>
+ * Uses nothing from the pci-hotplug subsystem.
*
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 2 of the License.
- *
- * Send feedback to <greg@kroah.com>
*/
-/*
- *
- * This driver will "emulate" removing PCI devices from the system. If
- * the "power" file is written to with "0" then the specified PCI device
- * will be completely removed from the kernel.
- *
- * WARNING, this does NOT turn off the power to the PCI device. This is
- * a "logical" removal, not a physical or electrical removal.
- *
- * Use this module at your own risk, you have been warned!
- *
- * Enabling PCI devices is left as an exercise for the reader...
- *
- */
-#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/pci_hotplug.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
+#include <linux/pci.h>
+#include <linux/device.h>
#include "../pci.h"
-#if !defined(MODULE)
- #define MY_NAME "fakephp"
-#else
- #define MY_NAME THIS_MODULE->name
-#endif
-
-#define dbg(format, arg...) \
- do { \
- if (debug) \
- printk(KERN_DEBUG "%s: " format, \
- MY_NAME , ## arg); \
- } while (0)
-#define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
-#define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME , ## arg)
-
-#define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>"
-#define DRIVER_DESC "Fake PCI Hot Plug Controller Driver"
-
-struct dummy_slot {
- struct list_head node;
- struct hotplug_slot *slot;
- struct pci_dev *dev;
- struct work_struct remove_work;
- unsigned long removed;
+struct legacy_slot {
+ struct kobject kobj;
+ struct pci_dev *dev;
+ struct list_head list;
};
-static int debug;
-static int dup_slots;
-static LIST_HEAD(slot_list);
-static struct workqueue_struct *dummyphp_wq;
-
-static void pci_rescan_worker(struct work_struct *work);
-static DECLARE_WORK(pci_rescan_work, pci_rescan_worker);
-
-static int enable_slot (struct hotplug_slot *slot);
-static int disable_slot (struct hotplug_slot *slot);
+static LIST_HEAD(legacy_list);
-static struct hotplug_slot_ops dummy_hotplug_slot_ops = {
- .owner = THIS_MODULE,
- .enable_slot = enable_slot,
- .disable_slot = disable_slot,
-};
-
-static void dummy_release(struct hotplug_slot *slot)
+static ssize_t legacy_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
{
- struct dummy_slot *dslot = slot->private;
-
- list_del(&dslot->node);
- kfree(dslot->slot->info);
- kfree(dslot->slot);
- pci_dev_put(dslot->dev);
- kfree(dslot);
+ struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj);
+ strcpy(buf, "1\n");
+ return 2;
}
-#define SLOT_NAME_SIZE 8
-
-static int add_slot(struct pci_dev *dev)
+static void remove_callback(void *data)
{
- struct dummy_slot *dslot;
- struct hotplug_slot *slot;
- char name[SLOT_NAME_SIZE];
- int retval = -ENOMEM;
- static int count = 1;
-
- slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
- if (!slot)
- goto error;
-
- slot->info = kzalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL);
- if (!slot->info)
- goto error_slot;
-
- slot->info->power_status = 1;
- slot->info->max_bus_speed = PCI_SPEED_UNKNOWN;
- slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN;
-
- dslot = kzalloc(sizeof(struct dummy_slot), GFP_KERNEL);
- if (!dslot)
- goto error_info;
-
- if (dup_slots)
- snprintf(name, SLOT_NAME_SIZE, "fake");
- else
- snprintf(name, SLOT_NAME_SIZE, "fake%d", count++);
- dbg("slot->name = %s\n", name);
- slot->ops = &dummy_hotplug_slot_ops;
- slot->release = &dummy_release;
- slot->private = dslot;
-
- retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn), name);
- if (retval) {
- err("pci_hp_register failed with error %d\n", retval);
- goto error_dslot;
- }
-
- dbg("slot->name = %s\n", hotplug_slot_name(slot));
- dslot->slot = slot;
- dslot->dev = pci_dev_get(dev);
- list_add (&dslot->node, &slot_list);
- return retval;
-
-error_dslot:
- kfree(dslot);
-error_info:
- kfree(slot->info);
-error_slot:
- kfree(slot);
-error:
- return retval;
+ pci_remove_bus_device((struct pci_dev *)data);
}
-static int __init pci_scan_buses(void)
+static ssize_t legacy_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t len)
{
- struct pci_dev *dev = NULL;
- int lastslot = 0;
+ struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj);
+ unsigned long val;
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
- if (PCI_FUNC(dev->devfn) > 0 &&
- lastslot == PCI_SLOT(dev->devfn))
- continue;
- lastslot = PCI_SLOT(dev->devfn);
- add_slot(dev);
- }
+ if (strict_strtoul(buf, 0, &val) < 0)
+ return -EINVAL;
- return 0;
+ if (val)
+ pci_rescan_bus(slot->dev->bus);
+ else
+ sysfs_schedule_callback(&slot->dev->dev.kobj, remove_callback,
+ slot->dev, THIS_MODULE);
+ return len;
}
-static void remove_slot(struct dummy_slot *dslot)
-{
- int retval;
-
- dbg("removing slot %s\n", hotplug_slot_name(dslot->slot));
- retval = pci_hp_deregister(dslot->slot);
- if (retval)
- err("Problem unregistering a slot %s\n",
- hotplug_slot_name(dslot->slot));
-}
+static struct attribute *legacy_attrs[] = {
+ &(struct attribute){ .name = "power", .mode = 0644 },
+ NULL,
+};
-/* called from the single-threaded workqueue handler to remove a slot */
-static void remove_slot_worker(struct work_struct *work)
+static void legacy_release(struct kobject *kobj)
{
- struct dummy_slot *dslot =
- container_of(work, struct dummy_slot, remove_work);
- remove_slot(dslot);
-}
+ struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj);
-/**
- * pci_rescan_slot - Rescan slot
- * @temp: Device template. Should be set: bus and devfn.
- *
- * Tries hard not to re-enable already existing devices;
- * also handles scanning of subfunctions.
- */
-static int pci_rescan_slot(struct pci_dev *temp)
-{
- struct pci_bus *bus = temp->bus;
- struct pci_dev *dev;
- int func;
- u8 hdr_type;
- int count = 0;
-
- if (!pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) {
- temp->hdr_type = hdr_type & 0x7f;
- if ((dev = pci_get_slot(bus, temp->devfn)) != NULL)
- pci_dev_put(dev);
- else {
- dev = pci_scan_single_device(bus, temp->devfn);
- if (dev) {
- dbg("New device on %s function %x:%x\n",
- bus->name, temp->devfn >> 3,
- temp->devfn & 7);
- count++;
- }
- }
- /* multifunction device? */
- if (!(hdr_type & 0x80))
- return count;
-
- /* continue scanning for other functions */
- for (func = 1, temp->devfn++; func < 8; func++, temp->devfn++) {
- if (pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type))
- continue;
- temp->hdr_type = hdr_type & 0x7f;
-
- if ((dev = pci_get_slot(bus, temp->devfn)) != NULL)
- pci_dev_put(dev);
- else {
- dev = pci_scan_single_device(bus, temp->devfn);
- if (dev) {
- dbg("New device on %s function %x:%x\n",
- bus->name, temp->devfn >> 3,
- temp->devfn & 7);
- count++;
- }
- }
- }
- }
-
- return count;
+ pci_dev_put(slot->dev);
+ kfree(slot);
}
+static struct kobj_type legacy_ktype = {
+ .sysfs_ops = &(struct sysfs_ops){
+ .store = legacy_store, .show = legacy_show
+ },
+ .release = &legacy_release,
+ .default_attrs = legacy_attrs,
+};
-/**
- * pci_rescan_bus - Rescan PCI bus
- * @bus: the PCI bus to rescan
- *
- * Call pci_rescan_slot for each possible function of the bus.
- */
-static void pci_rescan_bus(const struct pci_bus *bus)
+static int legacy_add_slot(struct pci_dev *pdev)
{
- unsigned int devfn;
- struct pci_dev *dev;
- int retval;
- int found = 0;
- dev = alloc_pci_dev();
- if (!dev)
- return;
+ struct legacy_slot *slot = kzalloc(sizeof(*slot), GFP_KERNEL);
- dev->bus = (struct pci_bus*)bus;
- dev->sysdata = bus->sysdata;
- for (devfn = 0; devfn < 0x100; devfn += 8) {
- dev->devfn = devfn;
- found += pci_rescan_slot(dev);
- }
-
- if (found) {
- pci_bus_assign_resources(bus);
- list_for_each_entry(dev, &bus->devices, bus_list) {
- /* Skip already-added devices */
- if (dev->is_added)
- continue;
- retval = pci_bus_add_device(dev);
- if (retval)
- dev_err(&dev->dev,
- "Error adding device, continuing\n");
- else
- add_slot(dev);
- }
- pci_bus_add_devices(bus);
- }
- kfree(dev);
-}
+ if (!slot)
+ return -ENOMEM;
-/* recursively scan all buses */
-static void pci_rescan_buses(const struct list_head *list)
-{
- const struct list_head *l;
- list_for_each(l,list) {
- const struct pci_bus *b = pci_bus_b(l);
- pci_rescan_bus(b);
- pci_rescan_buses(&b->children);
+ if (kobject_init_and_add(&slot->kobj, &legacy_ktype,
+ &pci_slots_kset->kobj, "%s",
+ dev_name(&pdev->dev))) {
+ dev_warn(&pdev->dev, "Failed to created legacy fake slot\n");
+ return -EINVAL;
}
-}
+ slot->dev = pci_dev_get(pdev);
-/* initiate rescan of all pci buses */
-static inline void pci_rescan(void) {
- pci_rescan_buses(&pci_root_buses);
-}
-
-/* called from the single-threaded workqueue handler to rescan all pci buses */
-static void pci_rescan_worker(struct work_struct *work)
-{
- pci_rescan();
-}
+ list_add(&slot->list, &legacy_list);
-static int enable_slot(struct hotplug_slot *hotplug_slot)
-{
- /* mis-use enable_slot for rescanning of the pci bus */
- cancel_work_sync(&pci_rescan_work);
- queue_work(dummyphp_wq, &pci_rescan_work);
return 0;
}
-static int disable_slot(struct hotplug_slot *slot)
+static int legacy_notify(struct notifier_block *nb,
+ unsigned long action, void *data)
{
- struct dummy_slot *dslot;
- struct pci_dev *dev;
- int func;
-
- if (!slot)
- return -ENODEV;
- dslot = slot->private;
-
- dbg("%s - physical_slot = %s\n", __func__, hotplug_slot_name(slot));
+ struct pci_dev *pdev = to_pci_dev(data);
- for (func = 7; func >= 0; func--) {
- dev = pci_get_slot(dslot->dev->bus, dslot->dev->devfn + func);
- if (!dev)
- continue;
+ if (action == BUS_NOTIFY_ADD_DEVICE) {
+ legacy_add_slot(pdev);
+ } else if (action == BUS_NOTIFY_DEL_DEVICE) {
+ struct legacy_slot *slot;
- if (test_and_set_bit(0, &dslot->removed)) {
- dbg("Slot already scheduled for removal\n");
- pci_dev_put(dev);
- return -ENODEV;
- }
+ list_for_each_entry(slot, &legacy_list, list)
+ if (slot->dev == pdev)
+ goto found;
- /* remove the device from the pci core */
- pci_remove_bus_device(dev);
-
- /* queue work item to blow away this sysfs entry and other
- * parts.
- */
- INIT_WORK(&dslot->remove_work, remove_slot_worker);
- queue_work(dummyphp_wq, &dslot->remove_work);
-
- pci_dev_put(dev);
+ dev_warn(&pdev->dev, "Missing legacy fake slot?");
+ return -ENODEV;
+found:
+ kobject_del(&slot->kobj);
+ list_del(&slot->list);
+ kobject_put(&slot->kobj);
}
+
return 0;
}
-static void cleanup_slots (void)
-{
- struct list_head *tmp;
- struct list_head *next;
- struct dummy_slot *dslot;
-
- destroy_workqueue(dummyphp_wq);
- list_for_each_safe (tmp, next, &slot_list) {
- dslot = list_entry (tmp, struct dummy_slot, node);
- remove_slot(dslot);
- }
-
-}
+static struct notifier_block legacy_notifier = {
+ .notifier_call = legacy_notify
+};
-static int __init dummyphp_init(void)
+static int __init init_legacy(void)
{
- info(DRIVER_DESC "\n");
+ struct pci_dev *pdev = NULL;
- dummyphp_wq = create_singlethread_workqueue(MY_NAME);
- if (!dummyphp_wq)
- return -ENOMEM;
+ /* Add existing devices */
+ while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev)))
+ legacy_add_slot(pdev);
- return pci_scan_buses();
+ /* Be alerted of any new ones */
+ bus_register_notifier(&pci_bus_type, &legacy_notifier);
+ return 0;
}
+module_init(init_legacy);
-
-static void __exit dummyphp_exit(void)
+static void __exit remove_legacy(void)
{
- cleanup_slots();
+ struct legacy_slot *slot, *tmp;
+
+ bus_unregister_notifier(&pci_bus_type, &legacy_notifier);
+
+ list_for_each_entry_safe(slot, tmp, &legacy_list, list) {
+ list_del(&slot->list);
+ kobject_del(&slot->kobj);
+ kobject_put(&slot->kobj);
+ }
}
+module_exit(remove_legacy);
-module_init(dummyphp_init);
-module_exit(dummyphp_exit);
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_AUTHOR("Trent Piepho <xyzzy@speakeasy.org>");
+MODULE_DESCRIPTION("Legacy version of the fakephp interface");
MODULE_LICENSE("GPL");
-module_param(debug, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
-module_param(dup_slots, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(dup_slots, "Force duplicate slot names for debugging");
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 39ae37589fda..0a368547e633 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -46,10 +46,10 @@ extern int pciehp_force;
extern struct workqueue_struct *pciehp_wq;
#define dbg(format, arg...) \
- do { \
- if (pciehp_debug) \
- printk("%s: " format, MY_NAME , ## arg); \
- } while (0)
+do { \
+ if (pciehp_debug) \
+ printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg); \
+} while (0)
#define err(format, arg...) \
printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
#define info(format, arg...) \
@@ -60,7 +60,7 @@ extern struct workqueue_struct *pciehp_wq;
#define ctrl_dbg(ctrl, format, arg...) \
do { \
if (pciehp_debug) \
- dev_printk(, &ctrl->pcie->device, \
+ dev_printk(KERN_DEBUG, &ctrl->pcie->device, \
format, ## arg); \
} while (0)
#define ctrl_err(ctrl, format, arg...) \
@@ -108,10 +108,11 @@ struct controller {
u32 slot_cap;
u8 cap_base;
struct timer_list poll_timer;
- int cmd_busy;
+ unsigned int cmd_busy:1;
unsigned int no_cmd_complete:1;
unsigned int link_active_reporting:1;
unsigned int notification_enabled:1;
+ unsigned int power_fault_detected;
};
#define INT_BUTTON_IGNORE 0
diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c
index 438d795f9fe3..96048010e7d9 100644
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c
@@ -67,37 +67,27 @@ static int __init parse_detect_mode(void)
return PCIEHP_DETECT_DEFAULT;
}
-static struct pcie_port_service_id __initdata port_pci_ids[] = {
- {
- .vendor = PCI_ANY_ID,
- .device = PCI_ANY_ID,
- .port_type = PCIE_ANY_PORT,
- .service_type = PCIE_PORT_SERVICE_HP,
- .driver_data = 0,
- }, { /* end: all zeroes */ }
-};
-
static int __initdata dup_slot_id;
static int __initdata acpi_slot_detected;
static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots);
/* Dummy driver for dumplicate name detection */
-static int __init dummy_probe(struct pcie_device *dev,
- const struct pcie_port_service_id *id)
+static int __init dummy_probe(struct pcie_device *dev)
{
int pos;
u32 slot_cap;
struct slot *slot, *tmp;
struct pci_dev *pdev = dev->port;
struct pci_bus *pbus = pdev->subordinate;
- if (!(slot = kzalloc(sizeof(*slot), GFP_KERNEL)))
- return -ENOMEM;
/* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */
if (pciehp_get_hp_hw_control_from_firmware(pdev))
return -ENODEV;
if (!(pos = pci_find_capability(pdev, PCI_CAP_ID_EXP)))
return -ENODEV;
pci_read_config_dword(pdev, pos + PCI_EXP_SLTCAP, &slot_cap);
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot)
+ return -ENOMEM;
slot->number = slot_cap >> 19;
list_for_each_entry(tmp, &dummy_slots, slot_list) {
if (tmp->number == slot->number)
@@ -111,7 +101,8 @@ static int __init dummy_probe(struct pcie_device *dev,
static struct pcie_port_service_driver __initdata dummy_driver = {
.name = "pciehp_dummy",
- .id_table = port_pci_ids,
+ .port_type = PCIE_ANY_PORT,
+ .service = PCIE_PORT_SERVICE_HP,
.probe = dummy_probe,
};
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 681e3912b821..fb254b2454de 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -401,7 +401,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
return 0;
}
-static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_id *id)
+static int pciehp_probe(struct pcie_device *dev)
{
int rc;
struct controller *ctrl;
@@ -475,7 +475,7 @@ static void pciehp_remove (struct pcie_device *dev)
}
#ifdef CONFIG_PM
-static int pciehp_suspend (struct pcie_device *dev, pm_message_t state)
+static int pciehp_suspend (struct pcie_device *dev)
{
dev_info(&dev->device, "%s ENTRY\n", __func__);
return 0;
@@ -503,20 +503,12 @@ static int pciehp_resume (struct pcie_device *dev)
}
return 0;
}
-#endif
-
-static struct pcie_port_service_id port_pci_ids[] = { {
- .vendor = PCI_ANY_ID,
- .device = PCI_ANY_ID,
- .port_type = PCIE_ANY_PORT,
- .service_type = PCIE_PORT_SERVICE_HP,
- .driver_data = 0,
- }, { /* end: all zeroes */ }
-};
+#endif /* PM */
static struct pcie_port_service_driver hpdriver_portdrv = {
.name = PCIE_MODULE_NAME,
- .id_table = &port_pci_ids[0],
+ .port_type = PCIE_ANY_PORT,
+ .service = PCIE_PORT_SERVICE_HP,
.probe = pciehp_probe,
.remove = pciehp_remove,
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 7a16c6897bb9..07bd32151146 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -548,23 +548,21 @@ static int hpc_power_on_slot(struct slot * slot)
slot_cmd = POWER_ON;
cmd_mask = PCI_EXP_SLTCTL_PCC;
- /* Enable detection that we turned off at slot power-off time */
if (!pciehp_poll_mode) {
- slot_cmd |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
- PCI_EXP_SLTCTL_PDCE);
- cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
- PCI_EXP_SLTCTL_PDCE);
+ /* Enable power fault detection turned off at power off time */
+ slot_cmd |= PCI_EXP_SLTCTL_PFDE;
+ cmd_mask |= PCI_EXP_SLTCTL_PFDE;
}
retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-
if (retval) {
ctrl_err(ctrl, "Write %x command failed!\n", slot_cmd);
- return -1;
+ return retval;
}
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
__func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
+ ctrl->power_fault_detected = 0;
return retval;
}
@@ -621,18 +619,10 @@ static int hpc_power_off_slot(struct slot * slot)
slot_cmd = POWER_OFF;
cmd_mask = PCI_EXP_SLTCTL_PCC;
- /*
- * If we get MRL or presence detect interrupts now, the isr
- * will notice the sticky power-fault bit too and issue power
- * indicator change commands. This will lead to an endless loop
- * of command completions, since the power-fault bit remains on
- * till the slot is powered on again.
- */
if (!pciehp_poll_mode) {
- slot_cmd &= ~(PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
- PCI_EXP_SLTCTL_PDCE);
- cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
- PCI_EXP_SLTCTL_PDCE);
+ /* Disable power fault detection */
+ slot_cmd &= ~PCI_EXP_SLTCTL_PFDE;
+ cmd_mask |= PCI_EXP_SLTCTL_PFDE;
}
retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
@@ -672,10 +662,11 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
detected &= (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC |
PCI_EXP_SLTSTA_CC);
+ detected &= ~intr_loc;
intr_loc |= detected;
if (!intr_loc)
return IRQ_NONE;
- if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, detected)) {
+ if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, intr_loc)) {
ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n",
__func__);
return IRQ_NONE;
@@ -709,9 +700,10 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
pciehp_handle_presence_change(p_slot);
/* Check Power Fault Detected */
- if (intr_loc & PCI_EXP_SLTSTA_PFD)
+ if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
+ ctrl->power_fault_detected = 1;
pciehp_handle_power_fault(p_slot);
-
+ }
return IRQ_HANDLED;
}
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index 6aba0b6cf2e0..974e924ca96d 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -48,10 +48,10 @@ extern int shpchp_debug;
extern struct workqueue_struct *shpchp_wq;
#define dbg(format, arg...) \
- do { \
- if (shpchp_debug) \
- printk("%s: " format, MY_NAME , ## arg); \
- } while (0)
+do { \
+ if (shpchp_debug) \
+ printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg); \
+} while (0)
#define err(format, arg...) \
printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
#define info(format, arg...) \
@@ -62,7 +62,7 @@ extern struct workqueue_struct *shpchp_wq;
#define ctrl_dbg(ctrl, format, arg...) \
do { \
if (shpchp_debug) \
- dev_printk(, &ctrl->pci_dev->dev, \
+ dev_printk(KERN_DEBUG, &ctrl->pci_dev->dev, \
format, ## arg); \
} while (0)
#define ctrl_err(ctrl, format, arg...) \
diff --git a/drivers/pci/hotplug/shpchp_pci.c b/drivers/pci/hotplug/shpchp_pci.c
index 138f161becc0..aa315e52529b 100644
--- a/drivers/pci/hotplug/shpchp_pci.c
+++ b/drivers/pci/hotplug/shpchp_pci.c
@@ -137,7 +137,7 @@ int __ref shpchp_configure_device(struct slot *p_slot)
busnr))
break;
}
- if (busnr >= end) {
+ if (busnr > end) {
ctrl_err(ctrl,
"No free bus for hot-added bridge\n");
pci_dev_put(dev);
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index f3f686581a90..fb3a3f3fca7a 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -36,6 +36,7 @@
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
+#include <linux/sysdev.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include "pci.h"
@@ -55,8 +56,8 @@
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
-#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
-#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
+#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
+#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;
@@ -164,7 +165,8 @@ static inline void context_clear_entry(struct context_entry *context)
* 1: writable
* 2-6: reserved
* 7: super page
- * 8-11: available
+ * 8-10: available
+ * 11: snoop behavior
* 12-63: Host physcial address
*/
struct dma_pte {
@@ -186,6 +188,11 @@ static inline void dma_set_pte_writable(struct dma_pte *pte)
pte->val |= DMA_PTE_WRITE;
}
+static inline void dma_set_pte_snp(struct dma_pte *pte)
+{
+ pte->val |= DMA_PTE_SNP;
+}
+
static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
pte->val = (pte->val & ~3) | (prot & 3);
@@ -231,6 +238,7 @@ struct dmar_domain {
int flags; /* flags to find out type of domain */
int iommu_coherency;/* indicate coherency of iommu access */
+ int iommu_snooping; /* indicate snooping control feature*/
int iommu_count; /* reference count of iommu */
spinlock_t iommu_lock; /* protect iommu set in domain */
u64 max_addr; /* maximum mapped address */
@@ -240,7 +248,8 @@ struct dmar_domain {
struct device_domain_info {
struct list_head link; /* link to domain siblings */
struct list_head global; /* link to global list */
- u8 bus; /* PCI bus numer */
+ int segment; /* PCI domain */
+ u8 bus; /* PCI bus number */
u8 devfn; /* PCI devfn number */
struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
struct dmar_domain *domain; /* pointer to domain */
@@ -421,7 +430,6 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
return g_iommus[iommu_id];
}
-/* "Coherency" capability may be different across iommus */
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
int i;
@@ -438,7 +446,30 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
}
}
-static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
+static void domain_update_iommu_snooping(struct dmar_domain *domain)
+{
+ int i;
+
+ domain->iommu_snooping = 1;
+
+ i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
+ for (; i < g_num_of_iommus; ) {
+ if (!ecap_sc_support(g_iommus[i]->ecap)) {
+ domain->iommu_snooping = 0;
+ break;
+ }
+ i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
+ }
+}
+
+/* Some capabilities may be different across iommus */
+static void domain_update_iommu_cap(struct dmar_domain *domain)
+{
+ domain_update_iommu_coherency(domain);
+ domain_update_iommu_snooping(domain);
+}
+
+static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
{
struct dmar_drhd_unit *drhd = NULL;
int i;
@@ -446,12 +477,20 @@ static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
+ if (segment != drhd->segment)
+ continue;
- for (i = 0; i < drhd->devices_cnt; i++)
+ for (i = 0; i < drhd->devices_cnt; i++) {
if (drhd->devices[i] &&
drhd->devices[i]->bus->number == bus &&
drhd->devices[i]->devfn == devfn)
return drhd->iommu;
+ if (drhd->devices[i] &&
+ drhd->devices[i]->subordinate &&
+ drhd->devices[i]->subordinate->number <= bus &&
+ drhd->devices[i]->subordinate->subordinate >= bus)
+ return drhd->iommu;
+ }
if (drhd->include_all)
return drhd->iommu;
@@ -689,15 +728,17 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
{
int addr_width = agaw_to_width(domain->agaw);
+ int npages;
start &= (((u64)1) << addr_width) - 1;
end &= (((u64)1) << addr_width) - 1;
/* in case it's partial page */
start = PAGE_ALIGN(start);
end &= PAGE_MASK;
+ npages = (end - start) / VTD_PAGE_SIZE;
/* we don't need lock here, nobody else touches the iova range */
- while (start < end) {
+ while (npages--) {
dma_pte_clear_one(domain, start);
start += VTD_PAGE_SIZE;
}
@@ -1004,194 +1045,6 @@ static int iommu_disable_translation(struct intel_iommu *iommu)
return 0;
}
-/* iommu interrupt handling. Most stuff are MSI-like. */
-
-static const char *fault_reason_strings[] =
-{
- "Software",
- "Present bit in root entry is clear",
- "Present bit in context entry is clear",
- "Invalid context entry",
- "Access beyond MGAW",
- "PTE Write access is not set",
- "PTE Read access is not set",
- "Next page table ptr is invalid",
- "Root table address invalid",
- "Context table ptr is invalid",
- "non-zero reserved fields in RTP",
- "non-zero reserved fields in CTP",
- "non-zero reserved fields in PTE",
-};
-#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
-
-const char *dmar_get_fault_reason(u8 fault_reason)
-{
- if (fault_reason > MAX_FAULT_REASON_IDX)
- return "Unknown";
- else
- return fault_reason_strings[fault_reason];
-}
-
-void dmar_msi_unmask(unsigned int irq)
-{
- struct intel_iommu *iommu = get_irq_data(irq);
- unsigned long flag;
-
- /* unmask it */
- spin_lock_irqsave(&iommu->register_lock, flag);
- writel(0, iommu->reg + DMAR_FECTL_REG);
- /* Read a reg to force flush the post write */
- readl(iommu->reg + DMAR_FECTL_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
-void dmar_msi_mask(unsigned int irq)
-{
- unsigned long flag;
- struct intel_iommu *iommu = get_irq_data(irq);
-
- /* mask it */
- spin_lock_irqsave(&iommu->register_lock, flag);
- writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
- /* Read a reg to force flush the post write */
- readl(iommu->reg + DMAR_FECTL_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
-void dmar_msi_write(int irq, struct msi_msg *msg)
-{
- struct intel_iommu *iommu = get_irq_data(irq);
- unsigned long flag;
-
- spin_lock_irqsave(&iommu->register_lock, flag);
- writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
- writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
- writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
-void dmar_msi_read(int irq, struct msi_msg *msg)
-{
- struct intel_iommu *iommu = get_irq_data(irq);
- unsigned long flag;
-
- spin_lock_irqsave(&iommu->register_lock, flag);
- msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
- msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
- msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
-static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
- u8 fault_reason, u16 source_id, unsigned long long addr)
-{
- const char *reason;
-
- reason = dmar_get_fault_reason(fault_reason);
-
- printk(KERN_ERR
- "DMAR:[%s] Request device [%02x:%02x.%d] "
- "fault addr %llx \n"
- "DMAR:[fault reason %02d] %s\n",
- (type ? "DMA Read" : "DMA Write"),
- (source_id >> 8), PCI_SLOT(source_id & 0xFF),
- PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
- return 0;
-}
-
-#define PRIMARY_FAULT_REG_LEN (16)
-static irqreturn_t iommu_page_fault(int irq, void *dev_id)
-{
- struct intel_iommu *iommu = dev_id;
- int reg, fault_index;
- u32 fault_status;
- unsigned long flag;
-
- spin_lock_irqsave(&iommu->register_lock, flag);
- fault_status = readl(iommu->reg + DMAR_FSTS_REG);
-
- /* TBD: ignore advanced fault log currently */
- if (!(fault_status & DMA_FSTS_PPF))
- goto clear_overflow;
-
- fault_index = dma_fsts_fault_record_index(fault_status);
- reg = cap_fault_reg_offset(iommu->cap);
- while (1) {
- u8 fault_reason;
- u16 source_id;
- u64 guest_addr;
- int type;
- u32 data;
-
- /* highest 32 bits */
- data = readl(iommu->reg + reg +
- fault_index * PRIMARY_FAULT_REG_LEN + 12);
- if (!(data & DMA_FRCD_F))
- break;
-
- fault_reason = dma_frcd_fault_reason(data);
- type = dma_frcd_type(data);
-
- data = readl(iommu->reg + reg +
- fault_index * PRIMARY_FAULT_REG_LEN + 8);
- source_id = dma_frcd_source_id(data);
-
- guest_addr = dmar_readq(iommu->reg + reg +
- fault_index * PRIMARY_FAULT_REG_LEN);
- guest_addr = dma_frcd_page_addr(guest_addr);
- /* clear the fault */
- writel(DMA_FRCD_F, iommu->reg + reg +
- fault_index * PRIMARY_FAULT_REG_LEN + 12);
-
- spin_unlock_irqrestore(&iommu->register_lock, flag);
-
- iommu_page_fault_do_one(iommu, type, fault_reason,
- source_id, guest_addr);
-
- fault_index++;
- if (fault_index > cap_num_fault_regs(iommu->cap))
- fault_index = 0;
- spin_lock_irqsave(&iommu->register_lock, flag);
- }
-clear_overflow:
- /* clear primary fault overflow */
- fault_status = readl(iommu->reg + DMAR_FSTS_REG);
- if (fault_status & DMA_FSTS_PFO)
- writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
-
- spin_unlock_irqrestore(&iommu->register_lock, flag);
- return IRQ_HANDLED;
-}
-
-int dmar_set_interrupt(struct intel_iommu *iommu)
-{
- int irq, ret;
-
- irq = create_irq();
- if (!irq) {
- printk(KERN_ERR "IOMMU: no free vectors\n");
- return -EINVAL;
- }
-
- set_irq_data(irq, iommu);
- iommu->irq = irq;
-
- ret = arch_setup_dmar_msi(irq);
- if (ret) {
- set_irq_data(irq, NULL);
- iommu->irq = 0;
- destroy_irq(irq);
- return 0;
- }
-
- /* Force fault register is cleared */
- iommu_page_fault(irq, iommu);
-
- ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
- if (ret)
- printk(KERN_ERR "IOMMU: can't request irq\n");
- return ret;
-}
static int iommu_init_domains(struct intel_iommu *iommu)
{
@@ -1429,6 +1282,11 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
else
domain->iommu_coherency = 0;
+ if (ecap_sc_support(iommu->ecap))
+ domain->iommu_snooping = 1;
+ else
+ domain->iommu_snooping = 0;
+
domain->iommu_count = 1;
/* always allocate the top pgd */
@@ -1464,7 +1322,7 @@ static void domain_exit(struct dmar_domain *domain)
}
static int domain_context_mapping_one(struct dmar_domain *domain,
- u8 bus, u8 devfn)
+ int segment, u8 bus, u8 devfn)
{
struct context_entry *context;
unsigned long flags;
@@ -1479,7 +1337,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
BUG_ON(!domain->pgd);
- iommu = device_to_iommu(bus, devfn);
+ iommu = device_to_iommu(segment, bus, devfn);
if (!iommu)
return -ENODEV;
@@ -1557,7 +1415,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
spin_lock_irqsave(&domain->iommu_lock, flags);
if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
domain->iommu_count++;
- domain_update_iommu_coherency(domain);
+ domain_update_iommu_cap(domain);
}
spin_unlock_irqrestore(&domain->iommu_lock, flags);
return 0;
@@ -1569,8 +1427,8 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
int ret;
struct pci_dev *tmp, *parent;
- ret = domain_context_mapping_one(domain, pdev->bus->number,
- pdev->devfn);
+ ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
+ pdev->bus->number, pdev->devfn);
if (ret)
return ret;
@@ -1581,18 +1439,23 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
/* Secondary interface's bus number and devfn 0 */
parent = pdev->bus->self;
while (parent != tmp) {
- ret = domain_context_mapping_one(domain, parent->bus->number,
- parent->devfn);
+ ret = domain_context_mapping_one(domain,
+ pci_domain_nr(parent->bus),
+ parent->bus->number,
+ parent->devfn);
if (ret)
return ret;
parent = parent->bus->self;
}
if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
return domain_context_mapping_one(domain,
- tmp->subordinate->number, 0);
+ pci_domain_nr(tmp->subordinate),
+ tmp->subordinate->number, 0);
else /* this is a legacy PCI bridge */
return domain_context_mapping_one(domain,
- tmp->bus->number, tmp->devfn);
+ pci_domain_nr(tmp->bus),
+ tmp->bus->number,
+ tmp->devfn);
}
static int domain_context_mapped(struct pci_dev *pdev)
@@ -1601,12 +1464,12 @@ static int domain_context_mapped(struct pci_dev *pdev)
struct pci_dev *tmp, *parent;
struct intel_iommu *iommu;
- iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+ iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
+ pdev->devfn);
if (!iommu)
return -ENODEV;
- ret = device_context_mapped(iommu,
- pdev->bus->number, pdev->devfn);
+ ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
if (!ret)
return ret;
/* dependent device mapping */
@@ -1617,17 +1480,17 @@ static int domain_context_mapped(struct pci_dev *pdev)
parent = pdev->bus->self;
while (parent != tmp) {
ret = device_context_mapped(iommu, parent->bus->number,
- parent->devfn);
+ parent->devfn);
if (!ret)
return ret;
parent = parent->bus->self;
}
if (tmp->is_pcie)
- return device_context_mapped(iommu,
- tmp->subordinate->number, 0);
+ return device_context_mapped(iommu, tmp->subordinate->number,
+ 0);
else
- return device_context_mapped(iommu,
- tmp->bus->number, tmp->devfn);
+ return device_context_mapped(iommu, tmp->bus->number,
+ tmp->devfn);
}
static int
@@ -1657,6 +1520,8 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
BUG_ON(dma_pte_addr(pte));
dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
dma_set_pte_prot(pte, prot);
+ if (prot & DMA_PTE_SNP)
+ dma_set_pte_snp(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
start_pfn++;
index++;
@@ -1692,7 +1557,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
info->dev->dev.archdata.iommu = NULL;
spin_unlock_irqrestore(&device_domain_lock, flags);
- iommu = device_to_iommu(info->bus, info->devfn);
+ iommu = device_to_iommu(info->segment, info->bus, info->devfn);
iommu_detach_dev(iommu, info->bus, info->devfn);
free_devinfo_mem(info);
@@ -1727,11 +1592,14 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
struct pci_dev *dev_tmp;
unsigned long flags;
int bus = 0, devfn = 0;
+ int segment;
domain = find_domain(pdev);
if (domain)
return domain;
+ segment = pci_domain_nr(pdev->bus);
+
dev_tmp = pci_find_upstream_pcie_bridge(pdev);
if (dev_tmp) {
if (dev_tmp->is_pcie) {
@@ -1743,7 +1611,8 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
}
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_entry(info, &device_domain_list, global) {
- if (info->bus == bus && info->devfn == devfn) {
+ if (info->segment == segment &&
+ info->bus == bus && info->devfn == devfn) {
found = info->domain;
break;
}
@@ -1781,6 +1650,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
domain_exit(domain);
goto error;
}
+ info->segment = segment;
info->bus = bus;
info->devfn = devfn;
info->dev = NULL;
@@ -1792,7 +1662,8 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
found = NULL;
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_entry(tmp, &device_domain_list, global) {
- if (tmp->bus == bus && tmp->devfn == devfn) {
+ if (tmp->segment == segment &&
+ tmp->bus == bus && tmp->devfn == devfn) {
found = tmp->domain;
break;
}
@@ -1812,6 +1683,7 @@ found_domain:
info = alloc_devinfo_mem();
if (!info)
goto error;
+ info->segment = segment;
info->bus = pdev->bus->number;
info->devfn = pdev->devfn;
info->dev = pdev;
@@ -1970,7 +1842,7 @@ static inline void iommu_prepare_isa(void)
ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
if (ret)
- printk("IOMMU: Failed to create 0-64M identity map, "
+ printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, "
"floppy might not work\n");
}
@@ -1987,7 +1859,7 @@ static int __init init_dmars(void)
struct dmar_rmrr_unit *rmrr;
struct pci_dev *pdev;
struct intel_iommu *iommu;
- int i, ret, unit = 0;
+ int i, ret;
/*
* for each drhd
@@ -2043,11 +1915,40 @@ static int __init init_dmars(void)
}
}
+ /*
+ * Start from the sane iommu hardware state.
+ */
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+
+ iommu = drhd->iommu;
+
+ /*
+ * If the queued invalidation is already initialized by us
+ * (for example, while enabling interrupt-remapping) then
+ * we got the things already rolling from a sane state.
+ */
+ if (iommu->qi)
+ continue;
+
+ /*
+ * Clear any previous faults.
+ */
+ dmar_fault(-1, iommu);
+ /*
+ * Disable queued invalidation if supported and already enabled
+ * before OS handover.
+ */
+ dmar_disable_qi(iommu);
+ }
+
for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
iommu = drhd->iommu;
+
if (dmar_enable_qi(iommu)) {
/*
* Queued Invalidate not enabled, use Register Based
@@ -2067,6 +1968,15 @@ static int __init init_dmars(void)
}
}
+#ifdef CONFIG_INTR_REMAP
+ if (!intr_remapping_enabled) {
+ ret = enable_intr_remapping(0);
+ if (ret)
+ printk(KERN_ERR
+ "IOMMU: enable interrupt remapping failed\n");
+ }
+#endif
+
/*
* For each rmrr
* for each dev attached to rmrr
@@ -2109,7 +2019,6 @@ static int __init init_dmars(void)
if (drhd->ignored)
continue;
iommu = drhd->iommu;
- sprintf (iommu->name, "dmar%d", unit++);
iommu_flush_write_buffer(iommu);
@@ -2171,15 +2080,15 @@ __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
struct pci_dev *pdev = to_pci_dev(dev);
struct iova *iova = NULL;
- if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
+ if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac)
iova = iommu_alloc_iova(domain, size, dma_mask);
else {
/*
* First try to allocate an io virtual address in
- * DMA_32BIT_MASK and if that fails then try allocating
+ * DMA_BIT_MASK(32) and if that fails then try allocating
* from higher range
*/
- iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
+ iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32));
if (!iova)
iova = iommu_alloc_iova(domain, size, dma_mask);
}
@@ -2279,16 +2188,18 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
error:
if (iova)
__free_iova(&domain->iovad, iova);
- printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
+ printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
pci_name(pdev), size, (unsigned long long)paddr, dir);
return 0;
}
-dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
- size_t size, int dir)
+static dma_addr_t intel_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
- return __intel_map_single(hwdev, paddr, size, dir,
- to_pci_dev(hwdev)->dma_mask);
+ return __intel_map_single(dev, page_to_phys(page) + offset, size,
+ dir, to_pci_dev(dev)->dma_mask);
}
static void flush_unmaps(void)
@@ -2352,8 +2263,9 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
-void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
- int dir)
+static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct dmar_domain *domain;
@@ -2375,7 +2287,7 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
start_addr = iova->pfn_lo << PAGE_SHIFT;
size = aligned_size((u64)dev_addr, size);
- pr_debug("Device %s unmapping: %lx@%llx\n",
+ pr_debug("Device %s unmapping: %zx@%llx\n",
pci_name(pdev), size, (unsigned long long)start_addr);
/* clear the whole page */
@@ -2397,8 +2309,14 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
}
}
-void *intel_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags)
+static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
+ int dir)
+{
+ intel_unmap_page(dev, dev_addr, size, dir, NULL);
+}
+
+static void *intel_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags)
{
void *vaddr;
int order;
@@ -2421,8 +2339,8 @@ void *intel_alloc_coherent(struct device *hwdev, size_t size,
return NULL;
}
-void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
- dma_addr_t dma_handle)
+static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
+ dma_addr_t dma_handle)
{
int order;
@@ -2433,10 +2351,9 @@ void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
free_pages((unsigned long)vaddr, order);
}
-#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
-
-void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
- int nelems, int dir)
+static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
int i;
struct pci_dev *pdev = to_pci_dev(hwdev);
@@ -2444,7 +2361,7 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
unsigned long start_addr;
struct iova *iova;
size_t size = 0;
- void *addr;
+ phys_addr_t addr;
struct scatterlist *sg;
struct intel_iommu *iommu;
@@ -2460,7 +2377,7 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
if (!iova)
return;
for_each_sg(sglist, sg, nelems, i) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
+ addr = page_to_phys(sg_page(sg)) + sg->offset;
size += aligned_size((u64)addr, sg->length);
}
@@ -2487,16 +2404,16 @@ static int intel_nontranslate_map_sg(struct device *hddev,
for_each_sg(sglist, sg, nelems, i) {
BUG_ON(!sg_page(sg));
- sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
+ sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
sg->dma_length = sg->length;
}
return nelems;
}
-int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
- int dir)
+static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
- void *addr;
+ phys_addr_t addr;
int i;
struct pci_dev *pdev = to_pci_dev(hwdev);
struct dmar_domain *domain;
@@ -2520,8 +2437,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
iommu = domain_get_iommu(domain);
for_each_sg(sglist, sg, nelems, i) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
- addr = (void *)virt_to_phys(addr);
+ addr = page_to_phys(sg_page(sg)) + sg->offset;
size += aligned_size((u64)addr, sg->length);
}
@@ -2544,8 +2460,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
start_addr = iova->pfn_lo << PAGE_SHIFT;
offset = 0;
for_each_sg(sglist, sg, nelems, i) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
- addr = (void *)virt_to_phys(addr);
+ addr = page_to_phys(sg_page(sg)) + sg->offset;
size = aligned_size((u64)addr, sg->length);
ret = domain_page_mapping(domain, start_addr + offset,
((u64)addr) & PAGE_MASK,
@@ -2574,13 +2489,19 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
return nelems;
}
-static struct dma_mapping_ops intel_dma_ops = {
+static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ return !dma_addr;
+}
+
+struct dma_map_ops intel_dma_ops = {
.alloc_coherent = intel_alloc_coherent,
.free_coherent = intel_free_coherent,
- .map_single = intel_map_single,
- .unmap_single = intel_unmap_single,
.map_sg = intel_map_sg,
.unmap_sg = intel_unmap_sg,
+ .map_page = intel_map_page,
+ .unmap_page = intel_unmap_page,
+ .mapping_error = intel_mapping_error,
};
static inline int iommu_domain_cache_init(void)
@@ -2707,6 +2628,150 @@ static void __init init_no_remapping_devices(void)
}
}
+#ifdef CONFIG_SUSPEND
+static int init_iommu_hw(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu = NULL;
+
+ for_each_active_iommu(iommu, drhd)
+ if (iommu->qi)
+ dmar_reenable_qi(iommu);
+
+ for_each_active_iommu(iommu, drhd) {
+ iommu_flush_write_buffer(iommu);
+
+ iommu_set_root_entry(iommu);
+
+ iommu->flush.flush_context(iommu, 0, 0, 0,
+ DMA_CCMD_GLOBAL_INVL, 0);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0,
+ DMA_TLB_GLOBAL_FLUSH, 0);
+ iommu_disable_protect_mem_regions(iommu);
+ iommu_enable_translation(iommu);
+ }
+
+ return 0;
+}
+
+static void iommu_flush_all(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+
+ for_each_active_iommu(iommu, drhd) {
+ iommu->flush.flush_context(iommu, 0, 0, 0,
+ DMA_CCMD_GLOBAL_INVL, 0);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0,
+ DMA_TLB_GLOBAL_FLUSH, 0);
+ }
+}
+
+static int iommu_suspend(struct sys_device *dev, pm_message_t state)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu = NULL;
+ unsigned long flag;
+
+ for_each_active_iommu(iommu, drhd) {
+ iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
+ GFP_ATOMIC);
+ if (!iommu->iommu_state)
+ goto nomem;
+ }
+
+ iommu_flush_all();
+
+ for_each_active_iommu(iommu, drhd) {
+ iommu_disable_translation(iommu);
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+
+ iommu->iommu_state[SR_DMAR_FECTL_REG] =
+ readl(iommu->reg + DMAR_FECTL_REG);
+ iommu->iommu_state[SR_DMAR_FEDATA_REG] =
+ readl(iommu->reg + DMAR_FEDATA_REG);
+ iommu->iommu_state[SR_DMAR_FEADDR_REG] =
+ readl(iommu->reg + DMAR_FEADDR_REG);
+ iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
+ readl(iommu->reg + DMAR_FEUADDR_REG);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+ }
+ return 0;
+
+nomem:
+ for_each_active_iommu(iommu, drhd)
+ kfree(iommu->iommu_state);
+
+ return -ENOMEM;
+}
+
+static int iommu_resume(struct sys_device *dev)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu = NULL;
+ unsigned long flag;
+
+ if (init_iommu_hw()) {
+ WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
+ return -EIO;
+ }
+
+ for_each_active_iommu(iommu, drhd) {
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+
+ writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
+ iommu->reg + DMAR_FECTL_REG);
+ writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
+ iommu->reg + DMAR_FEDATA_REG);
+ writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
+ iommu->reg + DMAR_FEADDR_REG);
+ writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
+ iommu->reg + DMAR_FEUADDR_REG);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+ }
+
+ for_each_active_iommu(iommu, drhd)
+ kfree(iommu->iommu_state);
+
+ return 0;
+}
+
+static struct sysdev_class iommu_sysclass = {
+ .name = "iommu",
+ .resume = iommu_resume,
+ .suspend = iommu_suspend,
+};
+
+static struct sys_device device_iommu = {
+ .cls = &iommu_sysclass,
+};
+
+static int __init init_iommu_sysfs(void)
+{
+ int error;
+
+ error = sysdev_class_register(&iommu_sysclass);
+ if (error)
+ return error;
+
+ error = sysdev_register(&device_iommu);
+ if (error)
+ sysdev_class_unregister(&iommu_sysclass);
+
+ return error;
+}
+
+#else
+static int __init init_iommu_sysfs(void)
+{
+ return 0;
+}
+#endif /* CONFIG_PM */
+
int __init intel_iommu_init(void)
{
int ret = 0;
@@ -2742,6 +2807,7 @@ int __init intel_iommu_init(void)
init_timer(&unmap_timer);
force_iommu = 1;
dma_ops = &intel_dma_ops;
+ init_iommu_sysfs();
register_iommu(&intel_iommu_ops);
@@ -2758,6 +2824,7 @@ static int vm_domain_add_dev_info(struct dmar_domain *domain,
if (!info)
return -ENOMEM;
+ info->segment = pci_domain_nr(pdev->bus);
info->bus = pdev->bus->number;
info->devfn = pdev->devfn;
info->dev = pdev;
@@ -2772,6 +2839,33 @@ static int vm_domain_add_dev_info(struct dmar_domain *domain,
return 0;
}
+static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
+ struct pci_dev *pdev)
+{
+ struct pci_dev *tmp, *parent;
+
+ if (!iommu || !pdev)
+ return;
+
+ /* dependent device detach */
+ tmp = pci_find_upstream_pcie_bridge(pdev);
+ /* Secondary interface's bus number and devfn 0 */
+ if (tmp) {
+ parent = pdev->bus->self;
+ while (parent != tmp) {
+ iommu_detach_dev(iommu, parent->bus->number,
+ parent->devfn);
+ parent = parent->bus->self;
+ }
+ if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
+ iommu_detach_dev(iommu,
+ tmp->subordinate->number, 0);
+ else /* this is a legacy PCI bridge */
+ iommu_detach_dev(iommu, tmp->bus->number,
+ tmp->devfn);
+ }
+}
+
static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
struct pci_dev *pdev)
{
@@ -2781,13 +2875,15 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
int found = 0;
struct list_head *entry, *tmp;
- iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+ iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
+ pdev->devfn);
if (!iommu)
return;
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_safe(entry, tmp, &domain->devices) {
info = list_entry(entry, struct device_domain_info, link);
+ /* No need to compare PCI domain; it has to be the same */
if (info->bus == pdev->bus->number &&
info->devfn == pdev->devfn) {
list_del(&info->link);
@@ -2797,6 +2893,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
spin_unlock_irqrestore(&device_domain_lock, flags);
iommu_detach_dev(iommu, info->bus, info->devfn);
+ iommu_detach_dependent_devices(iommu, pdev);
free_devinfo_mem(info);
spin_lock_irqsave(&device_domain_lock, flags);
@@ -2811,7 +2908,8 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
* owned by this domain, clear this iommu in iommu_bmp
* update iommu count and coherency
*/
- if (device_to_iommu(info->bus, info->devfn) == iommu)
+ if (iommu == device_to_iommu(info->segment, info->bus,
+ info->devfn))
found = 1;
}
@@ -2820,7 +2918,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
clear_bit(iommu->seq_id, &domain->iommu_bmp);
domain->iommu_count--;
- domain_update_iommu_coherency(domain);
+ domain_update_iommu_cap(domain);
spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
}
@@ -2844,17 +2942,18 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
spin_unlock_irqrestore(&device_domain_lock, flags1);
- iommu = device_to_iommu(info->bus, info->devfn);
+ iommu = device_to_iommu(info->segment, info->bus, info->devfn);
iommu_detach_dev(iommu, info->bus, info->devfn);
+ iommu_detach_dependent_devices(iommu, info->dev);
/* clear this iommu in iommu_bmp, update iommu count
- * and coherency
+ * and capabilities
*/
spin_lock_irqsave(&domain->iommu_lock, flags2);
if (test_and_clear_bit(iommu->seq_id,
&domain->iommu_bmp)) {
domain->iommu_count--;
- domain_update_iommu_coherency(domain);
+ domain_update_iommu_cap(domain);
}
spin_unlock_irqrestore(&domain->iommu_lock, flags2);
@@ -3031,7 +3130,8 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
}
}
- iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+ iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
+ pdev->devfn);
if (!iommu)
return -ENODEV;
@@ -3077,6 +3177,8 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
prot |= DMA_PTE_READ;
if (iommu_prot & IOMMU_WRITE)
prot |= DMA_PTE_WRITE;
+ if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
+ prot |= DMA_PTE_SNP;
max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
if (dmar_domain->max_addr < max_addr) {
@@ -3130,6 +3232,17 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
return phys;
}
+static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
+ unsigned long cap)
+{
+ struct dmar_domain *dmar_domain = domain->priv;
+
+ if (cap == IOMMU_CAP_CACHE_COHERENCY)
+ return dmar_domain->iommu_snooping;
+
+ return 0;
+}
+
static struct iommu_ops intel_iommu_ops = {
.domain_init = intel_iommu_domain_init,
.domain_destroy = intel_iommu_domain_destroy,
@@ -3138,6 +3251,7 @@ static struct iommu_ops intel_iommu_ops = {
.map = intel_iommu_map_range,
.unmap = intel_iommu_unmap_range,
.iova_to_phys = intel_iommu_iova_to_phys,
+ .domain_has_cap = intel_iommu_domain_has_cap,
};
static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 9d07a05d26f1..f5e0ea724a6f 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -9,6 +9,7 @@
#include <asm/cpu.h>
#include <linux/intel-iommu.h>
#include "intr_remapping.h"
+#include <acpi/acpi.h>
static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
static int ir_ioapic_num;
@@ -117,21 +118,22 @@ int get_irte(int irq, struct irte *entry)
{
int index;
struct irq_2_iommu *irq_iommu;
+ unsigned long flags;
if (!entry)
return -1;
- spin_lock(&irq_2_ir_lock);
+ spin_lock_irqsave(&irq_2_ir_lock, flags);
irq_iommu = valid_irq_2_iommu(irq);
if (!irq_iommu) {
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return -1;
}
index = irq_iommu->irte_index + irq_iommu->sub_handle;
*entry = *(irq_iommu->iommu->ir_table->base + index);
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return 0;
}
@@ -141,6 +143,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
struct irq_2_iommu *irq_iommu;
u16 index, start_index;
unsigned int mask = 0;
+ unsigned long flags;
int i;
if (!count)
@@ -170,7 +173,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
return -1;
}
- spin_lock(&irq_2_ir_lock);
+ spin_lock_irqsave(&irq_2_ir_lock, flags);
do {
for (i = index; i < index + count; i++)
if (table->base[i].present)
@@ -182,7 +185,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
if (index == start_index) {
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
printk(KERN_ERR "can't allocate an IRTE\n");
return -1;
}
@@ -193,7 +196,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
irq_iommu = irq_2_iommu_alloc(irq);
if (!irq_iommu) {
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
printk(KERN_ERR "can't allocate irq_2_iommu\n");
return -1;
}
@@ -203,7 +206,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
irq_iommu->sub_handle = 0;
irq_iommu->irte_mask = mask;
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return index;
}
@@ -223,30 +226,32 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle)
{
int index;
struct irq_2_iommu *irq_iommu;
+ unsigned long flags;
- spin_lock(&irq_2_ir_lock);
+ spin_lock_irqsave(&irq_2_ir_lock, flags);
irq_iommu = valid_irq_2_iommu(irq);
if (!irq_iommu) {
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return -1;
}
*sub_handle = irq_iommu->sub_handle;
index = irq_iommu->irte_index;
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return index;
}
int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
{
struct irq_2_iommu *irq_iommu;
+ unsigned long flags;
- spin_lock(&irq_2_ir_lock);
+ spin_lock_irqsave(&irq_2_ir_lock, flags);
irq_iommu = irq_2_iommu_alloc(irq);
if (!irq_iommu) {
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
printk(KERN_ERR "can't allocate irq_2_iommu\n");
return -1;
}
@@ -256,7 +261,7 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
irq_iommu->sub_handle = subhandle;
irq_iommu->irte_mask = 0;
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return 0;
}
@@ -264,11 +269,12 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
{
struct irq_2_iommu *irq_iommu;
+ unsigned long flags;
- spin_lock(&irq_2_ir_lock);
+ spin_lock_irqsave(&irq_2_ir_lock, flags);
irq_iommu = valid_irq_2_iommu(irq);
if (!irq_iommu) {
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return -1;
}
@@ -277,7 +283,7 @@ int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
irq_iommu->sub_handle = 0;
irq_2_iommu(irq)->irte_mask = 0;
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return 0;
}
@@ -289,11 +295,12 @@ int modify_irte(int irq, struct irte *irte_modified)
struct irte *irte;
struct intel_iommu *iommu;
struct irq_2_iommu *irq_iommu;
+ unsigned long flags;
- spin_lock(&irq_2_ir_lock);
+ spin_lock_irqsave(&irq_2_ir_lock, flags);
irq_iommu = valid_irq_2_iommu(irq);
if (!irq_iommu) {
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return -1;
}
@@ -302,11 +309,11 @@ int modify_irte(int irq, struct irte *irte_modified)
index = irq_iommu->irte_index + irq_iommu->sub_handle;
irte = &iommu->ir_table->base[index];
- set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
+ set_64bit((unsigned long *)irte, irte_modified->low);
__iommu_flush_cache(iommu, irte, sizeof(*irte));
rc = qi_flush_iec(iommu, index, 0);
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return rc;
}
@@ -317,11 +324,12 @@ int flush_irte(int irq)
int index;
struct intel_iommu *iommu;
struct irq_2_iommu *irq_iommu;
+ unsigned long flags;
- spin_lock(&irq_2_ir_lock);
+ spin_lock_irqsave(&irq_2_ir_lock, flags);
irq_iommu = valid_irq_2_iommu(irq);
if (!irq_iommu) {
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return -1;
}
@@ -330,7 +338,7 @@ int flush_irte(int irq)
index = irq_iommu->irte_index + irq_iommu->sub_handle;
rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask);
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return rc;
}
@@ -363,11 +371,12 @@ int free_irte(int irq)
struct irte *irte;
struct intel_iommu *iommu;
struct irq_2_iommu *irq_iommu;
+ unsigned long flags;
- spin_lock(&irq_2_ir_lock);
+ spin_lock_irqsave(&irq_2_ir_lock, flags);
irq_iommu = valid_irq_2_iommu(irq);
if (!irq_iommu) {
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return -1;
}
@@ -378,7 +387,7 @@ int free_irte(int irq)
if (!irq_iommu->sub_handle) {
for (i = 0; i < (1 << irq_iommu->irte_mask); i++)
- set_64bit((unsigned long *)irte, 0);
+ set_64bit((unsigned long *)(irte + i), 0);
rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask);
}
@@ -387,7 +396,7 @@ int free_irte(int irq)
irq_iommu->sub_handle = 0;
irq_iommu->irte_mask = 0;
- spin_unlock(&irq_2_ir_lock);
+ spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return rc;
}
@@ -407,12 +416,27 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
/* Set interrupt-remapping table pointer */
cmd = iommu->gcmd | DMA_GCMD_SIRTP;
+ iommu->gcmd |= DMA_GCMD_SIRTP;
writel(cmd, iommu->reg + DMAR_GCMD_REG);
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
readl, (sts & DMA_GSTS_IRTPS), sts);
spin_unlock_irqrestore(&iommu->register_lock, flags);
+ if (mode == 0) {
+ spin_lock_irqsave(&iommu->register_lock, flags);
+
+ /* enable comaptiblity format interrupt pass through */
+ cmd = iommu->gcmd | DMA_GCMD_CFI;
+ iommu->gcmd |= DMA_GCMD_CFI;
+ writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+ readl, (sts & DMA_GSTS_CFIS), sts);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+ }
+
/*
* global invalidation of interrupt entry cache before enabling
* interrupt-remapping.
@@ -439,12 +463,12 @@ static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
struct page *pages;
ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
- GFP_KERNEL);
+ GFP_ATOMIC);
if (!iommu->ir_table)
return -ENOMEM;
- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
+ pages = alloc_pages(GFP_ATOMIC | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
if (!pages) {
printk(KERN_ERR "failed to allocate pages of order %d\n",
@@ -459,11 +483,68 @@ static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
return 0;
}
+/*
+ * Disable Interrupt Remapping.
+ */
+static void iommu_disable_intr_remapping(struct intel_iommu *iommu)
+{
+ unsigned long flags;
+ u32 sts;
+
+ if (!ecap_ir_support(iommu->ecap))
+ return;
+
+ /*
+ * global invalidation of interrupt entry cache before disabling
+ * interrupt-remapping.
+ */
+ qi_global_iec(iommu);
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+
+ sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
+ if (!(sts & DMA_GSTS_IRES))
+ goto end;
+
+ iommu->gcmd &= ~DMA_GCMD_IRE;
+ writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
+
+ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+ readl, !(sts & DMA_GSTS_IRES), sts);
+
+end:
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
int __init enable_intr_remapping(int eim)
{
struct dmar_drhd_unit *drhd;
int setup = 0;
+ for_each_drhd_unit(drhd) {
+ struct intel_iommu *iommu = drhd->iommu;
+
+ /*
+ * If the queued invalidation is already initialized,
+ * shouldn't disable it.
+ */
+ if (iommu->qi)
+ continue;
+
+ /*
+ * Clear previous faults.
+ */
+ dmar_fault(-1, iommu);
+
+ /*
+ * Disable intr remapping and queued invalidation, if already
+ * enabled prior to OS handover.
+ */
+ iommu_disable_intr_remapping(iommu);
+
+ dmar_disable_qi(iommu);
+ }
+
/*
* check for the Interrupt-remapping support
*/
@@ -587,3 +668,54 @@ int __init parse_ioapics_under_ir(void)
return ir_supported;
}
+
+void disable_intr_remapping(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu = NULL;
+
+ /*
+ * Disable Interrupt-remapping for all the DRHD's now.
+ */
+ for_each_iommu(iommu, drhd) {
+ if (!ecap_ir_support(iommu->ecap))
+ continue;
+
+ iommu_disable_intr_remapping(iommu);
+ }
+}
+
+int reenable_intr_remapping(int eim)
+{
+ struct dmar_drhd_unit *drhd;
+ int setup = 0;
+ struct intel_iommu *iommu = NULL;
+
+ for_each_iommu(iommu, drhd)
+ if (iommu->qi)
+ dmar_reenable_qi(iommu);
+
+ /*
+ * Setup Interrupt-remapping for all the DRHD's now.
+ */
+ for_each_iommu(iommu, drhd) {
+ if (!ecap_ir_support(iommu->ecap))
+ continue;
+
+ /* Set up interrupt remapping for iommu.*/
+ iommu_set_intr_remapping(iommu, eim);
+ setup = 1;
+ }
+
+ if (!setup)
+ goto error;
+
+ return 0;
+
+error:
+ /*
+ * handle error condition gracefully here!
+ */
+ return -1;
+}
+
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
new file mode 100644
index 000000000000..b497daab3d4a
--- /dev/null
+++ b/drivers/pci/iov.c
@@ -0,0 +1,681 @@
+/*
+ * drivers/pci/iov.c
+ *
+ * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
+ *
+ * PCI Express I/O Virtualization (IOV) support.
+ * Single Root IOV 1.0
+ */
+
+#include <linux/pci.h>
+#include <linux/mutex.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include "pci.h"
+
+#define VIRTFN_ID_LEN 16
+
+static inline u8 virtfn_bus(struct pci_dev *dev, int id)
+{
+ return dev->bus->number + ((dev->devfn + dev->sriov->offset +
+ dev->sriov->stride * id) >> 8);
+}
+
+static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
+{
+ return (dev->devfn + dev->sriov->offset +
+ dev->sriov->stride * id) & 0xff;
+}
+
+static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
+{
+ int rc;
+ struct pci_bus *child;
+
+ if (bus->number == busnr)
+ return bus;
+
+ child = pci_find_bus(pci_domain_nr(bus), busnr);
+ if (child)
+ return child;
+
+ child = pci_add_new_bus(bus, NULL, busnr);
+ if (!child)
+ return NULL;
+
+ child->subordinate = busnr;
+ child->dev.parent = bus->bridge;
+ rc = pci_bus_add_child(child);
+ if (rc) {
+ pci_remove_bus(child);
+ return NULL;
+ }
+
+ return child;
+}
+
+static void virtfn_remove_bus(struct pci_bus *bus, int busnr)
+{
+ struct pci_bus *child;
+
+ if (bus->number == busnr)
+ return;
+
+ child = pci_find_bus(pci_domain_nr(bus), busnr);
+ BUG_ON(!child);
+
+ if (list_empty(&child->devices))
+ pci_remove_bus(child);
+}
+
+static int virtfn_add(struct pci_dev *dev, int id, int reset)
+{
+ int i;
+ int rc;
+ u64 size;
+ char buf[VIRTFN_ID_LEN];
+ struct pci_dev *virtfn;
+ struct resource *res;
+ struct pci_sriov *iov = dev->sriov;
+
+ virtfn = alloc_pci_dev();
+ if (!virtfn)
+ return -ENOMEM;
+
+ mutex_lock(&iov->dev->sriov->lock);
+ virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id));
+ if (!virtfn->bus) {
+ kfree(virtfn);
+ mutex_unlock(&iov->dev->sriov->lock);
+ return -ENOMEM;
+ }
+ virtfn->devfn = virtfn_devfn(dev, id);
+ virtfn->vendor = dev->vendor;
+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device);
+ pci_setup_device(virtfn);
+ virtfn->dev.parent = dev->dev.parent;
+
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = dev->resource + PCI_IOV_RESOURCES + i;
+ if (!res->parent)
+ continue;
+ virtfn->resource[i].name = pci_name(virtfn);
+ virtfn->resource[i].flags = res->flags;
+ size = resource_size(res);
+ do_div(size, iov->total);
+ virtfn->resource[i].start = res->start + size * id;
+ virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
+ rc = request_resource(res, &virtfn->resource[i]);
+ BUG_ON(rc);
+ }
+
+ if (reset)
+ pci_execute_reset_function(virtfn);
+
+ pci_device_add(virtfn, virtfn->bus);
+ mutex_unlock(&iov->dev->sriov->lock);
+
+ virtfn->physfn = pci_dev_get(dev);
+ virtfn->is_virtfn = 1;
+
+ rc = pci_bus_add_device(virtfn);
+ if (rc)
+ goto failed1;
+ sprintf(buf, "virtfn%u", id);
+ rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
+ if (rc)
+ goto failed1;
+ rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
+ if (rc)
+ goto failed2;
+
+ kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
+
+ return 0;
+
+failed2:
+ sysfs_remove_link(&dev->dev.kobj, buf);
+failed1:
+ pci_dev_put(dev);
+ mutex_lock(&iov->dev->sriov->lock);
+ pci_remove_bus_device(virtfn);
+ virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
+ mutex_unlock(&iov->dev->sriov->lock);
+
+ return rc;
+}
+
+static void virtfn_remove(struct pci_dev *dev, int id, int reset)
+{
+ char buf[VIRTFN_ID_LEN];
+ struct pci_bus *bus;
+ struct pci_dev *virtfn;
+ struct pci_sriov *iov = dev->sriov;
+
+ bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id));
+ if (!bus)
+ return;
+
+ virtfn = pci_get_slot(bus, virtfn_devfn(dev, id));
+ if (!virtfn)
+ return;
+
+ pci_dev_put(virtfn);
+
+ if (reset) {
+ device_release_driver(&virtfn->dev);
+ pci_execute_reset_function(virtfn);
+ }
+
+ sprintf(buf, "virtfn%u", id);
+ sysfs_remove_link(&dev->dev.kobj, buf);
+ sysfs_remove_link(&virtfn->dev.kobj, "physfn");
+
+ mutex_lock(&iov->dev->sriov->lock);
+ pci_remove_bus_device(virtfn);
+ virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
+ mutex_unlock(&iov->dev->sriov->lock);
+
+ pci_dev_put(dev);
+}
+
+static int sriov_migration(struct pci_dev *dev)
+{
+ u16 status;
+ struct pci_sriov *iov = dev->sriov;
+
+ if (!iov->nr_virtfn)
+ return 0;
+
+ if (!(iov->cap & PCI_SRIOV_CAP_VFM))
+ return 0;
+
+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_STATUS, &status);
+ if (!(status & PCI_SRIOV_STATUS_VFM))
+ return 0;
+
+ schedule_work(&iov->mtask);
+
+ return 1;
+}
+
+static void sriov_migration_task(struct work_struct *work)
+{
+ int i;
+ u8 state;
+ u16 status;
+ struct pci_sriov *iov = container_of(work, struct pci_sriov, mtask);
+
+ for (i = iov->initial; i < iov->nr_virtfn; i++) {
+ state = readb(iov->mstate + i);
+ if (state == PCI_SRIOV_VFM_MI) {
+ writeb(PCI_SRIOV_VFM_AV, iov->mstate + i);
+ state = readb(iov->mstate + i);
+ if (state == PCI_SRIOV_VFM_AV)
+ virtfn_add(iov->self, i, 1);
+ } else if (state == PCI_SRIOV_VFM_MO) {
+ virtfn_remove(iov->self, i, 1);
+ writeb(PCI_SRIOV_VFM_UA, iov->mstate + i);
+ state = readb(iov->mstate + i);
+ if (state == PCI_SRIOV_VFM_AV)
+ virtfn_add(iov->self, i, 0);
+ }
+ }
+
+ pci_read_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, &status);
+ status &= ~PCI_SRIOV_STATUS_VFM;
+ pci_write_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, status);
+}
+
+static int sriov_enable_migration(struct pci_dev *dev, int nr_virtfn)
+{
+ int bir;
+ u32 table;
+ resource_size_t pa;
+ struct pci_sriov *iov = dev->sriov;
+
+ if (nr_virtfn <= iov->initial)
+ return 0;
+
+ pci_read_config_dword(dev, iov->pos + PCI_SRIOV_VFM, &table);
+ bir = PCI_SRIOV_VFM_BIR(table);
+ if (bir > PCI_STD_RESOURCE_END)
+ return -EIO;
+
+ table = PCI_SRIOV_VFM_OFFSET(table);
+ if (table + nr_virtfn > pci_resource_len(dev, bir))
+ return -EIO;
+
+ pa = pci_resource_start(dev, bir) + table;
+ iov->mstate = ioremap(pa, nr_virtfn);
+ if (!iov->mstate)
+ return -ENOMEM;
+
+ INIT_WORK(&iov->mtask, sriov_migration_task);
+
+ iov->ctrl |= PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR;
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+
+ return 0;
+}
+
+static void sriov_disable_migration(struct pci_dev *dev)
+{
+ struct pci_sriov *iov = dev->sriov;
+
+ iov->ctrl &= ~(PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR);
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+
+ cancel_work_sync(&iov->mtask);
+ iounmap(iov->mstate);
+}
+
+static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
+{
+ int rc;
+ int i, j;
+ int nres;
+ u16 offset, stride, initial;
+ struct resource *res;
+ struct pci_dev *pdev;
+ struct pci_sriov *iov = dev->sriov;
+
+ if (!nr_virtfn)
+ return 0;
+
+ if (iov->nr_virtfn)
+ return -EINVAL;
+
+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
+ if (initial > iov->total ||
+ (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total)))
+ return -EIO;
+
+ if (nr_virtfn < 0 || nr_virtfn > iov->total ||
+ (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
+ return -EINVAL;
+
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset);
+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride);
+ if (!offset || (nr_virtfn > 1 && !stride))
+ return -EIO;
+
+ nres = 0;
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = dev->resource + PCI_IOV_RESOURCES + i;
+ if (res->parent)
+ nres++;
+ }
+ if (nres != iov->nres) {
+ dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n");
+ return -ENOMEM;
+ }
+
+ iov->offset = offset;
+ iov->stride = stride;
+
+ if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) {
+ dev_err(&dev->dev, "SR-IOV: bus number out of range\n");
+ return -ENOMEM;
+ }
+
+ if (iov->link != dev->devfn) {
+ pdev = pci_get_slot(dev->bus, iov->link);
+ if (!pdev)
+ return -ENODEV;
+
+ pci_dev_put(pdev);
+
+ if (!pdev->is_physfn)
+ return -ENODEV;
+
+ rc = sysfs_create_link(&dev->dev.kobj,
+ &pdev->dev.kobj, "dep_link");
+ if (rc)
+ return rc;
+ }
+
+ iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
+ pci_block_user_cfg_access(dev);
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+ msleep(100);
+ pci_unblock_user_cfg_access(dev);
+
+ iov->initial = initial;
+ if (nr_virtfn < initial)
+ initial = nr_virtfn;
+
+ for (i = 0; i < initial; i++) {
+ rc = virtfn_add(dev, i, 0);
+ if (rc)
+ goto failed;
+ }
+
+ if (iov->cap & PCI_SRIOV_CAP_VFM) {
+ rc = sriov_enable_migration(dev, nr_virtfn);
+ if (rc)
+ goto failed;
+ }
+
+ kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
+ iov->nr_virtfn = nr_virtfn;
+
+ return 0;
+
+failed:
+ for (j = 0; j < i; j++)
+ virtfn_remove(dev, j, 0);
+
+ iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
+ pci_block_user_cfg_access(dev);
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+ ssleep(1);
+ pci_unblock_user_cfg_access(dev);
+
+ if (iov->link != dev->devfn)
+ sysfs_remove_link(&dev->dev.kobj, "dep_link");
+
+ return rc;
+}
+
+static void sriov_disable(struct pci_dev *dev)
+{
+ int i;
+ struct pci_sriov *iov = dev->sriov;
+
+ if (!iov->nr_virtfn)
+ return;
+
+ if (iov->cap & PCI_SRIOV_CAP_VFM)
+ sriov_disable_migration(dev);
+
+ for (i = 0; i < iov->nr_virtfn; i++)
+ virtfn_remove(dev, i, 0);
+
+ iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
+ pci_block_user_cfg_access(dev);
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+ ssleep(1);
+ pci_unblock_user_cfg_access(dev);
+
+ if (iov->link != dev->devfn)
+ sysfs_remove_link(&dev->dev.kobj, "dep_link");
+
+ iov->nr_virtfn = 0;
+}
+
+static int sriov_init(struct pci_dev *dev, int pos)
+{
+ int i;
+ int rc;
+ int nres;
+ u32 pgsz;
+ u16 ctrl, total, offset, stride;
+ struct pci_sriov *iov;
+ struct resource *res;
+ struct pci_dev *pdev;
+
+ if (dev->pcie_type != PCI_EXP_TYPE_RC_END &&
+ dev->pcie_type != PCI_EXP_TYPE_ENDPOINT)
+ return -ENODEV;
+
+ pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
+ if (ctrl & PCI_SRIOV_CTRL_VFE) {
+ pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
+ ssleep(1);
+ }
+
+ pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
+ if (!total)
+ return 0;
+
+ ctrl = 0;
+ list_for_each_entry(pdev, &dev->bus->devices, bus_list)
+ if (pdev->is_physfn)
+ goto found;
+
+ pdev = NULL;
+ if (pci_ari_enabled(dev->bus))
+ ctrl |= PCI_SRIOV_CTRL_ARI;
+
+found:
+ pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
+ pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total);
+ pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset);
+ pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride);
+ if (!offset || (total > 1 && !stride))
+ return -EIO;
+
+ pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
+ i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
+ pgsz &= ~((1 << i) - 1);
+ if (!pgsz)
+ return -EIO;
+
+ pgsz &= ~(pgsz - 1);
+ pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
+
+ nres = 0;
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = dev->resource + PCI_IOV_RESOURCES + i;
+ i += __pci_read_base(dev, pci_bar_unknown, res,
+ pos + PCI_SRIOV_BAR + i * 4);
+ if (!res->flags)
+ continue;
+ if (resource_size(res) & (PAGE_SIZE - 1)) {
+ rc = -EIO;
+ goto failed;
+ }
+ res->end = res->start + resource_size(res) * total - 1;
+ nres++;
+ }
+
+ iov = kzalloc(sizeof(*iov), GFP_KERNEL);
+ if (!iov) {
+ rc = -ENOMEM;
+ goto failed;
+ }
+
+ iov->pos = pos;
+ iov->nres = nres;
+ iov->ctrl = ctrl;
+ iov->total = total;
+ iov->offset = offset;
+ iov->stride = stride;
+ iov->pgsz = pgsz;
+ iov->self = dev;
+ pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
+ pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
+
+ if (pdev)
+ iov->dev = pci_dev_get(pdev);
+ else {
+ iov->dev = dev;
+ mutex_init(&iov->lock);
+ }
+
+ dev->sriov = iov;
+ dev->is_physfn = 1;
+
+ return 0;
+
+failed:
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = dev->resource + PCI_IOV_RESOURCES + i;
+ res->flags = 0;
+ }
+
+ return rc;
+}
+
+static void sriov_release(struct pci_dev *dev)
+{
+ BUG_ON(dev->sriov->nr_virtfn);
+
+ if (dev == dev->sriov->dev)
+ mutex_destroy(&dev->sriov->lock);
+ else
+ pci_dev_put(dev->sriov->dev);
+
+ kfree(dev->sriov);
+ dev->sriov = NULL;
+}
+
+static void sriov_restore_state(struct pci_dev *dev)
+{
+ int i;
+ u16 ctrl;
+ struct pci_sriov *iov = dev->sriov;
+
+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
+ if (ctrl & PCI_SRIOV_CTRL_VFE)
+ return;
+
+ for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
+ pci_update_resource(dev, i);
+
+ pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn);
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+ if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
+ msleep(100);
+}
+
+/**
+ * pci_iov_init - initialize the IOV capability
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_iov_init(struct pci_dev *dev)
+{
+ int pos;
+
+ if (!dev->is_pcie)
+ return -ENODEV;
+
+ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+ if (pos)
+ return sriov_init(dev, pos);
+
+ return -ENODEV;
+}
+
+/**
+ * pci_iov_release - release resources used by the IOV capability
+ * @dev: the PCI device
+ */
+void pci_iov_release(struct pci_dev *dev)
+{
+ if (dev->is_physfn)
+ sriov_release(dev);
+}
+
+/**
+ * pci_iov_resource_bar - get position of the SR-IOV BAR
+ * @dev: the PCI device
+ * @resno: the resource number
+ * @type: the BAR type to be filled in
+ *
+ * Returns position of the BAR encapsulated in the SR-IOV capability.
+ */
+int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+ enum pci_bar_type *type)
+{
+ if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END)
+ return 0;
+
+ BUG_ON(!dev->is_physfn);
+
+ *type = pci_bar_unknown;
+
+ return dev->sriov->pos + PCI_SRIOV_BAR +
+ 4 * (resno - PCI_IOV_RESOURCES);
+}
+
+/**
+ * pci_restore_iov_state - restore the state of the IOV capability
+ * @dev: the PCI device
+ */
+void pci_restore_iov_state(struct pci_dev *dev)
+{
+ if (dev->is_physfn)
+ sriov_restore_state(dev);
+}
+
+/**
+ * pci_iov_bus_range - find bus range used by Virtual Function
+ * @bus: the PCI bus
+ *
+ * Returns max number of buses (exclude current one) used by Virtual
+ * Functions.
+ */
+int pci_iov_bus_range(struct pci_bus *bus)
+{
+ int max = 0;
+ u8 busnr;
+ struct pci_dev *dev;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ if (!dev->is_physfn)
+ continue;
+ busnr = virtfn_bus(dev, dev->sriov->total - 1);
+ if (busnr > max)
+ max = busnr;
+ }
+
+ return max ? max - bus->number : 0;
+}
+
+/**
+ * pci_enable_sriov - enable the SR-IOV capability
+ * @dev: the PCI device
+ * @nr_virtfn: number of virtual functions to enable
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+{
+ might_sleep();
+
+ if (!dev->is_physfn)
+ return -ENODEV;
+
+ return sriov_enable(dev, nr_virtfn);
+}
+EXPORT_SYMBOL_GPL(pci_enable_sriov);
+
+/**
+ * pci_disable_sriov - disable the SR-IOV capability
+ * @dev: the PCI device
+ */
+void pci_disable_sriov(struct pci_dev *dev)
+{
+ might_sleep();
+
+ if (!dev->is_physfn)
+ return;
+
+ sriov_disable(dev);
+}
+EXPORT_SYMBOL_GPL(pci_disable_sriov);
+
+/**
+ * pci_sriov_migration - notify SR-IOV core of Virtual Function Migration
+ * @dev: the PCI device
+ *
+ * Returns IRQ_HANDLED if the IRQ is handled, or IRQ_NONE if not.
+ *
+ * Physical Function driver is responsible to register IRQ handler using
+ * VF Migration Interrupt Message Number, and call this function when the
+ * interrupt is generated by the hardware.
+ */
+irqreturn_t pci_sriov_migration(struct pci_dev *dev)
+{
+ if (!dev->is_physfn)
+ return IRQ_NONE;
+
+ return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE;
+}
+EXPORT_SYMBOL_GPL(pci_sriov_migration);
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index baba2eb5367d..6f2e6295e773 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -27,48 +27,53 @@ static int pci_msi_enable = 1;
/* Arch hooks */
-int __attribute__ ((weak))
-arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
+#ifndef arch_msi_check_device
+int arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
return 0;
}
+#endif
-int __attribute__ ((weak))
-arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry)
-{
- return 0;
-}
-
-int __attribute__ ((weak))
-arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+#ifndef arch_setup_msi_irqs
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
struct msi_desc *entry;
int ret;
+ /*
+ * If an architecture wants to support multiple MSI, it needs to
+ * override arch_setup_msi_irqs()
+ */
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
+
list_for_each_entry(entry, &dev->msi_list, list) {
ret = arch_setup_msi_irq(dev, entry);
- if (ret)
+ if (ret < 0)
return ret;
+ if (ret > 0)
+ return -ENOSPC;
}
return 0;
}
+#endif
-void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq)
-{
- return;
-}
-
-void __attribute__ ((weak))
-arch_teardown_msi_irqs(struct pci_dev *dev)
+#ifndef arch_teardown_msi_irqs
+void arch_teardown_msi_irqs(struct pci_dev *dev)
{
struct msi_desc *entry;
list_for_each_entry(entry, &dev->msi_list, list) {
- if (entry->irq != 0)
- arch_teardown_msi_irq(entry->irq);
+ int i, nvec;
+ if (entry->irq == 0)
+ continue;
+ nvec = 1 << entry->msi_attrib.multiple;
+ for (i = 0; i < nvec; i++)
+ arch_teardown_msi_irq(entry->irq + i);
}
}
+#endif
static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
{
@@ -111,27 +116,14 @@ static inline __attribute_const__ u32 msi_mask(unsigned x)
return (1 << (1 << x)) - 1;
}
-static void msix_flush_writes(struct irq_desc *desc)
+static inline __attribute_const__ u32 msi_capable_mask(u16 control)
{
- struct msi_desc *entry;
+ return msi_mask((control >> 1) & 7);
+}
- entry = get_irq_desc_msi(desc);
- BUG_ON(!entry || !entry->dev);
- switch (entry->msi_attrib.type) {
- case PCI_CAP_ID_MSI:
- /* nothing to do */
- break;
- case PCI_CAP_ID_MSIX:
- {
- int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
- readl(entry->mask_base + offset);
- break;
- }
- default:
- BUG();
- break;
- }
+static inline __attribute_const__ u32 msi_enabled_mask(u16 control)
+{
+ return msi_mask((control >> 4) & 7);
}
/*
@@ -143,49 +135,71 @@ static void msix_flush_writes(struct irq_desc *desc)
* Returns 1 if it succeeded in masking the interrupt and 0 if the device
* doesn't support MSI masking.
*/
-static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag)
+static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
- struct msi_desc *entry;
+ u32 mask_bits = desc->masked;
- entry = get_irq_desc_msi(desc);
- BUG_ON(!entry || !entry->dev);
- switch (entry->msi_attrib.type) {
- case PCI_CAP_ID_MSI:
- if (entry->msi_attrib.maskbit) {
- int pos;
- u32 mask_bits;
-
- pos = (long)entry->mask_base;
- pci_read_config_dword(entry->dev, pos, &mask_bits);
- mask_bits &= ~(mask);
- mask_bits |= flag & mask;
- pci_write_config_dword(entry->dev, pos, mask_bits);
- } else {
- return 0;
- }
- break;
- case PCI_CAP_ID_MSIX:
- {
- int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
- writel(flag, entry->mask_base + offset);
- readl(entry->mask_base + offset);
- break;
- }
- default:
- BUG();
- break;
+ if (!desc->msi_attrib.maskbit)
+ return;
+
+ mask_bits &= ~mask;
+ mask_bits |= flag;
+ pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);
+ desc->masked = mask_bits;
+}
+
+/*
+ * This internal function does not flush PCI writes to the device.
+ * All users must ensure that they read from the device before either
+ * assuming that the device state is up to date, or returning out of this
+ * file. This saves a few milliseconds when initialising devices with lots
+ * of MSI-X interrupts.
+ */
+static void msix_mask_irq(struct msi_desc *desc, u32 flag)
+{
+ u32 mask_bits = desc->masked;
+ unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+ PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+ mask_bits &= ~1;
+ mask_bits |= flag;
+ writel(mask_bits, desc->mask_base + offset);
+ desc->masked = mask_bits;
+}
+
+static void msi_set_mask_bit(unsigned irq, u32 flag)
+{
+ struct msi_desc *desc = get_irq_msi(irq);
+
+ if (desc->msi_attrib.is_msix) {
+ msix_mask_irq(desc, flag);
+ readl(desc->mask_base); /* Flush write to device */
+ } else {
+ unsigned offset = irq - desc->dev->irq;
+ msi_mask_irq(desc, 1 << offset, flag << offset);
}
- entry->msi_attrib.masked = !!flag;
- return 1;
+}
+
+void mask_msi_irq(unsigned int irq)
+{
+ msi_set_mask_bit(irq, 1);
+}
+
+void unmask_msi_irq(unsigned int irq)
+{
+ msi_set_mask_bit(irq, 0);
}
void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
{
struct msi_desc *entry = get_irq_desc_msi(desc);
- switch(entry->msi_attrib.type) {
- case PCI_CAP_ID_MSI:
- {
+ if (entry->msi_attrib.is_msix) {
+ void __iomem *base = entry->mask_base +
+ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+
+ msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
+ msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
+ msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
+ } else {
struct pci_dev *dev = entry->dev;
int pos = entry->msi_attrib.pos;
u16 data;
@@ -201,21 +215,6 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
}
msg->data = data;
- break;
- }
- case PCI_CAP_ID_MSIX:
- {
- void __iomem *base;
- base = entry->mask_base +
- entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
-
- msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
- msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
- msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
- break;
- }
- default:
- BUG();
}
}
@@ -229,11 +228,25 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg)
void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
{
struct msi_desc *entry = get_irq_desc_msi(desc);
- switch (entry->msi_attrib.type) {
- case PCI_CAP_ID_MSI:
- {
+ if (entry->msi_attrib.is_msix) {
+ void __iomem *base;
+ base = entry->mask_base +
+ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+
+ writel(msg->address_lo,
+ base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
+ writel(msg->address_hi,
+ base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
+ writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
+ } else {
struct pci_dev *dev = entry->dev;
int pos = entry->msi_attrib.pos;
+ u16 msgctl;
+
+ pci_read_config_word(dev, msi_control_reg(pos), &msgctl);
+ msgctl &= ~PCI_MSI_FLAGS_QSIZE;
+ msgctl |= entry->msi_attrib.multiple << 4;
+ pci_write_config_word(dev, msi_control_reg(pos), msgctl);
pci_write_config_dword(dev, msi_lower_address_reg(pos),
msg->address_lo);
@@ -246,23 +259,6 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
pci_write_config_word(dev, msi_data_reg(pos, 0),
msg->data);
}
- break;
- }
- case PCI_CAP_ID_MSIX:
- {
- void __iomem *base;
- base = entry->mask_base +
- entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
-
- writel(msg->address_lo,
- base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
- writel(msg->address_hi,
- base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
- writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
- break;
- }
- default:
- BUG();
}
entry->msg = *msg;
}
@@ -274,37 +270,18 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg)
write_msi_msg_desc(desc, msg);
}
-void mask_msi_irq(unsigned int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
-
- msi_set_mask_bits(desc, 1, 1);
- msix_flush_writes(desc);
-}
-
-void unmask_msi_irq(unsigned int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
-
- msi_set_mask_bits(desc, 1, 0);
- msix_flush_writes(desc);
-}
-
static int msi_free_irqs(struct pci_dev* dev);
-static struct msi_desc* alloc_msi_entry(void)
+static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
{
- struct msi_desc *entry;
-
- entry = kzalloc(sizeof(struct msi_desc), GFP_KERNEL);
- if (!entry)
+ struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc)
return NULL;
- INIT_LIST_HEAD(&entry->list);
- entry->irq = 0;
- entry->dev = NULL;
+ INIT_LIST_HEAD(&desc->list);
+ desc->dev = dev;
- return entry;
+ return desc;
}
static void pci_intx_for_msi(struct pci_dev *dev, int enable)
@@ -328,15 +305,11 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
pci_intx_for_msi(dev, 0);
msi_set_enable(dev, 0);
write_msi_msg(dev->irq, &entry->msg);
- if (entry->msi_attrib.maskbit) {
- struct irq_desc *desc = irq_to_desc(dev->irq);
- msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask,
- entry->msi_attrib.masked);
- }
pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
+ msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
control &= ~PCI_MSI_FLAGS_QSIZE;
- control |= PCI_MSI_FLAGS_ENABLE;
+ control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
}
@@ -354,9 +327,8 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
msix_set_enable(dev, 0);
list_for_each_entry(entry, &dev->msi_list, list) {
- struct irq_desc *desc = irq_to_desc(entry->irq);
write_msi_msg(entry->irq, &entry->msg);
- msi_set_mask_bits(desc, 1, entry->msi_attrib.masked);
+ msix_mask_irq(entry, entry->masked);
}
BUG_ON(list_empty(&dev->msi_list));
@@ -378,52 +350,48 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state);
/**
* msi_capability_init - configure device's MSI capability structure
* @dev: pointer to the pci_dev data structure of MSI device function
+ * @nvec: number of interrupts to allocate
*
- * Setup the MSI capability structure of device function with a single
- * MSI irq, regardless of device function is capable of handling
- * multiple messages. A return of zero indicates the successful setup
- * of an entry zero with the new MSI irq or non-zero for otherwise.
- **/
-static int msi_capability_init(struct pci_dev *dev)
+ * Setup the MSI capability structure of the device with the requested
+ * number of interrupts. A return value of zero indicates the successful
+ * setup of an entry with the new MSI irq. A negative return value indicates
+ * an error, and a positive return value indicates the number of interrupts
+ * which could have been allocated.
+ */
+static int msi_capability_init(struct pci_dev *dev, int nvec)
{
struct msi_desc *entry;
int pos, ret;
u16 control;
+ unsigned mask;
msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
pci_read_config_word(dev, msi_control_reg(pos), &control);
/* MSI Entry Initialization */
- entry = alloc_msi_entry();
+ entry = alloc_msi_entry(dev);
if (!entry)
return -ENOMEM;
- entry->msi_attrib.type = PCI_CAP_ID_MSI;
+ entry->msi_attrib.is_msix = 0;
entry->msi_attrib.is_64 = is_64bit_address(control);
entry->msi_attrib.entry_nr = 0;
entry->msi_attrib.maskbit = is_mask_bit_support(control);
- entry->msi_attrib.masked = 1;
entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */
entry->msi_attrib.pos = pos;
- entry->dev = dev;
- if (entry->msi_attrib.maskbit) {
- unsigned int base, maskbits, temp;
-
- base = msi_mask_bits_reg(pos, entry->msi_attrib.is_64);
- entry->mask_base = (void __iomem *)(long)base;
-
- /* All MSIs are unmasked by default, Mask them all */
- pci_read_config_dword(dev, base, &maskbits);
- temp = msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1);
- maskbits |= temp;
- pci_write_config_dword(dev, base, maskbits);
- entry->msi_attrib.maskbits_mask = temp;
- }
+
+ entry->mask_pos = msi_mask_bits_reg(pos, entry->msi_attrib.is_64);
+ /* All MSIs are unmasked by default, Mask them all */
+ if (entry->msi_attrib.maskbit)
+ pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
+ mask = msi_capable_mask(control);
+ msi_mask_irq(entry, mask, mask);
+
list_add_tail(&entry->list, &dev->msi_list);
/* Configure MSI capability structure */
- ret = arch_setup_msi_irqs(dev, 1, PCI_CAP_ID_MSI);
+ ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
if (ret) {
msi_free_irqs(dev);
return ret;
@@ -476,26 +444,28 @@ static int msix_capability_init(struct pci_dev *dev,
/* MSI-X Table Initialization */
for (i = 0; i < nvec; i++) {
- entry = alloc_msi_entry();
+ entry = alloc_msi_entry(dev);
if (!entry)
break;
j = entries[i].entry;
- entry->msi_attrib.type = PCI_CAP_ID_MSIX;
+ entry->msi_attrib.is_msix = 1;
entry->msi_attrib.is_64 = 1;
entry->msi_attrib.entry_nr = j;
- entry->msi_attrib.maskbit = 1;
- entry->msi_attrib.masked = 1;
entry->msi_attrib.default_irq = dev->irq;
entry->msi_attrib.pos = pos;
- entry->dev = dev;
entry->mask_base = base;
+ entry->masked = readl(base + j * PCI_MSIX_ENTRY_SIZE +
+ PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+ msix_mask_irq(entry, 1);
list_add_tail(&entry->list, &dev->msi_list);
}
ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
- if (ret) {
+ if (ret < 0) {
+ /* If we had some success report the number of irqs
+ * we succeeded in setting up. */
int avail = 0;
list_for_each_entry(entry, &dev->msi_list, list) {
if (entry->irq != 0) {
@@ -503,14 +473,13 @@ static int msix_capability_init(struct pci_dev *dev,
}
}
- msi_free_irqs(dev);
+ if (avail != 0)
+ ret = avail;
+ }
- /* If we had some success report the number of irqs
- * we succeeded in setting up.
- */
- if (avail == 0)
- avail = ret;
- return avail;
+ if (ret) {
+ msi_free_irqs(dev);
+ return ret;
}
i = 0;
@@ -575,39 +544,54 @@ static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
}
/**
- * pci_enable_msi - configure device's MSI capability structure
- * @dev: pointer to the pci_dev data structure of MSI device function
+ * pci_enable_msi_block - configure device's MSI capability structure
+ * @dev: device to configure
+ * @nvec: number of interrupts to configure
*
- * Setup the MSI capability structure of device function with
- * a single MSI irq upon its software driver call to request for
- * MSI mode enabled on its hardware device function. A return of zero
- * indicates the successful setup of an entry zero with the new MSI
- * irq or non-zero for otherwise.
- **/
-int pci_enable_msi(struct pci_dev* dev)
+ * Allocate IRQs for a device with the MSI capability.
+ * This function returns a negative errno if an error occurs. If it
+ * is unable to allocate the number of interrupts requested, it returns
+ * the number of interrupts it might be able to allocate. If it successfully
+ * allocates at least the number of interrupts requested, it returns 0 and
+ * updates the @dev's irq member to the lowest new interrupt number; the
+ * other interrupt numbers allocated to this device are consecutive.
+ */
+int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
{
- int status;
+ int status, pos, maxvec;
+ u16 msgctl;
- status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI);
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+ if (!pos)
+ return -EINVAL;
+ pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
+ maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
+ if (nvec > maxvec)
+ return maxvec;
+
+ status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
if (status)
return status;
WARN_ON(!!dev->msi_enabled);
- /* Check whether driver already requested for MSI-X irqs */
+ /* Check whether driver already requested MSI-X irqs */
if (dev->msix_enabled) {
dev_info(&dev->dev, "can't enable MSI "
"(MSI-X already enabled)\n");
return -EINVAL;
}
- status = msi_capability_init(dev);
+
+ status = msi_capability_init(dev, nvec);
return status;
}
-EXPORT_SYMBOL(pci_enable_msi);
+EXPORT_SYMBOL(pci_enable_msi_block);
-void pci_msi_shutdown(struct pci_dev* dev)
+void pci_msi_shutdown(struct pci_dev *dev)
{
- struct msi_desc *entry;
+ struct msi_desc *desc;
+ u32 mask;
+ u16 ctrl;
if (!pci_msi_enable || !dev || !dev->msi_enabled)
return;
@@ -617,19 +601,15 @@ void pci_msi_shutdown(struct pci_dev* dev)
dev->msi_enabled = 0;
BUG_ON(list_empty(&dev->msi_list));
- entry = list_entry(dev->msi_list.next, struct msi_desc, list);
- /* Return the the pci reset with msi irqs unmasked */
- if (entry->msi_attrib.maskbit) {
- u32 mask = entry->msi_attrib.maskbits_mask;
- struct irq_desc *desc = irq_to_desc(dev->irq);
- msi_set_mask_bits(desc, mask, ~mask);
- }
- if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
- return;
+ desc = list_first_entry(&dev->msi_list, struct msi_desc, list);
+ pci_read_config_word(dev, desc->msi_attrib.pos + PCI_MSI_FLAGS, &ctrl);
+ mask = msi_capable_mask(ctrl);
+ msi_mask_irq(desc, mask, ~mask);
/* Restore dev->irq to its default pin-assertion irq */
- dev->irq = entry->msi_attrib.default_irq;
+ dev->irq = desc->msi_attrib.default_irq;
}
+
void pci_disable_msi(struct pci_dev* dev)
{
struct msi_desc *entry;
@@ -640,7 +620,7 @@ void pci_disable_msi(struct pci_dev* dev)
pci_msi_shutdown(dev);
entry = list_entry(dev->msi_list.next, struct msi_desc, list);
- if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
+ if (entry->msi_attrib.is_msix)
return;
msi_free_irqs(dev);
@@ -652,14 +632,18 @@ static int msi_free_irqs(struct pci_dev* dev)
struct msi_desc *entry, *tmp;
list_for_each_entry(entry, &dev->msi_list, list) {
- if (entry->irq)
- BUG_ON(irq_has_action(entry->irq));
+ int i, nvec;
+ if (!entry->irq)
+ continue;
+ nvec = 1 << entry->msi_attrib.multiple;
+ for (i = 0; i < nvec; i++)
+ BUG_ON(irq_has_action(entry->irq + i));
}
arch_teardown_msi_irqs(dev);
list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
- if (entry->msi_attrib.type == PCI_CAP_ID_MSIX) {
+ if (entry->msi_attrib.is_msix) {
writel(1, entry->mask_base + entry->msi_attrib.entry_nr
* PCI_MSIX_ENTRY_SIZE
+ PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
@@ -675,6 +659,23 @@ static int msi_free_irqs(struct pci_dev* dev)
}
/**
+ * pci_msix_table_size - return the number of device's MSI-X table entries
+ * @dev: pointer to the pci_dev data structure of MSI-X device function
+ */
+int pci_msix_table_size(struct pci_dev *dev)
+{
+ int pos;
+ u16 control;
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+ if (!pos)
+ return 0;
+
+ pci_read_config_word(dev, msi_control_reg(pos), &control);
+ return multi_msix_capable(control);
+}
+
+/**
* pci_enable_msix - configure device's MSI-X capability structure
* @dev: pointer to the pci_dev data structure of MSI-X device function
* @entries: pointer to an array of MSI-X entries
@@ -691,9 +692,8 @@ static int msi_free_irqs(struct pci_dev* dev)
**/
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
{
- int status, pos, nr_entries;
+ int status, nr_entries;
int i, j;
- u16 control;
if (!entries)
return -EINVAL;
@@ -702,9 +702,7 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
if (status)
return status;
- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
- pci_read_config_word(dev, msi_control_reg(pos), &control);
- nr_entries = multi_msix_capable(control);
+ nr_entries = pci_msix_table_size(dev);
if (nvec > nr_entries)
return -EINVAL;
diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h
index 3898f5237144..71f4df2ef654 100644
--- a/drivers/pci/msi.h
+++ b/drivers/pci/msi.h
@@ -20,14 +20,8 @@
#define msi_mask_bits_reg(base, is64bit) \
( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
#define msi_disable(control) control &= ~PCI_MSI_FLAGS_ENABLE
-#define multi_msi_capable(control) \
- (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
-#define multi_msi_enable(control, num) \
- control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
#define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT))
#define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT))
-#define msi_enable(control, num) multi_msi_enable(control, num); \
- control |= PCI_MSI_FLAGS_ENABLE
#define msix_table_offset_reg(base) (base + 0x04)
#define msix_pba_offset_reg(base) (base + 0x08)
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index deea8a187eb8..ea15b0537457 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -18,221 +18,6 @@
#include <linux/pci-acpi.h>
#include "pci.h"
-struct acpi_osc_data {
- acpi_handle handle;
- u32 support_set;
- u32 control_set;
- u32 control_query;
- int is_queried;
- struct list_head sibiling;
-};
-static LIST_HEAD(acpi_osc_data_list);
-
-struct acpi_osc_args {
- u32 capbuf[3];
-};
-
-static DEFINE_MUTEX(pci_acpi_lock);
-
-static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle)
-{
- struct acpi_osc_data *data;
-
- list_for_each_entry(data, &acpi_osc_data_list, sibiling) {
- if (data->handle == handle)
- return data;
- }
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- return NULL;
- INIT_LIST_HEAD(&data->sibiling);
- data->handle = handle;
- list_add_tail(&data->sibiling, &acpi_osc_data_list);
- return data;
-}
-
-static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40,
- 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66};
-
-static acpi_status acpi_run_osc(acpi_handle handle,
- struct acpi_osc_args *osc_args, u32 *retval)
-{
- acpi_status status;
- struct acpi_object_list input;
- union acpi_object in_params[4];
- struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
- union acpi_object *out_obj;
- u32 errors, flags = osc_args->capbuf[OSC_QUERY_TYPE];
-
- /* Setting up input parameters */
- input.count = 4;
- input.pointer = in_params;
- in_params[0].type = ACPI_TYPE_BUFFER;
- in_params[0].buffer.length = 16;
- in_params[0].buffer.pointer = OSC_UUID;
- in_params[1].type = ACPI_TYPE_INTEGER;
- in_params[1].integer.value = 1;
- in_params[2].type = ACPI_TYPE_INTEGER;
- in_params[2].integer.value = 3;
- in_params[3].type = ACPI_TYPE_BUFFER;
- in_params[3].buffer.length = 12;
- in_params[3].buffer.pointer = (u8 *)osc_args->capbuf;
-
- status = acpi_evaluate_object(handle, "_OSC", &input, &output);
- if (ACPI_FAILURE(status))
- return status;
-
- if (!output.length)
- return AE_NULL_OBJECT;
-
- out_obj = output.pointer;
- if (out_obj->type != ACPI_TYPE_BUFFER) {
- printk(KERN_DEBUG "Evaluate _OSC returns wrong type\n");
- status = AE_TYPE;
- goto out_kfree;
- }
- /* Need to ignore the bit0 in result code */
- errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
- if (errors) {
- if (errors & OSC_REQUEST_ERROR)
- printk(KERN_DEBUG "_OSC request fails\n");
- if (errors & OSC_INVALID_UUID_ERROR)
- printk(KERN_DEBUG "_OSC invalid UUID\n");
- if (errors & OSC_INVALID_REVISION_ERROR)
- printk(KERN_DEBUG "_OSC invalid revision\n");
- if (errors & OSC_CAPABILITIES_MASK_ERROR) {
- if (flags & OSC_QUERY_ENABLE)
- goto out_success;
- printk(KERN_DEBUG "_OSC FW not grant req. control\n");
- status = AE_SUPPORT;
- goto out_kfree;
- }
- status = AE_ERROR;
- goto out_kfree;
- }
-out_success:
- *retval = *((u32 *)(out_obj->buffer.pointer + 8));
- status = AE_OK;
-
-out_kfree:
- kfree(output.pointer);
- return status;
-}
-
-static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data)
-{
- acpi_status status;
- u32 support_set, result;
- struct acpi_osc_args osc_args;
-
- /* do _OSC query for all possible controls */
- support_set = osc_data->support_set | (flags & OSC_SUPPORT_MASKS);
- osc_args.capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
- osc_args.capbuf[OSC_SUPPORT_TYPE] = support_set;
- osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS;
-
- status = acpi_run_osc(osc_data->handle, &osc_args, &result);
- if (ACPI_SUCCESS(status)) {
- osc_data->support_set = support_set;
- osc_data->control_query = result;
- osc_data->is_queried = 1;
- }
-
- return status;
-}
-
-/*
- * pci_acpi_osc_support: Invoke _OSC indicating support for the given feature
- * @flags: Bitmask of flags to support
- *
- * See the ACPI spec for the definition of the flags
- */
-int pci_acpi_osc_support(acpi_handle handle, u32 flags)
-{
- acpi_status status;
- acpi_handle tmp;
- struct acpi_osc_data *osc_data;
- int rc = 0;
-
- status = acpi_get_handle(handle, "_OSC", &tmp);
- if (ACPI_FAILURE(status))
- return -ENOTTY;
-
- mutex_lock(&pci_acpi_lock);
- osc_data = acpi_get_osc_data(handle);
- if (!osc_data) {
- printk(KERN_ERR "acpi osc data array is full\n");
- rc = -ENOMEM;
- goto out;
- }
-
- __acpi_query_osc(flags, osc_data);
-out:
- mutex_unlock(&pci_acpi_lock);
- return rc;
-}
-
-/**
- * pci_osc_control_set - commit requested control to Firmware
- * @handle: acpi_handle for the target ACPI object
- * @flags: driver's requested control bits
- *
- * Attempt to take control from Firmware on requested control bits.
- **/
-acpi_status pci_osc_control_set(acpi_handle handle, u32 flags)
-{
- acpi_status status;
- u32 control_req, control_set, result;
- acpi_handle tmp;
- struct acpi_osc_data *osc_data;
- struct acpi_osc_args osc_args;
-
- status = acpi_get_handle(handle, "_OSC", &tmp);
- if (ACPI_FAILURE(status))
- return status;
-
- mutex_lock(&pci_acpi_lock);
- osc_data = acpi_get_osc_data(handle);
- if (!osc_data) {
- printk(KERN_ERR "acpi osc data array is full\n");
- status = AE_ERROR;
- goto out;
- }
-
- control_req = (flags & OSC_CONTROL_MASKS);
- if (!control_req) {
- status = AE_TYPE;
- goto out;
- }
-
- /* No need to evaluate _OSC if the control was already granted. */
- if ((osc_data->control_set & control_req) == control_req)
- goto out;
-
- if (!osc_data->is_queried) {
- status = __acpi_query_osc(osc_data->support_set, osc_data);
- if (ACPI_FAILURE(status))
- goto out;
- }
-
- if ((osc_data->control_query & control_req) != control_req) {
- status = AE_SUPPORT;
- goto out;
- }
-
- control_set = osc_data->control_set | control_req;
- osc_args.capbuf[OSC_QUERY_TYPE] = 0;
- osc_args.capbuf[OSC_SUPPORT_TYPE] = osc_data->support_set;
- osc_args.capbuf[OSC_CONTROL_TYPE] = control_set;
- status = acpi_run_osc(handle, &osc_args, &result);
- if (ACPI_SUCCESS(status))
- osc_data->control_set = result;
-out:
- mutex_unlock(&pci_acpi_lock);
- return status;
-}
-EXPORT_SYMBOL(pci_osc_control_set);
-
/*
* _SxD returns the D-state with the highest power
* (lowest D-state number) supported in the S-state "x".
@@ -386,12 +171,12 @@ static int __init acpi_pci_init(void)
{
int ret;
- if (acpi_gbl_FADT.boot_flags & BAF_MSI_NOT_SUPPORTED) {
+ if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI) {
printk(KERN_INFO"ACPI FADT declares the system doesn't support MSI, so disable it\n");
pci_no_msi();
}
- if (acpi_gbl_FADT.boot_flags & BAF_PCIE_ASPM_CONTROL) {
+ if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
pcie_no_aspm();
}
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 93eac1423585..d76c4c85367e 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -99,6 +99,52 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
}
static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
+/**
+ * store_remove_id - remove a PCI device ID from this driver
+ * @driver: target device driver
+ * @buf: buffer for scanning device ID data
+ * @count: input size
+ *
+ * Removes a dynamic pci device ID to this driver.
+ */
+static ssize_t
+store_remove_id(struct device_driver *driver, const char *buf, size_t count)
+{
+ struct pci_dynid *dynid, *n;
+ struct pci_driver *pdrv = to_pci_driver(driver);
+ __u32 vendor, device, subvendor = PCI_ANY_ID,
+ subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
+ int fields = 0;
+ int retval = -ENODEV;
+
+ fields = sscanf(buf, "%x %x %x %x %x %x",
+ &vendor, &device, &subvendor, &subdevice,
+ &class, &class_mask);
+ if (fields < 2)
+ return -EINVAL;
+
+ spin_lock(&pdrv->dynids.lock);
+ list_for_each_entry_safe(dynid, n, &pdrv->dynids.list, node) {
+ struct pci_device_id *id = &dynid->id;
+ if ((id->vendor == vendor) &&
+ (id->device == device) &&
+ (subvendor == PCI_ANY_ID || id->subvendor == subvendor) &&
+ (subdevice == PCI_ANY_ID || id->subdevice == subdevice) &&
+ !((id->class ^ class) & class_mask)) {
+ list_del(&dynid->node);
+ kfree(dynid);
+ retval = 0;
+ break;
+ }
+ }
+ spin_unlock(&pdrv->dynids.lock);
+
+ if (retval)
+ return retval;
+ return count;
+}
+static DRIVER_ATTR(remove_id, S_IWUSR, NULL, store_remove_id);
+
static void
pci_free_dynids(struct pci_driver *drv)
{
@@ -125,6 +171,20 @@ static void pci_remove_newid_file(struct pci_driver *drv)
{
driver_remove_file(&drv->driver, &driver_attr_new_id);
}
+
+static int
+pci_create_removeid_file(struct pci_driver *drv)
+{
+ int error = 0;
+ if (drv->probe != NULL)
+ error = driver_create_file(&drv->driver,&driver_attr_remove_id);
+ return error;
+}
+
+static void pci_remove_removeid_file(struct pci_driver *drv)
+{
+ driver_remove_file(&drv->driver, &driver_attr_remove_id);
+}
#else /* !CONFIG_HOTPLUG */
static inline void pci_free_dynids(struct pci_driver *drv) {}
static inline int pci_create_newid_file(struct pci_driver *drv)
@@ -132,6 +192,11 @@ static inline int pci_create_newid_file(struct pci_driver *drv)
return 0;
}
static inline void pci_remove_newid_file(struct pci_driver *drv) {}
+static inline int pci_create_removeid_file(struct pci_driver *drv)
+{
+ return 0;
+}
+static inline void pci_remove_removeid_file(struct pci_driver *drv) {}
#endif
/**
@@ -212,10 +277,9 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
node = dev_to_node(&dev->dev);
if (node >= 0) {
int cpu;
- node_to_cpumask_ptr(nodecpumask, node);
get_online_cpus();
- cpu = cpumask_any_and(nodecpumask, cpu_online_mask);
+ cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
if (cpu < nr_cpu_ids)
error = work_on_cpu(cpu, local_pci_probe, &ddi);
else
@@ -352,53 +416,60 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state)
{
struct pci_dev * pci_dev = to_pci_dev(dev);
struct pci_driver * drv = pci_dev->driver;
- int i = 0;
+
+ pci_dev->state_saved = false;
if (drv && drv->suspend) {
pci_power_t prev = pci_dev->current_state;
+ int error;
- pci_dev->state_saved = false;
-
- i = drv->suspend(pci_dev, state);
- suspend_report_result(drv->suspend, i);
- if (i)
- return i;
-
- if (pci_dev->state_saved)
- goto Fixup;
+ error = drv->suspend(pci_dev, state);
+ suspend_report_result(drv->suspend, error);
+ if (error)
+ return error;
- if (pci_dev->current_state != PCI_D0
+ if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
&& pci_dev->current_state != PCI_UNKNOWN) {
WARN_ONCE(pci_dev->current_state != prev,
"PCI PM: Device state not saved by %pF\n",
drv->suspend);
- goto Fixup;
}
}
- pci_save_state(pci_dev);
- /*
- * This is for compatibility with existing code with legacy PM support.
- */
- pci_pm_set_unknown_state(pci_dev);
-
- Fixup:
pci_fixup_device(pci_fixup_suspend, pci_dev);
- return i;
+ return 0;
}
static int pci_legacy_suspend_late(struct device *dev, pm_message_t state)
{
struct pci_dev * pci_dev = to_pci_dev(dev);
struct pci_driver * drv = pci_dev->driver;
- int i = 0;
if (drv && drv->suspend_late) {
- i = drv->suspend_late(pci_dev, state);
- suspend_report_result(drv->suspend_late, i);
+ pci_power_t prev = pci_dev->current_state;
+ int error;
+
+ error = drv->suspend_late(pci_dev, state);
+ suspend_report_result(drv->suspend_late, error);
+ if (error)
+ return error;
+
+ if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
+ && pci_dev->current_state != PCI_UNKNOWN) {
+ WARN_ONCE(pci_dev->current_state != prev,
+ "PCI PM: Device state not saved by %pF\n",
+ drv->suspend_late);
+ return 0;
+ }
}
- return i;
+
+ if (!pci_dev->state_saved)
+ pci_save_state(pci_dev);
+
+ pci_pm_set_unknown_state(pci_dev);
+
+ return 0;
}
static int pci_legacy_resume_early(struct device *dev)
@@ -423,6 +494,23 @@ static int pci_legacy_resume(struct device *dev)
/* Auxiliary functions used by the new power management framework */
+/**
+ * pci_restore_standard_config - restore standard config registers of PCI device
+ * @pci_dev: PCI device to handle
+ */
+static int pci_restore_standard_config(struct pci_dev *pci_dev)
+{
+ pci_update_current_state(pci_dev, PCI_UNKNOWN);
+
+ if (pci_dev->current_state != PCI_D0) {
+ int error = pci_set_power_state(pci_dev, PCI_D0);
+ if (error)
+ return error;
+ }
+
+ return pci_dev->state_saved ? pci_restore_state(pci_dev) : 0;
+}
+
static void pci_pm_default_resume_noirq(struct pci_dev *pci_dev)
{
pci_restore_standard_config(pci_dev);
@@ -443,7 +531,6 @@ static void pci_pm_default_suspend(struct pci_dev *pci_dev)
/* Disable non-bridge devices without PM support */
if (!pci_is_bridge(pci_dev))
pci_disable_enabled_device(pci_dev);
- pci_save_state(pci_dev);
}
static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev)
@@ -493,13 +580,13 @@ static int pci_pm_suspend(struct device *dev)
if (pci_has_legacy_pm_support(pci_dev))
return pci_legacy_suspend(dev, PMSG_SUSPEND);
+ pci_dev->state_saved = false;
+
if (!pm) {
pci_pm_default_suspend(pci_dev);
goto Fixup;
}
- pci_dev->state_saved = false;
-
if (pm->suspend) {
pci_power_t prev = pci_dev->current_state;
int error;
@@ -509,24 +596,14 @@ static int pci_pm_suspend(struct device *dev)
if (error)
return error;
- if (pci_dev->state_saved)
- goto Fixup;
-
- if (pci_dev->current_state != PCI_D0
+ if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
&& pci_dev->current_state != PCI_UNKNOWN) {
WARN_ONCE(pci_dev->current_state != prev,
"PCI PM: State of device not saved by %pF\n",
pm->suspend);
- goto Fixup;
}
}
- if (!pci_dev->state_saved) {
- pci_save_state(pci_dev);
- if (!pci_is_bridge(pci_dev))
- pci_prepare_to_sleep(pci_dev);
- }
-
Fixup:
pci_fixup_device(pci_fixup_suspend, pci_dev);
@@ -536,21 +613,43 @@ static int pci_pm_suspend(struct device *dev)
static int pci_pm_suspend_noirq(struct device *dev)
{
struct pci_dev *pci_dev = to_pci_dev(dev);
- struct device_driver *drv = dev->driver;
- int error = 0;
+ struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
if (pci_has_legacy_pm_support(pci_dev))
return pci_legacy_suspend_late(dev, PMSG_SUSPEND);
- if (drv && drv->pm && drv->pm->suspend_noirq) {
- error = drv->pm->suspend_noirq(dev);
- suspend_report_result(drv->pm->suspend_noirq, error);
+ if (!pm) {
+ pci_save_state(pci_dev);
+ return 0;
}
- if (!error)
- pci_pm_set_unknown_state(pci_dev);
+ if (pm->suspend_noirq) {
+ pci_power_t prev = pci_dev->current_state;
+ int error;
- return error;
+ error = pm->suspend_noirq(dev);
+ suspend_report_result(pm->suspend_noirq, error);
+ if (error)
+ return error;
+
+ if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
+ && pci_dev->current_state != PCI_UNKNOWN) {
+ WARN_ONCE(pci_dev->current_state != prev,
+ "PCI PM: State of device not saved by %pF\n",
+ pm->suspend_noirq);
+ return 0;
+ }
+ }
+
+ if (!pci_dev->state_saved) {
+ pci_save_state(pci_dev);
+ if (!pci_is_bridge(pci_dev))
+ pci_prepare_to_sleep(pci_dev);
+ }
+
+ pci_pm_set_unknown_state(pci_dev);
+
+ return 0;
}
static int pci_pm_resume_noirq(struct device *dev)
@@ -617,13 +716,13 @@ static int pci_pm_freeze(struct device *dev)
if (pci_has_legacy_pm_support(pci_dev))
return pci_legacy_suspend(dev, PMSG_FREEZE);
+ pci_dev->state_saved = false;
+
if (!pm) {
pci_pm_default_suspend(pci_dev);
return 0;
}
- pci_dev->state_saved = false;
-
if (pm->freeze) {
int error;
@@ -633,9 +732,6 @@ static int pci_pm_freeze(struct device *dev)
return error;
}
- if (!pci_dev->state_saved)
- pci_save_state(pci_dev);
-
return 0;
}
@@ -643,20 +739,25 @@ static int pci_pm_freeze_noirq(struct device *dev)
{
struct pci_dev *pci_dev = to_pci_dev(dev);
struct device_driver *drv = dev->driver;
- int error = 0;
if (pci_has_legacy_pm_support(pci_dev))
return pci_legacy_suspend_late(dev, PMSG_FREEZE);
if (drv && drv->pm && drv->pm->freeze_noirq) {
+ int error;
+
error = drv->pm->freeze_noirq(dev);
suspend_report_result(drv->pm->freeze_noirq, error);
+ if (error)
+ return error;
}
- if (!error)
- pci_pm_set_unknown_state(pci_dev);
+ if (!pci_dev->state_saved)
+ pci_save_state(pci_dev);
- return error;
+ pci_pm_set_unknown_state(pci_dev);
+
+ return 0;
}
static int pci_pm_thaw_noirq(struct device *dev)
@@ -699,46 +800,56 @@ static int pci_pm_poweroff(struct device *dev)
{
struct pci_dev *pci_dev = to_pci_dev(dev);
struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
- int error = 0;
if (pci_has_legacy_pm_support(pci_dev))
return pci_legacy_suspend(dev, PMSG_HIBERNATE);
+ pci_dev->state_saved = false;
+
if (!pm) {
pci_pm_default_suspend(pci_dev);
goto Fixup;
}
- pci_dev->state_saved = false;
-
if (pm->poweroff) {
+ int error;
+
error = pm->poweroff(dev);
suspend_report_result(pm->poweroff, error);
+ if (error)
+ return error;
}
- if (!pci_dev->state_saved && !pci_is_bridge(pci_dev))
- pci_prepare_to_sleep(pci_dev);
-
Fixup:
pci_fixup_device(pci_fixup_suspend, pci_dev);
- return error;
+ return 0;
}
static int pci_pm_poweroff_noirq(struct device *dev)
{
+ struct pci_dev *pci_dev = to_pci_dev(dev);
struct device_driver *drv = dev->driver;
- int error = 0;
if (pci_has_legacy_pm_support(to_pci_dev(dev)))
return pci_legacy_suspend_late(dev, PMSG_HIBERNATE);
- if (drv && drv->pm && drv->pm->poweroff_noirq) {
+ if (!drv || !drv->pm)
+ return 0;
+
+ if (drv->pm->poweroff_noirq) {
+ int error;
+
error = drv->pm->poweroff_noirq(dev);
suspend_report_result(drv->pm->poweroff_noirq, error);
+ if (error)
+ return error;
}
- return error;
+ if (!pci_dev->state_saved && !pci_is_bridge(pci_dev))
+ pci_prepare_to_sleep(pci_dev);
+
+ return 0;
}
static int pci_pm_restore_noirq(struct device *dev)
@@ -852,13 +963,23 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner,
/* register with core */
error = driver_register(&drv->driver);
if (error)
- return error;
+ goto out;
error = pci_create_newid_file(drv);
if (error)
- driver_unregister(&drv->driver);
+ goto out_newid;
+ error = pci_create_removeid_file(drv);
+ if (error)
+ goto out_removeid;
+out:
return error;
+
+out_removeid:
+ pci_remove_newid_file(drv);
+out_newid:
+ driver_unregister(&drv->driver);
+ goto out;
}
/**
@@ -874,6 +995,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner,
void
pci_unregister_driver(struct pci_driver *drv)
{
+ pci_remove_removeid_file(drv);
pci_remove_newid_file(drv);
driver_unregister(&drv->driver);
pci_free_dynids(drv);
@@ -973,6 +1095,7 @@ struct bus_type pci_bus_type = {
.remove = pci_device_remove,
.shutdown = pci_device_shutdown,
.dev_attrs = pci_dev_attrs,
+ .bus_attrs = pci_bus_attrs,
.pm = PCI_PM_OPS_PTR,
};
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index dfc4e0ddf241..a7eb1b46a5a8 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -148,7 +148,7 @@ static ssize_t is_enabled_store(struct device *dev,
return -EPERM;
if (!val) {
- if (atomic_read(&pdev->enable_cnt) != 0)
+ if (pci_is_enabled(pdev))
pci_disable_device(pdev);
else
result = -EIO;
@@ -219,6 +219,79 @@ msi_bus_store(struct device *dev, struct device_attribute *attr,
return count;
}
+#ifdef CONFIG_HOTPLUG
+static DEFINE_MUTEX(pci_remove_rescan_mutex);
+static ssize_t bus_rescan_store(struct bus_type *bus, const char *buf,
+ size_t count)
+{
+ unsigned long val;
+ struct pci_bus *b = NULL;
+
+ if (strict_strtoul(buf, 0, &val) < 0)
+ return -EINVAL;
+
+ if (val) {
+ mutex_lock(&pci_remove_rescan_mutex);
+ while ((b = pci_find_next_bus(b)) != NULL)
+ pci_rescan_bus(b);
+ mutex_unlock(&pci_remove_rescan_mutex);
+ }
+ return count;
+}
+
+struct bus_attribute pci_bus_attrs[] = {
+ __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, bus_rescan_store),
+ __ATTR_NULL
+};
+
+static ssize_t
+dev_rescan_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long val;
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (strict_strtoul(buf, 0, &val) < 0)
+ return -EINVAL;
+
+ if (val) {
+ mutex_lock(&pci_remove_rescan_mutex);
+ pci_rescan_bus(pdev->bus);
+ mutex_unlock(&pci_remove_rescan_mutex);
+ }
+ return count;
+}
+
+static void remove_callback(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ mutex_lock(&pci_remove_rescan_mutex);
+ pci_remove_bus_device(pdev);
+ mutex_unlock(&pci_remove_rescan_mutex);
+}
+
+static ssize_t
+remove_store(struct device *dev, struct device_attribute *dummy,
+ const char *buf, size_t count)
+{
+ int ret = 0;
+ unsigned long val;
+
+ if (strict_strtoul(buf, 0, &val) < 0)
+ return -EINVAL;
+
+ /* An attribute cannot be unregistered by one of its own methods,
+ * so we have to use this roundabout approach.
+ */
+ if (val)
+ ret = device_schedule_callback(dev, remove_callback);
+ if (ret)
+ count = ret;
+ return count;
+}
+#endif
+
struct device_attribute pci_dev_attrs[] = {
__ATTR_RO(resource),
__ATTR_RO(vendor),
@@ -237,10 +310,25 @@ struct device_attribute pci_dev_attrs[] = {
__ATTR(broken_parity_status,(S_IRUGO|S_IWUSR),
broken_parity_status_show,broken_parity_status_store),
__ATTR(msi_bus, 0644, msi_bus_show, msi_bus_store),
+#ifdef CONFIG_HOTPLUG
+ __ATTR(remove, (S_IWUSR|S_IWGRP), NULL, remove_store),
+ __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_rescan_store),
+#endif
__ATTR_NULL,
};
static ssize_t
+boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ return sprintf(buf, "%u\n",
+ !!(pdev->resource[PCI_ROM_RESOURCE].flags &
+ IORESOURCE_ROM_SHADOW));
+}
+struct device_attribute vga_attr = __ATTR_RO(boot_vga);
+
+static ssize_t
pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
@@ -493,6 +581,19 @@ pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr,
}
/**
+ * pci_adjust_legacy_attr - adjustment of legacy file attributes
+ * @b: bus to create files under
+ * @mmap_type: I/O port or memory
+ *
+ * Stub implementation. Can be overridden by arch if necessary.
+ */
+void __weak
+pci_adjust_legacy_attr(struct pci_bus *b, enum pci_mmap_state mmap_type)
+{
+ return;
+}
+
+/**
* pci_create_legacy_files - create legacy I/O port and memory files
* @b: bus to create files under
*
@@ -518,6 +619,7 @@ void pci_create_legacy_files(struct pci_bus *b)
b->legacy_io->read = pci_read_legacy_io;
b->legacy_io->write = pci_write_legacy_io;
b->legacy_io->mmap = pci_mmap_legacy_io;
+ pci_adjust_legacy_attr(b, pci_mmap_io);
error = device_create_bin_file(&b->dev, b->legacy_io);
if (error)
goto legacy_io_err;
@@ -528,6 +630,7 @@ void pci_create_legacy_files(struct pci_bus *b)
b->legacy_mem->size = 1024*1024;
b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
b->legacy_mem->mmap = pci_mmap_legacy_mem;
+ pci_adjust_legacy_attr(b, pci_mmap_mem);
error = device_create_bin_file(&b->dev, b->legacy_mem);
if (error)
goto legacy_mem_err;
@@ -719,8 +822,8 @@ static int pci_create_resource_files(struct pci_dev *pdev)
return 0;
}
#else /* !HAVE_PCI_MMAP */
-static inline int pci_create_resource_files(struct pci_dev *dev) { return 0; }
-static inline void pci_remove_resource_files(struct pci_dev *dev) { return; }
+int __weak pci_create_resource_files(struct pci_dev *dev) { return 0; }
+void __weak pci_remove_resource_files(struct pci_dev *dev) { return; }
#endif /* HAVE_PCI_MMAP */
/**
@@ -884,18 +987,27 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
pdev->rom_attr = attr;
}
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) {
+ retval = device_create_file(&pdev->dev, &vga_attr);
+ if (retval)
+ goto err_rom_file;
+ }
+
/* add platform-specific attributes */
retval = pcibios_add_platform_entries(pdev);
if (retval)
- goto err_rom_file;
+ goto err_vga_file;
/* add sysfs entries for various capabilities */
retval = pci_create_capabilities_sysfs(pdev);
if (retval)
- goto err_rom_file;
+ goto err_vga_file;
return 0;
+err_vga_file:
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+ device_remove_file(&pdev->dev, &vga_attr);
err_rom_file:
if (rom_size) {
sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 6d6120007af4..16fd0d4c3166 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -20,6 +20,8 @@
#include <linux/pm_wakeup.h>
#include <linux/interrupt.h>
#include <asm/dma.h> /* isa_dma_bridge_buggy */
+#include <linux/device.h>
+#include <asm/setup.h>
#include "pci.h"
unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT;
@@ -426,7 +428,6 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable)
* given PCI device
* @dev: PCI device to handle.
* @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
- * @wait: If 'true', wait for the device to change its power state
*
* RETURN VALUE:
* -EINVAL if the requested state is invalid.
@@ -435,12 +436,15 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable)
* 0 if device already is in the requested state.
* 0 if device's power state has been successfully changed.
*/
-static int
-pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait)
+static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
{
u16 pmcsr;
bool need_restore = false;
+ /* Check if we're already there */
+ if (dev->current_state == state)
+ return 0;
+
if (!dev->pm_cap)
return -EIO;
@@ -451,10 +455,7 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait)
* Can enter D0 from any state, but if we can only go deeper
* to sleep if we're already in a low power state
*/
- if (dev->current_state == state) {
- /* we're already there */
- return 0;
- } else if (state != PCI_D0 && dev->current_state <= PCI_D3cold
+ if (state != PCI_D0 && dev->current_state <= PCI_D3cold
&& dev->current_state > state) {
dev_err(&dev->dev, "invalid power transition "
"(from state %d to %d)\n", dev->current_state, state);
@@ -481,10 +482,8 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait)
break;
case PCI_UNKNOWN: /* Boot-up */
if ((pmcsr & PCI_PM_CTRL_STATE_MASK) == PCI_D3hot
- && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) {
+ && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET))
need_restore = true;
- wait = true;
- }
/* Fall-through: force to D0 */
default:
pmcsr = 0;
@@ -494,9 +493,6 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait)
/* enter specified state */
pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr);
- if (!wait)
- return 0;
-
/* Mandatory power management transition delays */
/* see PCI PM 1.1 5.6.1 table 18 */
if (state == PCI_D3hot || dev->current_state == PCI_D3hot)
@@ -521,7 +517,7 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait)
if (need_restore)
pci_restore_bars(dev);
- if (wait && dev->bus->self)
+ if (dev->bus->self)
pcie_aspm_pm_state_change(dev->bus->self);
return 0;
@@ -546,11 +542,58 @@ void pci_update_current_state(struct pci_dev *dev, pci_power_t state)
}
/**
+ * pci_platform_power_transition - Use platform to change device power state
+ * @dev: PCI device to handle.
+ * @state: State to put the device into.
+ */
+static int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state)
+{
+ int error;
+
+ if (platform_pci_power_manageable(dev)) {
+ error = platform_pci_set_power_state(dev, state);
+ if (!error)
+ pci_update_current_state(dev, state);
+ } else {
+ error = -ENODEV;
+ /* Fall back to PCI_D0 if native PM is not supported */
+ pci_update_current_state(dev, PCI_D0);
+ }
+
+ return error;
+}
+
+/**
+ * __pci_start_power_transition - Start power transition of a PCI device
+ * @dev: PCI device to handle.
+ * @state: State to put the device into.
+ */
+static void __pci_start_power_transition(struct pci_dev *dev, pci_power_t state)
+{
+ if (state == PCI_D0)
+ pci_platform_power_transition(dev, PCI_D0);
+}
+
+/**
+ * __pci_complete_power_transition - Complete power transition of a PCI device
+ * @dev: PCI device to handle.
+ * @state: State to put the device into.
+ *
+ * This function should not be called directly by device drivers.
+ */
+int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state)
+{
+ return state > PCI_D0 ?
+ pci_platform_power_transition(dev, state) : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(__pci_complete_power_transition);
+
+/**
* pci_set_power_state - Set the power state of a PCI device
* @dev: PCI device to handle.
* @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
*
- * Transition a device to a new power state, using the platform formware and/or
+ * Transition a device to a new power state, using the platform firmware and/or
* the device's PCI PM registers.
*
* RETURN VALUE:
@@ -577,30 +620,21 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
*/
return 0;
- if (state == PCI_D0 && platform_pci_power_manageable(dev)) {
- /*
- * Allow the platform to change the state, for example via ACPI
- * _PR0, _PS0 and some such, but do not trust it.
- */
- int ret = platform_pci_set_power_state(dev, PCI_D0);
- if (!ret)
- pci_update_current_state(dev, PCI_D0);
- }
+ /* Check if we're already there */
+ if (dev->current_state == state)
+ return 0;
+
+ __pci_start_power_transition(dev, state);
+
/* This device is quirked not to be put into D3, so
don't put it in D3 */
if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3))
return 0;
- error = pci_raw_set_power_state(dev, state, true);
+ error = pci_raw_set_power_state(dev, state);
- if (state > PCI_D0 && platform_pci_power_manageable(dev)) {
- /* Allow the platform to finalize the transition */
- int ret = platform_pci_set_power_state(dev, state);
- if (!ret) {
- pci_update_current_state(dev, state);
- error = 0;
- }
- }
+ if (!__pci_complete_power_transition(dev, state))
+ error = 0;
return error;
}
@@ -645,6 +679,8 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state)
EXPORT_SYMBOL(pci_choose_state);
+#define PCI_EXP_SAVE_REGS 7
+
static int pci_save_pcie_state(struct pci_dev *dev)
{
int pos, i = 0;
@@ -657,7 +693,7 @@ static int pci_save_pcie_state(struct pci_dev *dev)
save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
if (!save_state) {
- dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__);
+ dev_err(&dev->dev, "buffer not found in %s\n", __func__);
return -ENOMEM;
}
cap = (u16 *)&save_state->data[0];
@@ -666,6 +702,9 @@ static int pci_save_pcie_state(struct pci_dev *dev)
pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]);
pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]);
pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]);
+ pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]);
+ pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]);
+ pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]);
return 0;
}
@@ -686,6 +725,9 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]);
pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]);
pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]);
+ pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]);
+ pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]);
+ pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]);
}
@@ -700,7 +742,7 @@ static int pci_save_pcix_state(struct pci_dev *dev)
save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
if (!save_state) {
- dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__);
+ dev_err(&dev->dev, "buffer not found in %s\n", __func__);
return -ENOMEM;
}
@@ -773,6 +815,7 @@ pci_restore_state(struct pci_dev *dev)
}
pci_restore_pcix_state(dev);
pci_restore_msi_state(dev);
+ pci_restore_iov_state(dev);
return 0;
}
@@ -801,7 +844,7 @@ static int do_pci_enable_device(struct pci_dev *dev, int bars)
*/
int pci_reenable_device(struct pci_dev *dev)
{
- if (atomic_read(&dev->enable_cnt))
+ if (pci_is_enabled(dev))
return do_pci_enable_device(dev, (1 << PCI_NUM_RESOURCES) - 1);
return 0;
}
@@ -999,7 +1042,7 @@ static void do_pci_disable_device(struct pci_dev *dev)
*/
void pci_disable_enabled_device(struct pci_dev *dev)
{
- if (atomic_read(&dev->enable_cnt))
+ if (pci_is_enabled(dev))
do_pci_disable_device(dev);
}
@@ -1231,7 +1274,7 @@ int pci_prepare_to_sleep(struct pci_dev *dev)
if (target_state == PCI_POWER_ERROR)
return -EIO;
- pci_enable_wake(dev, target_state, true);
+ pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev));
error = pci_set_power_state(dev, target_state);
@@ -1369,7 +1412,8 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev)
{
int error;
- error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, 4 * sizeof(u16));
+ error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP,
+ PCI_EXP_SAVE_REGS * sizeof(u16));
if (error)
dev_err(&dev->dev,
"unable to preallocate PCI Express save buffer\n");
@@ -1381,50 +1425,6 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev)
}
/**
- * pci_restore_standard_config - restore standard config registers of PCI device
- * @dev: PCI device to handle
- *
- * This function assumes that the device's configuration space is accessible.
- * If the device needs to be powered up, the function will wait for it to
- * change the state.
- */
-int pci_restore_standard_config(struct pci_dev *dev)
-{
- pci_power_t prev_state;
- int error;
-
- pci_update_current_state(dev, PCI_D0);
-
- prev_state = dev->current_state;
- if (prev_state == PCI_D0)
- goto Restore;
-
- error = pci_raw_set_power_state(dev, PCI_D0, false);
- if (error)
- return error;
-
- /*
- * This assumes that we won't get a bus in B2 or B3 from the BIOS, but
- * we've made this assumption forever and it appears to be universally
- * satisfied.
- */
- switch(prev_state) {
- case PCI_D3cold:
- case PCI_D3hot:
- mdelay(pci_pm_d3_delay);
- break;
- case PCI_D2:
- udelay(PCI_PM_D2_DELAY);
- break;
- }
-
- pci_update_current_state(dev, PCI_D0);
-
- Restore:
- return dev->state_saved ? pci_restore_state(dev) : 0;
-}
-
-/**
* pci_enable_ari - enable ARI forwarding if hardware support it
* @dev: the PCI device
*/
@@ -1484,7 +1484,7 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
if (!pin)
return -1;
- while (dev->bus->self) {
+ while (dev->bus->parent) {
pin = pci_swizzle_interrupt_pin(dev, pin);
dev = dev->bus->self;
}
@@ -1504,7 +1504,7 @@ u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp)
{
u8 pin = *pinp;
- while (dev->bus->self) {
+ while (dev->bus->parent) {
pin = pci_swizzle_interrupt_pin(dev, pin);
dev = dev->bus->self;
}
@@ -2028,18 +2028,24 @@ static int __pcie_flr(struct pci_dev *dev, int probe)
pci_block_user_cfg_access(dev);
/* Wait for Transaction Pending bit clean */
+ pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
+ if (!(status & PCI_EXP_DEVSTA_TRPND))
+ goto transaction_done;
+
msleep(100);
pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
- if (status & PCI_EXP_DEVSTA_TRPND) {
- dev_info(&dev->dev, "Busy after 100ms while trying to reset; "
+ if (!(status & PCI_EXP_DEVSTA_TRPND))
+ goto transaction_done;
+
+ dev_info(&dev->dev, "Busy after 100ms while trying to reset; "
"sleeping for 1 second\n");
- ssleep(1);
- pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
- if (status & PCI_EXP_DEVSTA_TRPND)
- dev_info(&dev->dev, "Still busy after 1s; "
+ ssleep(1);
+ pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
+ if (status & PCI_EXP_DEVSTA_TRPND)
+ dev_info(&dev->dev, "Still busy after 1s; "
"proceeding with reset anyway\n");
- }
+transaction_done:
pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL,
PCI_EXP_DEVCTL_BCR_FLR);
mdelay(100);
@@ -2066,18 +2072,24 @@ static int __pci_af_flr(struct pci_dev *dev, int probe)
pci_block_user_cfg_access(dev);
/* Wait for Transaction Pending bit clean */
+ pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
+ if (!(status & PCI_AF_STATUS_TP))
+ goto transaction_done;
+
msleep(100);
pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
- if (status & PCI_AF_STATUS_TP) {
- dev_info(&dev->dev, "Busy after 100ms while trying to"
- " reset; sleeping for 1 second\n");
- ssleep(1);
- pci_read_config_byte(dev,
- cappos + PCI_AF_STATUS, &status);
- if (status & PCI_AF_STATUS_TP)
- dev_info(&dev->dev, "Still busy after 1s; "
- "proceeding with reset anyway\n");
- }
+ if (!(status & PCI_AF_STATUS_TP))
+ goto transaction_done;
+
+ dev_info(&dev->dev, "Busy after 100ms while trying to"
+ " reset; sleeping for 1 second\n");
+ ssleep(1);
+ pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
+ if (status & PCI_AF_STATUS_TP)
+ dev_info(&dev->dev, "Still busy after 1s; "
+ "proceeding with reset anyway\n");
+
+transaction_done:
pci_write_config_byte(dev, cappos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
mdelay(100);
@@ -2346,18 +2358,140 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags)
*/
int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type)
{
+ int reg;
+
if (resno < PCI_ROM_RESOURCE) {
*type = pci_bar_unknown;
return PCI_BASE_ADDRESS_0 + 4 * resno;
} else if (resno == PCI_ROM_RESOURCE) {
*type = pci_bar_mem32;
return dev->rom_base_reg;
+ } else if (resno < PCI_BRIDGE_RESOURCES) {
+ /* device specific resource */
+ reg = pci_iov_resource_bar(dev, resno, type);
+ if (reg)
+ return reg;
}
dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno);
return 0;
}
+#define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
+static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
+spinlock_t resource_alignment_lock = SPIN_LOCK_UNLOCKED;
+
+/**
+ * pci_specified_resource_alignment - get resource alignment specified by user.
+ * @dev: the PCI device to get
+ *
+ * RETURNS: Resource alignment if it is specified.
+ * Zero if it is not specified.
+ */
+resource_size_t pci_specified_resource_alignment(struct pci_dev *dev)
+{
+ int seg, bus, slot, func, align_order, count;
+ resource_size_t align = 0;
+ char *p;
+
+ spin_lock(&resource_alignment_lock);
+ p = resource_alignment_param;
+ while (*p) {
+ count = 0;
+ if (sscanf(p, "%d%n", &align_order, &count) == 1 &&
+ p[count] == '@') {
+ p += count + 1;
+ } else {
+ align_order = -1;
+ }
+ if (sscanf(p, "%x:%x:%x.%x%n",
+ &seg, &bus, &slot, &func, &count) != 4) {
+ seg = 0;
+ if (sscanf(p, "%x:%x.%x%n",
+ &bus, &slot, &func, &count) != 3) {
+ /* Invalid format */
+ printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n",
+ p);
+ break;
+ }
+ }
+ p += count;
+ if (seg == pci_domain_nr(dev->bus) &&
+ bus == dev->bus->number &&
+ slot == PCI_SLOT(dev->devfn) &&
+ func == PCI_FUNC(dev->devfn)) {
+ if (align_order == -1) {
+ align = PAGE_SIZE;
+ } else {
+ align = 1 << align_order;
+ }
+ /* Found */
+ break;
+ }
+ if (*p != ';' && *p != ',') {
+ /* End of param or invalid format */
+ break;
+ }
+ p++;
+ }
+ spin_unlock(&resource_alignment_lock);
+ return align;
+}
+
+/**
+ * pci_is_reassigndev - check if specified PCI is target device to reassign
+ * @dev: the PCI device to check
+ *
+ * RETURNS: non-zero for PCI device is a target device to reassign,
+ * or zero is not.
+ */
+int pci_is_reassigndev(struct pci_dev *dev)
+{
+ return (pci_specified_resource_alignment(dev) != 0);
+}
+
+ssize_t pci_set_resource_alignment_param(const char *buf, size_t count)
+{
+ if (count > RESOURCE_ALIGNMENT_PARAM_SIZE - 1)
+ count = RESOURCE_ALIGNMENT_PARAM_SIZE - 1;
+ spin_lock(&resource_alignment_lock);
+ strncpy(resource_alignment_param, buf, count);
+ resource_alignment_param[count] = '\0';
+ spin_unlock(&resource_alignment_lock);
+ return count;
+}
+
+ssize_t pci_get_resource_alignment_param(char *buf, size_t size)
+{
+ size_t count;
+ spin_lock(&resource_alignment_lock);
+ count = snprintf(buf, size, "%s", resource_alignment_param);
+ spin_unlock(&resource_alignment_lock);
+ return count;
+}
+
+static ssize_t pci_resource_alignment_show(struct bus_type *bus, char *buf)
+{
+ return pci_get_resource_alignment_param(buf, PAGE_SIZE);
+}
+
+static ssize_t pci_resource_alignment_store(struct bus_type *bus,
+ const char *buf, size_t count)
+{
+ return pci_set_resource_alignment_param(buf, count);
+}
+
+BUS_ATTR(resource_alignment, 0644, pci_resource_alignment_show,
+ pci_resource_alignment_store);
+
+static int __init pci_resource_alignment_sysfs_init(void)
+{
+ return bus_create_file(&pci_bus_type,
+ &bus_attr_resource_alignment);
+}
+
+late_initcall(pci_resource_alignment_sysfs_init);
+
static void __devinit pci_no_domains(void)
{
#ifdef CONFIG_PCI_DOMAINS
@@ -2406,6 +2540,9 @@ static int __init pci_setup(char *str)
pci_cardbus_io_size = memparse(str + 9, &str);
} else if (!strncmp(str, "cbmemsize=", 10)) {
pci_cardbus_mem_size = memparse(str + 10, &str);
+ } else if (!strncmp(str, "resource_alignment=", 19)) {
+ pci_set_resource_alignment_param(str + 19,
+ strlen(str + 19));
} else {
printk(KERN_ERR "PCI: Unknown option `%s'\n",
str);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 07c0aa5275e6..d03f6b99f292 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1,6 +1,8 @@
#ifndef DRIVERS_PCI_H
#define DRIVERS_PCI_H
+#include <linux/workqueue.h>
+
#define PCI_CFG_SPACE_SIZE 256
#define PCI_CFG_SPACE_EXP_SIZE 4096
@@ -49,7 +51,6 @@ extern void pci_disable_enabled_device(struct pci_dev *dev);
extern void pci_pm_init(struct pci_dev *dev);
extern void platform_pci_wakeup_init(struct pci_dev *dev);
extern void pci_allocate_cap_save_buffers(struct pci_dev *dev);
-extern int pci_restore_standard_config(struct pci_dev *dev);
static inline bool pci_is_bridge(struct pci_dev *pci_dev)
{
@@ -136,6 +137,12 @@ extern int pcie_mch_quirk;
extern struct device_attribute pci_dev_attrs[];
extern struct device_attribute dev_attr_cpuaffinity;
extern struct device_attribute dev_attr_cpulistaffinity;
+#ifdef CONFIG_HOTPLUG
+extern struct bus_attribute pci_bus_attrs[];
+#else
+#define pci_bus_attrs NULL
+#endif
+
/**
* pci_match_one_device - Tell if a PCI device structure has a matching
@@ -178,6 +185,7 @@ enum pci_bar_type {
pci_bar_mem64, /* A 64-bit memory BAR */
};
+extern int pci_setup_device(struct pci_dev *dev);
extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
struct resource *res, unsigned int reg);
extern int pci_resource_bar(struct pci_dev *dev, int resno,
@@ -195,4 +203,60 @@ static inline int pci_ari_enabled(struct pci_bus *bus)
return bus->self && bus->self->ari_enabled;
}
+#ifdef CONFIG_PCI_QUIRKS
+extern int pci_is_reassigndev(struct pci_dev *dev);
+resource_size_t pci_specified_resource_alignment(struct pci_dev *dev);
+extern void pci_disable_bridge_window(struct pci_dev *dev);
+#endif
+
+/* Single Root I/O Virtualization */
+struct pci_sriov {
+ int pos; /* capability position */
+ int nres; /* number of resources */
+ u32 cap; /* SR-IOV Capabilities */
+ u16 ctrl; /* SR-IOV Control */
+ u16 total; /* total VFs associated with the PF */
+ u16 initial; /* initial VFs associated with the PF */
+ u16 nr_virtfn; /* number of VFs available */
+ u16 offset; /* first VF Routing ID offset */
+ u16 stride; /* following VF stride */
+ u32 pgsz; /* page size for BAR alignment */
+ u8 link; /* Function Dependency Link */
+ struct pci_dev *dev; /* lowest numbered PF */
+ struct pci_dev *self; /* this PF */
+ struct mutex lock; /* lock for VF bus */
+ struct work_struct mtask; /* VF Migration task */
+ u8 __iomem *mstate; /* VF Migration State Array */
+};
+
+#ifdef CONFIG_PCI_IOV
+extern int pci_iov_init(struct pci_dev *dev);
+extern void pci_iov_release(struct pci_dev *dev);
+extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+ enum pci_bar_type *type);
+extern void pci_restore_iov_state(struct pci_dev *dev);
+extern int pci_iov_bus_range(struct pci_bus *bus);
+#else
+static inline int pci_iov_init(struct pci_dev *dev)
+{
+ return -ENODEV;
+}
+static inline void pci_iov_release(struct pci_dev *dev)
+
+{
+}
+static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+ enum pci_bar_type *type)
+{
+ return 0;
+}
+static inline void pci_restore_iov_state(struct pci_dev *dev)
+{
+}
+static inline int pci_iov_bus_range(struct pci_bus *bus)
+{
+ return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
#endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index e390707661dd..32ade5af927e 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -38,30 +38,13 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
-static int __devinit aer_probe (struct pcie_device *dev,
- const struct pcie_port_service_id *id );
+static int __devinit aer_probe (struct pcie_device *dev);
static void aer_remove(struct pcie_device *dev);
-static int aer_suspend(struct pcie_device *dev, pm_message_t state)
-{return 0;}
-static int aer_resume(struct pcie_device *dev) {return 0;}
static pci_ers_result_t aer_error_detected(struct pci_dev *dev,
enum pci_channel_state error);
static void aer_error_resume(struct pci_dev *dev);
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
-/*
- * PCI Express bus's AER Root service driver data structure
- */
-static struct pcie_port_service_id aer_id[] = {
- {
- .vendor = PCI_ANY_ID,
- .device = PCI_ANY_ID,
- .port_type = PCIE_RC_PORT,
- .service_type = PCIE_PORT_SERVICE_AER,
- },
- { /* end: all zeroes */ }
-};
-
static struct pci_error_handlers aer_error_handlers = {
.error_detected = aer_error_detected,
.resume = aer_error_resume,
@@ -69,14 +52,12 @@ static struct pci_error_handlers aer_error_handlers = {
static struct pcie_port_service_driver aerdriver = {
.name = "aer",
- .id_table = &aer_id[0],
+ .port_type = PCIE_ANY_PORT,
+ .service = PCIE_PORT_SERVICE_AER,
.probe = aer_probe,
.remove = aer_remove,
- .suspend = aer_suspend,
- .resume = aer_resume,
-
.err_handler = &aer_error_handlers,
.reset_link = aer_root_reset,
@@ -207,8 +188,7 @@ static void aer_remove(struct pcie_device *dev)
*
* Invoked when PCI Express bus loads AER service driver.
**/
-static int __devinit aer_probe (struct pcie_device *dev,
- const struct pcie_port_service_id *id )
+static int __devinit aer_probe (struct pcie_device *dev)
{
int status;
struct aer_rpc *rpc;
diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c
index ebce26c37049..8edb2f300e8f 100644
--- a/drivers/pci/pcie/aer/aerdrv_acpi.c
+++ b/drivers/pci/pcie/aer/aerdrv_acpi.c
@@ -38,7 +38,7 @@ int aer_osc_setup(struct pcie_device *pciedev)
handle = acpi_find_root_bridge_handle(pdev);
if (handle) {
- status = pci_osc_control_set(handle,
+ status = acpi_pci_osc_control_set(handle,
OSC_PCI_EXPRESS_AER_CONTROL |
OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
}
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 382575007382..307452f30035 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -351,21 +351,21 @@ static int find_aer_service_iter(struct device *device, void *data)
{
struct device_driver *driver;
struct pcie_port_service_driver *service_driver;
- struct pcie_device *pcie_dev;
struct find_aer_service_data *result;
result = (struct find_aer_service_data *) data;
if (device->bus == &pcie_port_bus_type) {
- pcie_dev = to_pcie_device(device);
- if (pcie_dev->id.port_type == PCIE_SW_DOWNSTREAM_PORT)
+ struct pcie_port_data *port_data;
+
+ port_data = pci_get_drvdata(to_pcie_device(device)->port);
+ if (port_data->port_type == PCIE_SW_DOWNSTREAM_PORT)
result->is_downstream = 1;
driver = device->driver;
if (driver) {
service_driver = to_service_driver(driver);
- if (service_driver->id_table->service_type ==
- PCIE_PORT_SERVICE_AER) {
+ if (service_driver->service == PCIE_PORT_SERVICE_AER) {
result->aer_driver = service_driver;
return 1;
}
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index 2529f3f2ea5a..17ad53868f9f 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -25,19 +25,21 @@
#define PCIE_CAPABILITIES_REG 0x2
#define PCIE_SLOT_CAPABILITIES_REG 0x14
#define PCIE_PORT_DEVICE_MAXSERVICES 4
+#define PCIE_PORT_MSI_VECTOR_MASK 0x1f
+/*
+ * According to the PCI Express Base Specification 2.0, the indices of the MSI-X
+ * table entires used by port services must not exceed 31
+ */
+#define PCIE_PORT_MAX_MSIX_ENTRIES 32
#define get_descriptor_id(type, service) (((type - 4) << 4) | service)
-struct pcie_port_device_ext {
- int interrupt_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */
-};
-
extern struct bus_type pcie_port_bus_type;
extern int pcie_port_device_probe(struct pci_dev *dev);
extern int pcie_port_device_register(struct pci_dev *dev);
#ifdef CONFIG_PM
-extern int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state);
-extern int pcie_port_device_resume(struct pci_dev *dev);
+extern int pcie_port_device_suspend(struct device *dev);
+extern int pcie_port_device_resume(struct device *dev);
#endif
extern void pcie_port_device_remove(struct pci_dev *dev);
extern int __must_check pcie_port_bus_register(void);
diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c
index eec89b767f9f..ef3a4eeaebb4 100644
--- a/drivers/pci/pcie/portdrv_bus.c
+++ b/drivers/pci/pcie/portdrv_bus.c
@@ -26,20 +26,22 @@ EXPORT_SYMBOL_GPL(pcie_port_bus_type);
static int pcie_port_bus_match(struct device *dev, struct device_driver *drv)
{
struct pcie_device *pciedev;
+ struct pcie_port_data *port_data;
struct pcie_port_service_driver *driver;
if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type)
return 0;
-
+
pciedev = to_pcie_device(dev);
driver = to_service_driver(drv);
- if ( (driver->id_table->vendor != PCI_ANY_ID &&
- driver->id_table->vendor != pciedev->id.vendor) ||
- (driver->id_table->device != PCI_ANY_ID &&
- driver->id_table->device != pciedev->id.device) ||
- (driver->id_table->port_type != PCIE_ANY_PORT &&
- driver->id_table->port_type != pciedev->id.port_type) ||
- driver->id_table->service_type != pciedev->id.service_type )
+
+ if (driver->service != pciedev->service)
+ return 0;
+
+ port_data = pci_get_drvdata(pciedev->port);
+
+ if (driver->port_type != PCIE_ANY_PORT
+ && driver->port_type != port_data->port_type)
return 0;
return 1;
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 8b3f8c18032f..e39982503863 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -15,10 +15,9 @@
#include <linux/slab.h>
#include <linux/pcieport_if.h>
+#include "../pci.h"
#include "portdrv.h"
-extern int pcie_mch_quirk; /* MSI-quirk Indicator */
-
/**
* release_pcie_device - free PCI Express port service device structure
* @dev: Port service device to release
@@ -31,26 +30,150 @@ static void release_pcie_device(struct device *dev)
kfree(to_pcie_device(dev));
}
-static int is_msi_quirked(struct pci_dev *dev)
+/**
+ * pcie_port_msix_add_entry - add entry to given array of MSI-X entries
+ * @entries: Array of MSI-X entries
+ * @new_entry: Index of the entry to add to the array
+ * @nr_entries: Number of entries aleady in the array
+ *
+ * Return value: Position of the added entry in the array
+ */
+static int pcie_port_msix_add_entry(
+ struct msix_entry *entries, int new_entry, int nr_entries)
{
- int port_type, quirk = 0;
+ int j;
+
+ for (j = 0; j < nr_entries; j++)
+ if (entries[j].entry == new_entry)
+ return j;
+
+ entries[j].entry = new_entry;
+ return j;
+}
+
+/**
+ * pcie_port_enable_msix - try to set up MSI-X as interrupt mode for given port
+ * @dev: PCI Express port to handle
+ * @vectors: Array of interrupt vectors to populate
+ * @mask: Bitmask of port capabilities returned by get_port_device_capability()
+ *
+ * Return value: 0 on success, error code on failure
+ */
+static int pcie_port_enable_msix(struct pci_dev *dev, int *vectors, int mask)
+{
+ struct msix_entry *msix_entries;
+ int idx[PCIE_PORT_DEVICE_MAXSERVICES];
+ int nr_entries, status, pos, i, nvec;
u16 reg16;
+ u32 reg32;
- pci_read_config_word(dev,
- pci_find_capability(dev, PCI_CAP_ID_EXP) +
- PCIE_CAPABILITIES_REG, &reg16);
- port_type = (reg16 >> 4) & PORT_TYPE_MASK;
- switch(port_type) {
- case PCIE_RC_PORT:
- if (pcie_mch_quirk == 1)
- quirk = 1;
- break;
- case PCIE_SW_UPSTREAM_PORT:
- case PCIE_SW_DOWNSTREAM_PORT:
- default:
- break;
+ nr_entries = pci_msix_table_size(dev);
+ if (!nr_entries)
+ return -EINVAL;
+ if (nr_entries > PCIE_PORT_MAX_MSIX_ENTRIES)
+ nr_entries = PCIE_PORT_MAX_MSIX_ENTRIES;
+
+ msix_entries = kzalloc(sizeof(*msix_entries) * nr_entries, GFP_KERNEL);
+ if (!msix_entries)
+ return -ENOMEM;
+
+ /*
+ * Allocate as many entries as the port wants, so that we can check
+ * which of them will be useful. Moreover, if nr_entries is correctly
+ * equal to the number of entries this port actually uses, we'll happily
+ * go through without any tricks.
+ */
+ for (i = 0; i < nr_entries; i++)
+ msix_entries[i].entry = i;
+
+ status = pci_enable_msix(dev, msix_entries, nr_entries);
+ if (status)
+ goto Exit;
+
+ for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
+ idx[i] = -1;
+ status = -EIO;
+ nvec = 0;
+
+ if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) {
+ int entry;
+
+ /*
+ * The code below follows the PCI Express Base Specification 2.0
+ * stating in Section 6.1.6 that "PME and Hot-Plug Event
+ * interrupts (when both are implemented) always share the same
+ * MSI or MSI-X vector, as indicated by the Interrupt Message
+ * Number field in the PCI Express Capabilities register", where
+ * according to Section 7.8.2 of the specification "For MSI-X,
+ * the value in this field indicates which MSI-X Table entry is
+ * used to generate the interrupt message."
+ */
+ pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ pci_read_config_word(dev, pos + PCIE_CAPABILITIES_REG, &reg16);
+ entry = (reg16 >> 9) & PCIE_PORT_MSI_VECTOR_MASK;
+ if (entry >= nr_entries)
+ goto Error;
+
+ i = pcie_port_msix_add_entry(msix_entries, entry, nvec);
+ if (i == nvec)
+ nvec++;
+
+ idx[PCIE_PORT_SERVICE_PME_SHIFT] = i;
+ idx[PCIE_PORT_SERVICE_HP_SHIFT] = i;
+ }
+
+ if (mask & PCIE_PORT_SERVICE_AER) {
+ int entry;
+
+ /*
+ * The code below follows Section 7.10.10 of the PCI Express
+ * Base Specification 2.0 stating that bits 31-27 of the Root
+ * Error Status Register contain a value indicating which of the
+ * MSI/MSI-X vectors assigned to the port is going to be used
+ * for AER, where "For MSI-X, the value in this register
+ * indicates which MSI-X Table entry is used to generate the
+ * interrupt message."
+ */
+ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+ pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &reg32);
+ entry = reg32 >> 27;
+ if (entry >= nr_entries)
+ goto Error;
+
+ i = pcie_port_msix_add_entry(msix_entries, entry, nvec);
+ if (i == nvec)
+ nvec++;
+
+ idx[PCIE_PORT_SERVICE_AER_SHIFT] = i;
}
- return quirk;
+
+ /*
+ * If nvec is equal to the allocated number of entries, we can just use
+ * what we have. Otherwise, the port has some extra entries not for the
+ * services we know and we need to work around that.
+ */
+ if (nvec == nr_entries) {
+ status = 0;
+ } else {
+ /* Drop the temporary MSI-X setup */
+ pci_disable_msix(dev);
+
+ /* Now allocate the MSI-X vectors for real */
+ status = pci_enable_msix(dev, msix_entries, nvec);
+ if (status)
+ goto Exit;
+ }
+
+ for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
+ vectors[i] = idx[i] >= 0 ? msix_entries[idx[i]].vector : -1;
+
+ Exit:
+ kfree(msix_entries);
+ return status;
+
+ Error:
+ pci_disable_msix(dev);
+ goto Exit;
}
/**
@@ -64,47 +187,32 @@ static int is_msi_quirked(struct pci_dev *dev)
*/
static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask)
{
- int i, pos, nvec, status = -EINVAL;
- int interrupt_mode = PCIE_PORT_INTx_MODE;
+ struct pcie_port_data *port_data = pci_get_drvdata(dev);
+ int irq, interrupt_mode = PCIE_PORT_NO_IRQ;
+ int i;
- /* Set INTx as default */
- for (i = 0, nvec = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
- if (mask & (1 << i))
- nvec++;
- vectors[i] = dev->irq;
- }
-
/* Check MSI quirk */
- if (is_msi_quirked(dev))
- return interrupt_mode;
-
- /* Select MSI-X over MSI if supported */
- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
- if (pos) {
- struct msix_entry msix_entries[PCIE_PORT_DEVICE_MAXSERVICES] =
- {{0, 0}, {0, 1}, {0, 2}, {0, 3}};
- status = pci_enable_msix(dev, msix_entries, nvec);
- if (!status) {
- int j = 0;
-
- interrupt_mode = PCIE_PORT_MSIX_MODE;
- for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
- if (mask & (1 << i))
- vectors[i] = msix_entries[j++].vector;
- }
- }
- }
- if (status) {
- pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
- if (pos) {
- status = pci_enable_msi(dev);
- if (!status) {
- interrupt_mode = PCIE_PORT_MSI_MODE;
- for (i = 0;i < PCIE_PORT_DEVICE_MAXSERVICES;i++)
- vectors[i] = dev->irq;
- }
- }
- }
+ if (port_data->port_type == PCIE_RC_PORT && pcie_mch_quirk)
+ goto Fallback;
+
+ /* Try to use MSI-X if supported */
+ if (!pcie_port_enable_msix(dev, vectors, mask))
+ return PCIE_PORT_MSIX_MODE;
+
+ /* We're not going to use MSI-X, so try MSI and fall back to INTx */
+ if (!pci_enable_msi(dev))
+ interrupt_mode = PCIE_PORT_MSI_MODE;
+
+ Fallback:
+ if (interrupt_mode == PCIE_PORT_NO_IRQ && dev->pin)
+ interrupt_mode = PCIE_PORT_INTx_MODE;
+
+ irq = interrupt_mode != PCIE_PORT_NO_IRQ ? dev->irq : -1;
+ for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
+ vectors[i] = irq;
+
+ vectors[PCIE_PORT_SERVICE_VC_SHIFT] = -1;
+
return interrupt_mode;
}
@@ -132,13 +240,11 @@ static int get_port_device_capability(struct pci_dev *dev)
pos + PCIE_SLOT_CAPABILITIES_REG, &reg32);
if (reg32 & SLOT_HP_CAPABLE_MASK)
services |= PCIE_PORT_SERVICE_HP;
- }
- /* PME Capable - root port capability */
- if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT)
- services |= PCIE_PORT_SERVICE_PME;
-
+ }
+ /* AER capable */
if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))
services |= PCIE_PORT_SERVICE_AER;
+ /* VC support */
if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC))
services |= PCIE_PORT_SERVICE_VC;
@@ -152,20 +258,17 @@ static int get_port_device_capability(struct pci_dev *dev)
* @port_type: Type of the port
* @service_type: Type of service to associate with the service device
* @irq: Interrupt vector to associate with the service device
- * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI)
*/
static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev,
- int port_type, int service_type, int irq, int irq_mode)
+ int service_type, int irq)
{
+ struct pcie_port_data *port_data = pci_get_drvdata(parent);
struct device *device;
+ int port_type = port_data->port_type;
dev->port = parent;
- dev->interrupt_mode = irq_mode;
dev->irq = irq;
- dev->id.vendor = parent->vendor;
- dev->id.device = parent->device;
- dev->id.port_type = port_type;
- dev->id.service_type = (1 << service_type);
+ dev->service = service_type;
/* Initialize generic device interface */
device = &dev->device;
@@ -185,10 +288,9 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev,
* @port_type: Type of the port
* @service_type: Type of service to associate with the service device
* @irq: Interrupt vector to associate with the service device
- * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI)
*/
static struct pcie_device* alloc_pcie_device(struct pci_dev *parent,
- int port_type, int service_type, int irq, int irq_mode)
+ int service_type, int irq)
{
struct pcie_device *device;
@@ -196,7 +298,7 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent,
if (!device)
return NULL;
- pcie_device_init(parent, device, port_type, service_type, irq,irq_mode);
+ pcie_device_init(parent, device, service_type, irq);
return device;
}
@@ -230,63 +332,90 @@ int pcie_port_device_probe(struct pci_dev *dev)
*/
int pcie_port_device_register(struct pci_dev *dev)
{
- struct pcie_port_device_ext *p_ext;
- int status, type, capabilities, irq_mode, i;
+ struct pcie_port_data *port_data;
+ int status, capabilities, irq_mode, i, nr_serv;
int vectors[PCIE_PORT_DEVICE_MAXSERVICES];
u16 reg16;
- /* Allocate port device extension */
- if (!(p_ext = kmalloc(sizeof(struct pcie_port_device_ext), GFP_KERNEL)))
+ port_data = kzalloc(sizeof(*port_data), GFP_KERNEL);
+ if (!port_data)
return -ENOMEM;
-
- pci_set_drvdata(dev, p_ext);
+ pci_set_drvdata(dev, port_data);
/* Get port type */
pci_read_config_word(dev,
pci_find_capability(dev, PCI_CAP_ID_EXP) +
PCIE_CAPABILITIES_REG, &reg16);
- type = (reg16 >> 4) & PORT_TYPE_MASK;
+ port_data->port_type = (reg16 >> 4) & PORT_TYPE_MASK;
- /* Now get port services */
capabilities = get_port_device_capability(dev);
+ /* Root ports are capable of generating PME too */
+ if (port_data->port_type == PCIE_RC_PORT)
+ capabilities |= PCIE_PORT_SERVICE_PME;
+
irq_mode = assign_interrupt_mode(dev, vectors, capabilities);
- p_ext->interrupt_mode = irq_mode;
+ if (irq_mode == PCIE_PORT_NO_IRQ) {
+ /*
+ * Don't use service devices that require interrupts if there is
+ * no way to generate them.
+ */
+ if (!(capabilities & PCIE_PORT_SERVICE_VC)) {
+ status = -ENODEV;
+ goto Error;
+ }
+ capabilities = PCIE_PORT_SERVICE_VC;
+ }
+ port_data->port_irq_mode = irq_mode;
+
+ status = pci_enable_device(dev);
+ if (status)
+ goto Error;
+ pci_set_master(dev);
/* Allocate child services if any */
- for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
+ for (i = 0, nr_serv = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
struct pcie_device *child;
+ int service = 1 << i;
+
+ if (!(capabilities & service))
+ continue;
- if (capabilities & (1 << i)) {
- child = alloc_pcie_device(
- dev, /* parent */
- type, /* port type */
- i, /* service type */
- vectors[i], /* irq */
- irq_mode /* interrupt mode */);
- if (child) {
- status = device_register(&child->device);
- if (status) {
- kfree(child);
- continue;
- }
- get_device(&child->device);
- }
+ child = alloc_pcie_device(dev, service, vectors[i]);
+ if (!child)
+ continue;
+
+ status = device_register(&child->device);
+ if (status) {
+ kfree(child);
+ continue;
}
+
+ get_device(&child->device);
+ nr_serv++;
+ }
+ if (!nr_serv) {
+ pci_disable_device(dev);
+ status = -ENODEV;
+ goto Error;
}
+
return 0;
+
+ Error:
+ kfree(port_data);
+ return status;
}
#ifdef CONFIG_PM
static int suspend_iter(struct device *dev, void *data)
{
struct pcie_port_service_driver *service_driver;
- pm_message_t state = * (pm_message_t *) data;
if ((dev->bus == &pcie_port_bus_type) &&
(dev->driver)) {
service_driver = to_service_driver(dev->driver);
if (service_driver->suspend)
- service_driver->suspend(to_pcie_device(dev), state);
+ service_driver->suspend(to_pcie_device(dev));
}
return 0;
}
@@ -294,11 +423,10 @@ static int suspend_iter(struct device *dev, void *data)
/**
* pcie_port_device_suspend - suspend port services associated with a PCIe port
* @dev: PCI Express port to handle
- * @state: Representation of system power management transition in progress
*/
-int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state)
+int pcie_port_device_suspend(struct device *dev)
{
- return device_for_each_child(&dev->dev, &state, suspend_iter);
+ return device_for_each_child(dev, NULL, suspend_iter);
}
static int resume_iter(struct device *dev, void *data)
@@ -318,24 +446,17 @@ static int resume_iter(struct device *dev, void *data)
* pcie_port_device_suspend - resume port services associated with a PCIe port
* @dev: PCI Express port to handle
*/
-int pcie_port_device_resume(struct pci_dev *dev)
+int pcie_port_device_resume(struct device *dev)
{
- return device_for_each_child(&dev->dev, NULL, resume_iter);
+ return device_for_each_child(dev, NULL, resume_iter);
}
-#endif
+#endif /* PM */
static int remove_iter(struct device *dev, void *data)
{
- struct pcie_port_service_driver *service_driver;
-
if (dev->bus == &pcie_port_bus_type) {
- if (dev->driver) {
- service_driver = to_service_driver(dev->driver);
- if (service_driver->remove)
- service_driver->remove(to_pcie_device(dev));
- }
- *(unsigned long*)data = (unsigned long)dev;
- return 1;
+ put_device(dev);
+ device_unregister(dev);
}
return 0;
}
@@ -349,25 +470,21 @@ static int remove_iter(struct device *dev, void *data)
*/
void pcie_port_device_remove(struct pci_dev *dev)
{
- struct device *device;
- unsigned long device_addr;
- int interrupt_mode = PCIE_PORT_INTx_MODE;
- int status;
+ struct pcie_port_data *port_data = pci_get_drvdata(dev);
- do {
- status = device_for_each_child(&dev->dev, &device_addr, remove_iter);
- if (status) {
- device = (struct device*)device_addr;
- interrupt_mode = (to_pcie_device(device))->interrupt_mode;
- put_device(device);
- device_unregister(device);
- }
- } while (status);
- /* Switch to INTx by default if MSI enabled */
- if (interrupt_mode == PCIE_PORT_MSIX_MODE)
+ device_for_each_child(&dev->dev, NULL, remove_iter);
+ pci_disable_device(dev);
+
+ switch (port_data->port_irq_mode) {
+ case PCIE_PORT_MSIX_MODE:
pci_disable_msix(dev);
- else if (interrupt_mode == PCIE_PORT_MSI_MODE)
+ break;
+ case PCIE_PORT_MSI_MODE:
pci_disable_msi(dev);
+ break;
+ }
+
+ kfree(port_data);
}
/**
@@ -392,7 +509,7 @@ static int pcie_port_probe_service(struct device *dev)
return -ENODEV;
pciedev = to_pcie_device(dev);
- status = driver->probe(pciedev, driver->id_table);
+ status = driver->probe(pciedev);
if (!status) {
dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n",
driver->name);
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 5ea566e20b37..b924e2463f85 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -32,11 +32,6 @@ MODULE_LICENSE("GPL");
/* global data */
static const char device_name[] = "pcieport-driver";
-static int pcie_portdrv_save_config(struct pci_dev *dev)
-{
- return pci_save_state(dev);
-}
-
static int pcie_portdrv_restore_config(struct pci_dev *dev)
{
int retval;
@@ -49,21 +44,21 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev)
}
#ifdef CONFIG_PM
-static int pcie_portdrv_suspend(struct pci_dev *dev, pm_message_t state)
-{
- return pcie_port_device_suspend(dev, state);
+static struct dev_pm_ops pcie_portdrv_pm_ops = {
+ .suspend = pcie_port_device_suspend,
+ .resume = pcie_port_device_resume,
+ .freeze = pcie_port_device_suspend,
+ .thaw = pcie_port_device_resume,
+ .poweroff = pcie_port_device_suspend,
+ .restore = pcie_port_device_resume,
+};
-}
+#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops)
-static int pcie_portdrv_resume(struct pci_dev *dev)
-{
- pci_set_master(dev);
- return pcie_port_device_resume(dev);
-}
-#else
-#define pcie_portdrv_suspend NULL
-#define pcie_portdrv_resume NULL
-#endif
+#else /* !PM */
+
+#define PCIE_PORTDRV_PM_OPS NULL
+#endif /* !PM */
/*
* pcie_portdrv_probe - Probe PCI-Express port devices
@@ -82,20 +77,15 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev,
if (status)
return status;
- if (pci_enable_device(dev) < 0)
- return -ENODEV;
-
- pci_set_master(dev);
if (!dev->irq && dev->pin) {
dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; "
"check vendor BIOS\n", dev->vendor, dev->device);
}
- if (pcie_port_device_register(dev)) {
- pci_disable_device(dev);
- return -ENOMEM;
- }
+ status = pcie_port_device_register(dev);
+ if (status)
+ return status;
- pcie_portdrv_save_config(dev);
+ pci_save_state(dev);
return 0;
}
@@ -104,7 +94,6 @@ static void pcie_portdrv_remove (struct pci_dev *dev)
{
pcie_port_device_remove(dev);
pci_disable_device(dev);
- kfree(pci_get_drvdata(dev));
}
static int error_detected_iter(struct device *device, void *data)
@@ -278,10 +267,9 @@ static struct pci_driver pcie_portdriver = {
.probe = pcie_portdrv_probe,
.remove = pcie_portdrv_remove,
- .suspend = pcie_portdrv_suspend,
- .resume = pcie_portdrv_resume,
-
.err_handler = &pcie_portdrv_err_handler,
+
+ .driver.pm = PCIE_PORTDRV_PM_OPS,
};
static int __init pcie_portdrv_init(void)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 55ec44a27e89..8eb50dffb78a 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -287,7 +287,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
struct resource *res;
int i;
- if (!dev) /* It's a host bus, nothing to read */
+ if (!child->parent) /* It's a host bus, nothing to read */
return;
if (dev->transparent) {
@@ -511,21 +511,21 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
/*
* If we already got to this bus through a different bridge,
- * ignore it. This can happen with the i450NX chipset.
+ * don't re-add it. This can happen with the i450NX chipset.
+ *
+ * However, we continue to descend down the hierarchy and
+ * scan remaining child buses.
*/
- if (pci_find_bus(pci_domain_nr(bus), busnr)) {
- dev_info(&dev->dev, "bus %04x:%02x already known\n",
- pci_domain_nr(bus), busnr);
- goto out;
+ child = pci_find_bus(pci_domain_nr(bus), busnr);
+ if (!child) {
+ child = pci_add_new_bus(bus, dev, busnr);
+ if (!child)
+ goto out;
+ child->primary = buses & 0xFF;
+ child->subordinate = (buses >> 16) & 0xFF;
+ child->bridge_ctl = bctl;
}
- child = pci_add_new_bus(bus, dev, busnr);
- if (!child)
- goto out;
- child->primary = buses & 0xFF;
- child->subordinate = (buses >> 16) & 0xFF;
- child->bridge_ctl = bctl;
-
cmax = pci_scan_child_bus(child);
if (cmax > max)
max = cmax;
@@ -674,6 +674,19 @@ static void pci_read_irq(struct pci_dev *dev)
dev->irq = irq;
}
+static void set_pcie_port_type(struct pci_dev *pdev)
+{
+ int pos;
+ u16 reg16;
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+ if (!pos)
+ return;
+ pdev->is_pcie = 1;
+ pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
+ pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
+}
+
#define LEGACY_IO_RESOURCE (IORESOURCE_IO | IORESOURCE_PCI_FIXED)
/**
@@ -683,12 +696,33 @@ static void pci_read_irq(struct pci_dev *dev)
* Initialize the device structure with information about the device's
* vendor,class,memory and IO-space addresses,IRQ lines etc.
* Called at initialisation of the PCI subsystem and by CardBus services.
- * Returns 0 on success and -1 if unknown type of device (not normal, bridge
- * or CardBus).
+ * Returns 0 on success and negative if unknown type of device (not normal,
+ * bridge or CardBus).
*/
-static int pci_setup_device(struct pci_dev * dev)
+int pci_setup_device(struct pci_dev *dev)
{
u32 class;
+ u8 hdr_type;
+ struct pci_slot *slot;
+
+ if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type))
+ return -EIO;
+
+ dev->sysdata = dev->bus->sysdata;
+ dev->dev.parent = dev->bus->bridge;
+ dev->dev.bus = &pci_bus_type;
+ dev->hdr_type = hdr_type & 0x7f;
+ dev->multifunction = !!(hdr_type & 0x80);
+ dev->error_state = pci_channel_io_normal;
+ set_pcie_port_type(dev);
+
+ list_for_each_entry(slot, &dev->bus->slots, list)
+ if (PCI_SLOT(dev->devfn) == slot->number)
+ dev->slot = slot;
+
+ /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
+ set this higher, assuming the system even supports it. */
+ dev->dma_mask = 0xffffffff;
dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
dev->bus->number, PCI_SLOT(dev->devfn),
@@ -703,12 +737,14 @@ static int pci_setup_device(struct pci_dev * dev)
dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n",
dev->vendor, dev->device, class, dev->hdr_type);
+ /* need to have dev->class ready */
+ dev->cfg_size = pci_cfg_space_size(dev);
+
/* "Unknown power state" */
dev->current_state = PCI_UNKNOWN;
/* Early fixups, before probing the BARs */
pci_fixup_device(pci_fixup_early, dev);
- class = dev->class >> 8;
switch (dev->hdr_type) { /* header type */
case PCI_HEADER_TYPE_NORMAL: /* standard header */
@@ -770,7 +806,7 @@ static int pci_setup_device(struct pci_dev * dev)
default: /* unknown header */
dev_err(&dev->dev, "unknown header type %02x, "
"ignoring device\n", dev->hdr_type);
- return -1;
+ return -EIO;
bad:
dev_err(&dev->dev, "ignoring class %02x (doesn't match header "
@@ -785,6 +821,7 @@ static int pci_setup_device(struct pci_dev * dev)
static void pci_release_capabilities(struct pci_dev *dev)
{
pci_vpd_release(dev);
+ pci_iov_release(dev);
}
/**
@@ -803,19 +840,6 @@ static void pci_release_dev(struct device *dev)
kfree(pci_dev);
}
-static void set_pcie_port_type(struct pci_dev *pdev)
-{
- int pos;
- u16 reg16;
-
- pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
- if (!pos)
- return;
- pdev->is_pcie = 1;
- pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
- pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
-}
-
/**
* pci_cfg_space_size - get the configuration space size of the PCI device.
* @dev: PCI device
@@ -847,6 +871,11 @@ int pci_cfg_space_size(struct pci_dev *dev)
{
int pos;
u32 status;
+ u16 class;
+
+ class = dev->class >> 8;
+ if (class == PCI_CLASS_BRIDGE_HOST)
+ return pci_cfg_space_size_ext(dev);
pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
if (!pos) {
@@ -891,9 +920,7 @@ EXPORT_SYMBOL(alloc_pci_dev);
static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
{
struct pci_dev *dev;
- struct pci_slot *slot;
u32 l;
- u8 hdr_type;
int delay = 1;
if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l))
@@ -920,34 +947,16 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
}
}
- if (pci_bus_read_config_byte(bus, devfn, PCI_HEADER_TYPE, &hdr_type))
- return NULL;
-
dev = alloc_pci_dev();
if (!dev)
return NULL;
dev->bus = bus;
- dev->sysdata = bus->sysdata;
- dev->dev.parent = bus->bridge;
- dev->dev.bus = &pci_bus_type;
dev->devfn = devfn;
- dev->hdr_type = hdr_type & 0x7f;
- dev->multifunction = !!(hdr_type & 0x80);
dev->vendor = l & 0xffff;
dev->device = (l >> 16) & 0xffff;
- dev->cfg_size = pci_cfg_space_size(dev);
- dev->error_state = pci_channel_io_normal;
- set_pcie_port_type(dev);
-
- list_for_each_entry(slot, &bus->slots, list)
- if (PCI_SLOT(devfn) == slot->number)
- dev->slot = slot;
- /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
- set this higher, assuming the system even supports it. */
- dev->dma_mask = 0xffffffff;
- if (pci_setup_device(dev) < 0) {
+ if (pci_setup_device(dev)) {
kfree(dev);
return NULL;
}
@@ -972,6 +981,9 @@ static void pci_init_capabilities(struct pci_dev *dev)
/* Alternative Routing-ID Forwarding */
pci_enable_ari(dev);
+
+ /* Single Root I/O Virtualization */
+ pci_iov_init(dev);
}
void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
@@ -1006,6 +1018,12 @@ struct pci_dev *__ref pci_scan_single_device(struct pci_bus *bus, int devfn)
{
struct pci_dev *dev;
+ dev = pci_get_slot(bus, devfn);
+ if (dev) {
+ pci_dev_put(dev);
+ return dev;
+ }
+
dev = pci_scan_device(bus, devfn);
if (!dev)
return NULL;
@@ -1024,35 +1042,27 @@ EXPORT_SYMBOL(pci_scan_single_device);
* Scan a PCI slot on the specified PCI bus for devices, adding
* discovered devices to the @bus->devices list. New devices
* will not have is_added set.
+ *
+ * Returns the number of new devices found.
*/
int pci_scan_slot(struct pci_bus *bus, int devfn)
{
- int func, nr = 0;
- int scan_all_fns;
-
- scan_all_fns = pcibios_scan_all_fns(bus, devfn);
-
- for (func = 0; func < 8; func++, devfn++) {
- struct pci_dev *dev;
-
- dev = pci_scan_single_device(bus, devfn);
- if (dev) {
- nr++;
+ int fn, nr = 0;
+ struct pci_dev *dev;
- /*
- * If this is a single function device,
- * don't scan past the first function.
- */
- if (!dev->multifunction) {
- if (func > 0) {
- dev->multifunction = 1;
- } else {
- break;
- }
+ dev = pci_scan_single_device(bus, devfn);
+ if (dev && !dev->is_added) /* new device? */
+ nr++;
+
+ if ((dev && dev->multifunction) ||
+ (!dev && pcibios_scan_all_fns(bus, devfn))) {
+ for (fn = 1; fn < 8; fn++) {
+ dev = pci_scan_single_device(bus, devfn + fn);
+ if (dev) {
+ if (!dev->is_added)
+ nr++;
+ dev->multifunction = 1;
}
- } else {
- if (func == 0 && !scan_all_fns)
- break;
}
}
@@ -1074,12 +1084,21 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
for (devfn = 0; devfn < 0x100; devfn += 8)
pci_scan_slot(bus, devfn);
+ /* Reserve buses for SR-IOV capability. */
+ max += pci_iov_bus_range(bus);
+
/*
* After performing arch-dependent fixup of the bus, look behind
* all PCI-to-PCI bridges on this bus.
*/
- pr_debug("PCI: Fixups for bus %04x:%02x\n", pci_domain_nr(bus), bus->number);
- pcibios_fixup_bus(bus);
+ if (!bus->is_added) {
+ pr_debug("PCI: Fixups for bus %04x:%02x\n",
+ pci_domain_nr(bus), bus->number);
+ pcibios_fixup_bus(bus);
+ if (pci_is_root_bus(bus))
+ bus->is_added = 1;
+ }
+
for (pass=0; pass < 2; pass++)
list_for_each_entry(dev, &bus->devices, bus_list) {
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
@@ -1114,7 +1133,7 @@ struct pci_bus * pci_create_bus(struct device *parent,
if (!b)
return NULL;
- dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev){
kfree(b);
return NULL;
@@ -1133,7 +1152,6 @@ struct pci_bus * pci_create_bus(struct device *parent,
list_add_tail(&b->node, &pci_root_buses);
up_write(&pci_bus_sem);
- memset(dev, 0, sizeof(*dev));
dev->parent = parent;
dev->release = pci_release_bus_bridge_dev;
dev_set_name(dev, "pci%04x:%02x", pci_domain_nr(b), bus);
@@ -1193,6 +1211,38 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
EXPORT_SYMBOL(pci_scan_bus_parented);
#ifdef CONFIG_HOTPLUG
+/**
+ * pci_rescan_bus - scan a PCI bus for devices.
+ * @bus: PCI bus to scan
+ *
+ * Scan a PCI bus and child buses for new devices, adds them,
+ * and enables them.
+ *
+ * Returns the max number of subordinate bus discovered.
+ */
+unsigned int __ref pci_rescan_bus(struct pci_bus *bus)
+{
+ unsigned int max;
+ struct pci_dev *dev;
+
+ max = pci_scan_child_bus(bus);
+
+ down_read(&pci_bus_sem);
+ list_for_each_entry(dev, &bus->devices, bus_list)
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+ dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+ if (dev->subordinate)
+ pci_bus_size_bridges(dev->subordinate);
+ up_read(&pci_bus_sem);
+
+ pci_bus_assign_resources(bus);
+ pci_enable_bridges(bus);
+ pci_bus_add_devices(bus);
+
+ return max;
+}
+EXPORT_SYMBOL_GPL(pci_rescan_bus);
+
EXPORT_SYMBOL(pci_add_new_bus);
EXPORT_SYMBOL(pci_scan_slot);
EXPORT_SYMBOL(pci_scan_bridge);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 92b9efe9bcaf..0254741bece0 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -24,6 +24,7 @@
#include <linux/kallsyms.h>
#include <linux/dmi.h>
#include <linux/pci-aspm.h>
+#include <linux/ioport.h>
#include "pci.h"
int isa_dma_bridge_buggy;
@@ -34,6 +35,69 @@ int pcie_mch_quirk;
EXPORT_SYMBOL(pcie_mch_quirk);
#ifdef CONFIG_PCI_QUIRKS
+/*
+ * This quirk function disables memory decoding and releases memory resources
+ * of the device specified by kernel's boot parameter 'pci=resource_alignment='.
+ * It also rounds up size to specified alignment.
+ * Later on, the kernel will assign page-aligned memory resource back
+ * to the device.
+ */
+static void __devinit quirk_resource_alignment(struct pci_dev *dev)
+{
+ int i;
+ struct resource *r;
+ resource_size_t align, size;
+ u16 command;
+
+ if (!pci_is_reassigndev(dev))
+ return;
+
+ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
+ (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
+ dev_warn(&dev->dev,
+ "Can't reassign resources to host bridge.\n");
+ return;
+ }
+
+ dev_info(&dev->dev,
+ "Disabling memory decoding and releasing memory resources.\n");
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ command &= ~PCI_COMMAND_MEMORY;
+ pci_write_config_word(dev, PCI_COMMAND, command);
+
+ align = pci_specified_resource_alignment(dev);
+ for (i=0; i < PCI_BRIDGE_RESOURCES; i++) {
+ r = &dev->resource[i];
+ if (!(r->flags & IORESOURCE_MEM))
+ continue;
+ size = resource_size(r);
+ if (size < align) {
+ size = align;
+ dev_info(&dev->dev,
+ "Rounding up size of resource #%d to %#llx.\n",
+ i, (unsigned long long)size);
+ }
+ r->end = size - 1;
+ r->start = 0;
+ }
+ /* Need to disable bridge's resource window,
+ * to enable the kernel to reassign new resource
+ * window later on.
+ */
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
+ (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+ for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
+ r = &dev->resource[i];
+ if (!(r->flags & IORESOURCE_MEM))
+ continue;
+ r->end = resource_size(r) - 1;
+ r->start = 0;
+ }
+ pci_disable_bridge_window(dev);
+ }
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment);
+
/* The Mellanox Tavor device gives false positive parity errors
* Mark this device with a broken_parity_status, to allow
* PCI scanning code to "skip" this now blacklisted device.
@@ -1126,10 +1190,15 @@ static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev)
* its on-board VGA controller */
asus_hides_smbus = 1;
}
- else if (dev->device == PCI_DEVICE_ID_INTEL_82845G_IG)
+ else if (dev->device == PCI_DEVICE_ID_INTEL_82801DB_2)
switch(dev->subsystem_device) {
case 0x00b8: /* Compaq Evo D510 CMT */
case 0x00b9: /* Compaq Evo D510 SFF */
+ /* Motherboard doesn't have Host bridge
+ * subvendor/subdevice IDs and on-board VGA
+ * controller is disabled if an AGP card is
+ * inserted, therefore checking USB UHCI
+ * Controller #1 */
asus_hides_smbus = 1;
}
else if (dev->device == PCI_DEVICE_ID_INTEL_82815_CGC)
@@ -1154,7 +1223,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82855GM_HB, as
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82915GM_HB, asus_hides_smbus_hostbridge);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG3, asus_hides_smbus_hostbridge);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_IG, asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_2, asus_hides_smbus_hostbridge);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, asus_hides_smbus_hostbridge);
static void asus_hides_smbus_lpc(struct pci_dev *dev)
@@ -1664,9 +1733,13 @@ static void __devinit quirk_netmos(struct pci_dev *dev)
* of parallel ports and <S> is the number of serial ports.
*/
switch (dev->device) {
+ case PCI_DEVICE_ID_NETMOS_9835:
+ /* Well, this rule doesn't hold for the following 9835 device */
+ if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM &&
+ dev->subsystem_device == 0x0299)
+ return;
case PCI_DEVICE_ID_NETMOS_9735:
case PCI_DEVICE_ID_NETMOS_9745:
- case PCI_DEVICE_ID_NETMOS_9835:
case PCI_DEVICE_ID_NETMOS_9845:
case PCI_DEVICE_ID_NETMOS_9855:
if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_SERIAL &&
@@ -2078,6 +2151,92 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
PCI_DEVICE_ID_NVIDIA_NVENET_15,
nvenet_msi_disable);
+static int __devinit ht_check_msi_mapping(struct pci_dev *dev)
+{
+ int pos, ttl = 48;
+ int found = 0;
+
+ /* check if there is HT MSI cap or enabled on this device */
+ pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING);
+ while (pos && ttl--) {
+ u8 flags;
+
+ if (found < 1)
+ found = 1;
+ if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
+ &flags) == 0) {
+ if (flags & HT_MSI_FLAGS_ENABLE) {
+ if (found < 2) {
+ found = 2;
+ break;
+ }
+ }
+ }
+ pos = pci_find_next_ht_capability(dev, pos,
+ HT_CAPTYPE_MSI_MAPPING);
+ }
+
+ return found;
+}
+
+static int __devinit host_bridge_with_leaf(struct pci_dev *host_bridge)
+{
+ struct pci_dev *dev;
+ int pos;
+ int i, dev_no;
+ int found = 0;
+
+ dev_no = host_bridge->devfn >> 3;
+ for (i = dev_no + 1; i < 0x20; i++) {
+ dev = pci_get_slot(host_bridge->bus, PCI_DEVFN(i, 0));
+ if (!dev)
+ continue;
+
+ /* found next host bridge ?*/
+ pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE);
+ if (pos != 0) {
+ pci_dev_put(dev);
+ break;
+ }
+
+ if (ht_check_msi_mapping(dev)) {
+ found = 1;
+ pci_dev_put(dev);
+ break;
+ }
+ pci_dev_put(dev);
+ }
+
+ return found;
+}
+
+#define PCI_HT_CAP_SLAVE_CTRL0 4 /* link control */
+#define PCI_HT_CAP_SLAVE_CTRL1 8 /* link control to */
+
+static int __devinit is_end_of_ht_chain(struct pci_dev *dev)
+{
+ int pos, ctrl_off;
+ int end = 0;
+ u16 flags, ctrl;
+
+ pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE);
+
+ if (!pos)
+ goto out;
+
+ pci_read_config_word(dev, pos + PCI_CAP_FLAGS, &flags);
+
+ ctrl_off = ((flags >> 10) & 1) ?
+ PCI_HT_CAP_SLAVE_CTRL0 : PCI_HT_CAP_SLAVE_CTRL1;
+ pci_read_config_word(dev, pos + ctrl_off, &ctrl);
+
+ if (ctrl & (1 << 6))
+ end = 1;
+
+out:
+ return end;
+}
+
static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev)
{
struct pci_dev *host_bridge;
@@ -2102,6 +2261,11 @@ static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev)
if (!found)
return;
+ /* don't enable end_device/host_bridge with leaf directly here */
+ if (host_bridge == dev && is_end_of_ht_chain(host_bridge) &&
+ host_bridge_with_leaf(host_bridge))
+ goto out;
+
/* root did that ! */
if (msi_ht_cap_enabled(host_bridge))
goto out;
@@ -2132,44 +2296,12 @@ static void __devinit ht_disable_msi_mapping(struct pci_dev *dev)
}
}
-static int __devinit ht_check_msi_mapping(struct pci_dev *dev)
-{
- int pos, ttl = 48;
- int found = 0;
-
- /* check if there is HT MSI cap or enabled on this device */
- pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING);
- while (pos && ttl--) {
- u8 flags;
-
- if (found < 1)
- found = 1;
- if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
- &flags) == 0) {
- if (flags & HT_MSI_FLAGS_ENABLE) {
- if (found < 2) {
- found = 2;
- break;
- }
- }
- }
- pos = pci_find_next_ht_capability(dev, pos,
- HT_CAPTYPE_MSI_MAPPING);
- }
-
- return found;
-}
-
-static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
+static void __devinit __nv_msi_ht_cap_quirk(struct pci_dev *dev, int all)
{
struct pci_dev *host_bridge;
int pos;
int found;
- /* Enabling HT MSI mapping on this device breaks MCP51 */
- if (dev->device == 0x270)
- return;
-
/* check if there is HT MSI cap or enabled on this device */
found = ht_check_msi_mapping(dev);
@@ -2193,7 +2325,10 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
/* Host bridge is to HT */
if (found == 1) {
/* it is not enabled, try to enable it */
- nv_ht_enable_msi_mapping(dev);
+ if (all)
+ ht_enable_msi_mapping(dev);
+ else
+ nv_ht_enable_msi_mapping(dev);
}
return;
}
@@ -2205,8 +2340,20 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
/* Host bridge is not to HT, disable HT MSI mapping on this device */
ht_disable_msi_mapping(dev);
}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk);
+
+static void __devinit nv_msi_ht_cap_quirk_all(struct pci_dev *dev)
+{
+ return __nv_msi_ht_cap_quirk(dev, 1);
+}
+
+static void __devinit nv_msi_ht_cap_quirk_leaf(struct pci_dev *dev)
+{
+ return __nv_msi_ht_cap_quirk(dev, 0);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf);
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all);
static void __devinit quirk_msi_intx_disable_bug(struct pci_dev *dev)
{
@@ -2268,6 +2415,54 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4375,
#endif /* CONFIG_PCI_MSI */
+#ifdef CONFIG_PCI_IOV
+
+/*
+ * For Intel 82576 SR-IOV NIC, if BIOS doesn't allocate resources for the
+ * SR-IOV BARs, zero the Flash BAR and program the SR-IOV BARs to use the
+ * old Flash Memory Space.
+ */
+static void __devinit quirk_i82576_sriov(struct pci_dev *dev)
+{
+ int pos, flags;
+ u32 bar, start, size;
+
+ if (PAGE_SIZE > 0x10000)
+ return;
+
+ flags = pci_resource_flags(dev, 0);
+ if ((flags & PCI_BASE_ADDRESS_SPACE) !=
+ PCI_BASE_ADDRESS_SPACE_MEMORY ||
+ (flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK) !=
+ PCI_BASE_ADDRESS_MEM_TYPE_32)
+ return;
+
+ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+ if (!pos)
+ return;
+
+ pci_read_config_dword(dev, pos + PCI_SRIOV_BAR, &bar);
+ if (bar & PCI_BASE_ADDRESS_MEM_MASK)
+ return;
+
+ start = pci_resource_start(dev, 1);
+ size = pci_resource_len(dev, 1);
+ if (!start || size != 0x400000 || start & (size - 1))
+ return;
+
+ pci_resource_flags(dev, 1) = 0;
+ pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, 0);
+ pci_write_config_dword(dev, pos + PCI_SRIOV_BAR, start);
+ pci_write_config_dword(dev, pos + PCI_SRIOV_BAR + 12, start + size / 2);
+
+ dev_info(&dev->dev, "use Flash Memory Space for SR-IOV BARs\n");
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10c9, quirk_i82576_sriov);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10e6, quirk_i82576_sriov);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10e7, quirk_i82576_sriov);
+
+#endif /* CONFIG_PCI_IOV */
+
static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
struct pci_fixup *end)
{
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 042e08924421..86503c14ce7e 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -71,6 +71,9 @@ void pci_remove_bus(struct pci_bus *pci_bus)
down_write(&pci_bus_sem);
list_del(&pci_bus->node);
up_write(&pci_bus_sem);
+ if (!pci_bus->is_added)
+ return;
+
pci_remove_legacy_files(pci_bus);
device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
@@ -92,6 +95,7 @@ EXPORT_SYMBOL(pci_remove_bus);
*/
void pci_remove_bus_device(struct pci_dev *dev)
{
+ pci_stop_bus_device(dev);
if (dev->subordinate) {
struct pci_bus *b = dev->subordinate;
diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 5af8bd538149..710d4ea69568 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -29,7 +29,7 @@ pci_find_upstream_pcie_bridge(struct pci_dev *pdev)
if (pdev->is_pcie)
return NULL;
while (1) {
- if (!pdev->bus->self)
+ if (!pdev->bus->parent)
break;
pdev = pdev->bus->self;
/* a p2p bridge */
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 704608945780..8d9da9d30a61 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -27,7 +27,7 @@
#include <linux/slab.h>
-static void pbus_assign_resources_sorted(struct pci_bus *bus)
+static void pbus_assign_resources_sorted(const struct pci_bus *bus)
{
struct pci_dev *dev;
struct resource *res;
@@ -144,6 +144,9 @@ static void pci_setup_bridge(struct pci_bus *bus)
struct pci_bus_region region;
u32 l, bu, lu, io_upper16;
+ if (pci_is_enabled(bridge))
+ return;
+
dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);
@@ -495,7 +498,7 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus)
}
EXPORT_SYMBOL(pci_bus_size_bridges);
-void __ref pci_bus_assign_resources(struct pci_bus *bus)
+void __ref pci_bus_assign_resources(const struct pci_bus *bus)
{
struct pci_bus *b;
struct pci_dev *dev;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 32e8d88a4619..3039fcb86afc 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -120,6 +120,21 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
return err;
}
+#ifdef CONFIG_PCI_QUIRKS
+void pci_disable_bridge_window(struct pci_dev *dev)
+{
+ dev_dbg(&dev->dev, "Disabling bridge window.\n");
+
+ /* MMIO Base/Limit */
+ pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0);
+
+ /* Prefetchable MMIO Base/Limit */
+ pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
+ pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0);
+ pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff);
+}
+#endif /* CONFIG_PCI_QUIRKS */
+
int pci_assign_resource(struct pci_dev *dev, int resno)
{
struct pci_bus *bus = dev->bus;
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 5a8ccb4f604d..21189447e545 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -1,8 +1,8 @@
/*
* drivers/pci/slot.c
* Copyright (C) 2006 Matthew Wilcox <matthew@wil.cx>
- * Copyright (C) 2006-2008 Hewlett-Packard Development Company, L.P.
- * Alex Chiang <achiang@hp.com>
+ * Copyright (C) 2006-2009 Hewlett-Packard Development Company, L.P.
+ * Alex Chiang <achiang@hp.com>
*/
#include <linux/kobject.h>
@@ -52,8 +52,8 @@ static void pci_slot_release(struct kobject *kobj)
struct pci_dev *dev;
struct pci_slot *slot = to_pci_slot(kobj);
- pr_debug("%s: releasing pci_slot on %x:%d\n", __func__,
- slot->bus->number, slot->number);
+ dev_dbg(&slot->bus->dev, "dev %02x, released physical slot %s\n",
+ slot->number, pci_slot_name(slot));
list_for_each_entry(dev, &slot->bus->devices, bus_list)
if (PCI_SLOT(dev->devfn) == slot->number)
@@ -248,9 +248,8 @@ placeholder:
if (PCI_SLOT(dev->devfn) == slot_nr)
dev->slot = slot;
- /* Don't care if debug printk has a -1 for slot_nr */
- pr_debug("%s: created pci_slot on %04x:%02x:%02x\n",
- __func__, pci_domain_nr(parent), parent->number, slot_nr);
+ dev_dbg(&parent->dev, "dev %02x, created physical slot %s\n",
+ slot_nr, pci_slot_name(slot));
out:
kfree(slot_name);
@@ -299,9 +298,8 @@ EXPORT_SYMBOL_GPL(pci_renumber_slot);
*/
void pci_destroy_slot(struct pci_slot *slot)
{
- pr_debug("%s: dec refcount to %d on %04x:%02x:%02x\n", __func__,
- atomic_read(&slot->kobj.kref.refcount) - 1,
- pci_domain_nr(slot->bus), slot->bus->number, slot->number);
+ dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n",
+ slot->number, atomic_read(&slot->kobj.kref.refcount) - 1);
down_write(&pci_bus_sem);
kobject_put(&slot->kobj);