Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--  arch/powerpc/platforms/pseries/dtl.c             |   4
-rw-r--r--  arch/powerpc/platforms/pseries/firmware.c        |   3
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c     | 173
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-memory.c  |   6
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c            |  18
-rw-r--r--  arch/powerpc/platforms/pseries/msi.c             | 296
-rw-r--r--  arch/powerpc/platforms/pseries/pci_dlpar.c       |   4
-rw-r--r--  arch/powerpc/platforms/pseries/pseries.h         |   2
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c             |   2
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c           |   2
-rw-r--r--  arch/powerpc/platforms/pseries/vas.c             |   2
11 files changed, 390 insertions, 122 deletions
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 982f069e4c31..352af5b14a0f 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -11,10 +11,10 @@
#include <linux/spinlock.h>
#include <asm/smp.h>
#include <linux/uaccess.h>
+#include <linux/debugfs.h>
#include <asm/firmware.h>
#include <asm/dtl.h>
#include <asm/lppaca.h>
-#include <asm/debugfs.h>
#include <asm/plpar_wrappers.h>
#include <asm/machdep.h>
@@ -338,7 +338,7 @@ static int dtl_init(void)
/* set up common debugfs structure */
- dtl_dir = debugfs_create_dir("dtl", powerpc_debugfs_root);
+ dtl_dir = debugfs_create_dir("dtl", arch_debugfs_dir);
debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask);
debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries);
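
The dtl.c hunk is part of a tree-wide powerpc cleanup replacing the private
powerpc_debugfs_root (from the now-removed asm/debugfs.h) with the generic
arch_debugfs_dir exported through linux/debugfs.h. A minimal sketch of the
resulting pattern, using hypothetical names:

    #include <linux/debugfs.h>

    static struct dentry *example_dir;  /* hypothetical */
    static u32 example_val;

    static int __init example_debugfs_init(void)
    {
            /* arch-specific entries now hang off arch_debugfs_dir */
            example_dir = debugfs_create_dir("example", arch_debugfs_dir);
            debugfs_create_u32("value", 0600, example_dir, &example_val);
            return 0;
    }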
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 4c7b7f5a2ebc..f162156b7b68 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -119,10 +119,11 @@ struct vec5_fw_feature {
static __initdata struct vec5_fw_feature
vec5_fw_features_table[] = {
- {FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY},
+ {FW_FEATURE_FORM1_AFFINITY, OV5_FORM1_AFFINITY},
{FW_FEATURE_PRRN, OV5_PRRN},
{FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2},
{FW_FEATURE_DRC_INFO, OV5_DRC_INFO},
+ {FW_FEATURE_FORM2_AFFINITY, OV5_FORM2_AFFINITY},
};
static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
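
The table above maps firmware feature flags to the option vector 5 bits
negotiated with the hypervisor: the old TYPE1 name becomes FORM1, and a new
FORM2 affinity entry is added. A sketch of how such flags are typically
consumed elsewhere (an assumption for illustration, not taken from this
patch):

    if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) {
            /* NUMA distances come from the form 2 distance table */
    } else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) {
            /* distances derived from form 1 associativity levels */
    }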
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 7e970f81d8ff..d646c22e94ab 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -39,6 +39,12 @@
/* This version can't take the spinlock, because it never returns */
static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
+/*
+ * Record the CPU ids used on each node.
+ * Protected by cpu_add_remove_lock.
+ */
+static cpumask_var_t node_recorded_ids_map[MAX_NUMNODES];
+
static void rtas_stop_self(void)
{
static struct rtas_args args;
@@ -139,72 +145,148 @@ static void pseries_cpu_die(unsigned int cpu)
paca_ptrs[cpu]->cpu_start = 0;
}
+/**
+ * find_cpu_id_range - find a linear range of @nthreads free CPU ids.
+ * @nthreads : the number of threads (cpu ids)
+ * @assigned_node : the node the ids should belong to, or NUMA_NO_NODE if
+ *                  free ids from any node can be picked.
+ * @cpu_mask: the returned CPU mask.
+ *
+ * Returns 0 on success.
+ */
+static int find_cpu_id_range(unsigned int nthreads, int assigned_node,
+ cpumask_var_t *cpu_mask)
+{
+ cpumask_var_t candidate_mask;
+ unsigned int cpu, node;
+ int rc = -ENOSPC;
+
+ if (!zalloc_cpumask_var(&candidate_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_clear(*cpu_mask);
+ for (cpu = 0; cpu < nthreads; cpu++)
+ cpumask_set_cpu(cpu, *cpu_mask);
+
+ BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));
+
+ /* Get a bitmap of unoccupied slots. */
+ cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
+
+ if (assigned_node != NUMA_NO_NODE) {
+ /*
+ * Remove the free ids previously assigned to the other nodes. We
+ * only need to walk the online nodes because once a node comes
+ * online it is never taken offline again.
+ */
+ for_each_online_node(node) {
+ if (node == assigned_node)
+ continue;
+ cpumask_andnot(candidate_mask, candidate_mask,
+ node_recorded_ids_map[node]);
+ }
+ }
+
+ if (cpumask_empty(candidate_mask))
+ goto out;
+
+ while (!cpumask_empty(*cpu_mask)) {
+ if (cpumask_subset(*cpu_mask, candidate_mask))
+ /* Found a range where we can insert the new cpu(s) */
+ break;
+ cpumask_shift_left(*cpu_mask, *cpu_mask, nthreads);
+ }
+
+ if (!cpumask_empty(*cpu_mask))
+ rc = 0;
+
+out:
+ free_cpumask_var(candidate_mask);
+ return rc;
+}
+
/*
* Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle
- * here is that a cpu device node may represent up to two logical cpus
+ * here is that a cpu device node may represent multiple logical cpus
* in the SMT case. We must honor the assumption in other code that
* the logical ids for sibling SMT threads x and y are adjacent, such
* that x^1 == y and y^1 == x.
*/
static int pseries_add_processor(struct device_node *np)
{
- unsigned int cpu;
- cpumask_var_t candidate_mask, tmp;
- int err = -ENOSPC, len, nthreads, i;
+ int len, nthreads, node, cpu, assigned_node;
+ int rc = 0;
+ cpumask_var_t cpu_mask;
const __be32 *intserv;
intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
if (!intserv)
return 0;
- zalloc_cpumask_var(&candidate_mask, GFP_KERNEL);
- zalloc_cpumask_var(&tmp, GFP_KERNEL);
-
nthreads = len / sizeof(u32);
- for (i = 0; i < nthreads; i++)
- cpumask_set_cpu(i, tmp);
- cpu_maps_update_begin();
+ if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL))
+ return -ENOMEM;
- BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));
+ /*
+ * Fetch the NUMA node id of the added CPU from the DT nodes read by
+ * dlpar_configure_connector().
+ */
+ node = of_node_to_nid(np);
+ if (node < 0 || !node_possible(node))
+ node = first_online_node;
- /* Get a bitmap of unoccupied slots. */
- cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
- if (cpumask_empty(candidate_mask)) {
- /* If we get here, it most likely means that NR_CPUS is
- * less than the partition's max processors setting.
+ BUG_ON(node == NUMA_NO_NODE);
+ assigned_node = node;
+
+ cpu_maps_update_begin();
+
+ rc = find_cpu_id_range(nthreads, node, &cpu_mask);
+ if (rc && nr_node_ids > 1) {
+ /*
+ * Try again, considering the free CPU ids from any node.
*/
- printk(KERN_ERR "Cannot add cpu %pOF; this system configuration"
- " supports %d logical cpus.\n", np,
- num_possible_cpus());
- goto out_unlock;
+ node = NUMA_NO_NODE;
+ rc = find_cpu_id_range(nthreads, NUMA_NO_NODE, &cpu_mask);
}
- while (!cpumask_empty(tmp))
- if (cpumask_subset(tmp, candidate_mask))
- /* Found a range where we can insert the new cpu(s) */
- break;
- else
- cpumask_shift_left(tmp, tmp, nthreads);
-
- if (cpumask_empty(tmp)) {
- printk(KERN_ERR "Unable to find space in cpu_present_mask for"
- " processor %pOFn with %d thread(s)\n", np,
- nthreads);
- goto out_unlock;
+ if (rc) {
+ pr_err("Cannot add cpu %pOF; this system configuration"
+ " supports %d logical cpus.\n", np, num_possible_cpus());
+ goto out;
}
- for_each_cpu(cpu, tmp) {
+ for_each_cpu(cpu, cpu_mask) {
BUG_ON(cpu_present(cpu));
set_cpu_present(cpu, true);
set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
}
- err = 0;
-out_unlock:
+
+ /* Record the newly used CPU ids for the associated node. */
+ cpumask_or(node_recorded_ids_map[assigned_node],
+ node_recorded_ids_map[assigned_node], cpu_mask);
+
+ /*
+ * If node was set to NUMA_NO_NODE, the CPU ids have been reused from
+ * another node; remove them from the other nodes' masks.
+ */
+ if (node == NUMA_NO_NODE) {
+ cpu = cpumask_first(cpu_mask);
+ pr_warn("Reusing free CPU ids %d-%d from another node\n",
+ cpu, cpu + nthreads - 1);
+ for_each_online_node(node) {
+ if (node == assigned_node)
+ continue;
+ cpumask_andnot(node_recorded_ids_map[node],
+ node_recorded_ids_map[node],
+ cpu_mask);
+ }
+ }
+
+out:
cpu_maps_update_done();
- free_cpumask_var(candidate_mask);
- free_cpumask_var(tmp);
- return err;
+ free_cpumask_var(cpu_mask);
+ return rc;
}
/*
@@ -498,6 +580,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
return saved_rc;
}
+ update_numa_distance(dn);
+
rc = dlpar_online_cpu(dn);
if (rc) {
saved_rc = rc;
@@ -908,6 +992,7 @@ static struct notifier_block pseries_smp_nb = {
static int __init pseries_cpu_hotplug_init(void)
{
int qcss_tok;
+ unsigned int node;
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
ppc_md.cpu_probe = dlpar_cpu_probe;
@@ -929,8 +1014,18 @@ static int __init pseries_cpu_hotplug_init(void)
smp_ops->cpu_die = pseries_cpu_die;
/* Processors can be added/removed only on LPAR */
- if (firmware_has_feature(FW_FEATURE_LPAR))
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ for_each_node(node) {
+ alloc_bootmem_cpumask_var(&node_recorded_ids_map[node]);
+
+ /* Record the ids of the CPUs added at boot time */
+ cpumask_or(node_recorded_ids_map[node],
+ node_recorded_ids_map[node],
+ cpumask_of_node(node));
+ }
+
of_reconfig_notifier_register(&pseries_smp_nb);
+ }
return 0;
}
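
find_cpu_id_range() factors out the sliding-window search that the old
pseries_add_processor() open-coded: start with the lowest nthreads ids, and
shift the window left by nthreads until it fits entirely inside the mask of
candidate ids. A standalone model of that search (an editor's sketch in plain
C over a 64-bit mask, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* Model of find_cpu_id_range() for nthreads < 64. */
    static int find_range(uint64_t free_ids, unsigned int nthreads,
                          uint64_t *out)
    {
            uint64_t window = (1ULL << nthreads) - 1;

            while (window) {
                    if ((window & free_ids) == window) { /* cpumask_subset() */
                            *out = window;
                            return 0;
                    }
                    window <<= nthreads;         /* cpumask_shift_left() */
            }
            return -1;                           /* -ENOSPC in the kernel */
    }

    int main(void)
    {
            uint64_t mask;

            /* ids 0-7 in use, 8-15 free: an 8-thread core lands on 8-15 */
            if (!find_range(0xff00, 8, &mask))
                    printf("allocated id mask: 0x%llx\n",
                           (unsigned long long)mask);
            return 0;
    }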
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 377d852f5a9a..14fccd7b9c99 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -180,6 +180,8 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb)
return -ENODEV;
}
+ update_numa_distance(lmb_node);
+
dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (!dr_node) {
dlpar_free_cc_nodes(lmb_node);
@@ -979,6 +981,10 @@ static int pseries_memory_notifier(struct notifier_block *nb,
case OF_RECONFIG_DETACH_NODE:
err = pseries_remove_mem_node(rd->dn);
break;
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ if (!strcmp(rd->dn->name,
+ "ibm,dynamic-reconfiguration-memory"))
+ drmem_update_lmbs(rd->prop);
}
return notifier_from_errno(err);
}
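
The new OF_RECONFIG_UPDATE_PROPERTY case lets the memory notifier react when
the hypervisor rewrites the ibm,dynamic-reconfiguration-memory property,
forwarding the new value to drmem_update_lmbs(). For reference, the payload a
reconfig notifier receives looks like this (paraphrased from
include/linux/of.h):

    struct of_reconfig_data {
            struct device_node *dn;       /* node being modified    */
            struct property    *prop;     /* new property value     */
            struct property    *old_prop; /* previous value, if any */
    };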
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index dab356e3ff87..3df6bdfea475 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -22,6 +22,8 @@
#include <linux/workqueue.h>
#include <linux/proc_fs.h>
#include <linux/pgtable.h>
+#include <linux/debugfs.h>
+
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
@@ -39,7 +41,6 @@
#include <asm/kexec.h>
#include <asm/fadump.h>
#include <asm/asm-prototypes.h>
-#include <asm/debugfs.h>
#include <asm/dtl.h>
#include "pseries.h"
@@ -261,7 +262,7 @@ static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu)
if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc)
return -EIO;
- return cpu_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
+ return cpu_relative_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
}
static int cpu_home_node_dispatch_distance(int disp_cpu)
@@ -281,7 +282,7 @@ static int cpu_home_node_dispatch_distance(int disp_cpu)
if (!disp_cpu_assoc || !vcpu_assoc)
return -EIO;
- return cpu_distance(disp_cpu_assoc, vcpu_assoc);
+ return cpu_relative_distance(disp_cpu_assoc, vcpu_assoc);
}
static void update_vcpu_disp_stat(int disp_cpu)
@@ -801,7 +802,8 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
return -1;
}
-static void manual_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace void manual_hpte_clear_all(void)
{
unsigned long size_bytes = 1UL << ppc64_pft_size;
unsigned long hpte_count = size_bytes >> 4;
@@ -834,7 +836,8 @@ static void manual_hpte_clear_all(void)
}
}
-static int hcall_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace int hcall_hpte_clear_all(void)
{
int rc;
@@ -845,7 +848,8 @@ static int hcall_hpte_clear_all(void)
return rc;
}
-static void pseries_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace void pseries_hpte_clear_all(void)
{
int rc;
@@ -2016,7 +2020,7 @@ static int __init vpa_debugfs_init(void)
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return 0;
- vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
+ vpa_dir = debugfs_create_dir("vpa", arch_debugfs_dir);
/* set up the per-cpu vpa file*/
for_each_possible_cpu(i) {
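
The notrace annotations above matter because the three HPTE-clearing helpers
run late in the kexec shutdown sequence with the MMU off; an ftrace entry hook
patched into them would fault once translation is disabled. Sketched in
isolation:

    /* must never be instrumented: runs with the MMU off */
    static notrace void mmu_off_helper(void)
    {
            /* ... */
    }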
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 637300330507..1b305e411862 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -13,6 +13,7 @@
#include <asm/hw_irq.h>
#include <asm/ppc-pci.h>
#include <asm/machdep.h>
+#include <asm/xive.h>
#include "pseries.h"
@@ -110,21 +111,6 @@ static int rtas_query_irq_number(struct pci_dn *pdn, int offset)
return rtas_ret[0];
}
-static void rtas_teardown_msi_irqs(struct pci_dev *pdev)
-{
- struct msi_desc *entry;
-
- for_each_pci_msi_entry(entry, pdev) {
- if (!entry->irq)
- continue;
-
- irq_set_msi_desc(entry->irq, NULL);
- irq_dispose_mapping(entry->irq);
- }
-
- rtas_disable_msi(pdev);
-}
-
static int check_req(struct pci_dev *pdev, int nvec, char *prop_name)
{
struct device_node *dn;
@@ -164,12 +150,12 @@ static int check_req_msix(struct pci_dev *pdev, int nvec)
/* Quota calculation */
-static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
+static struct device_node *__find_pe_total_msi(struct device_node *node, int *total)
{
struct device_node *dn;
const __be32 *p;
- dn = of_node_get(pci_device_to_OF_node(dev));
+ dn = of_node_get(node);
while (dn) {
p = of_get_property(dn, "ibm,pe-total-#msi", NULL);
if (p) {
@@ -185,6 +171,11 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
return NULL;
}
+static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
+{
+ return __find_pe_total_msi(pci_device_to_OF_node(dev), total);
+}
+
static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
{
struct device_node *dn;
@@ -368,12 +359,11 @@ static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev)
pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0);
}
-static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type)
+static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type,
+ msi_alloc_info_t *arg)
{
struct pci_dn *pdn;
- int hwirq, virq, i, quota, rc;
- struct msi_desc *entry;
- struct msi_msg msg;
+ int quota, rc;
int nvec = nvec_in;
int use_32bit_msi_hack = 0;
@@ -451,53 +441,229 @@ again:
return rc;
}
- i = 0;
- for_each_pci_msi_entry(entry, pdev) {
- hwirq = rtas_query_irq_number(pdn, i++);
- if (hwirq < 0) {
- pr_debug("rtas_msi: error (%d) getting hwirq\n", rc);
- return hwirq;
- }
+ return 0;
+}
- /*
- * Depending on the number of online CPUs in the original
- * kernel, it is likely for CPU #0 to be offline in a kdump
- * kernel. The associated IRQs in the affinity mappings
- * provided by irq_create_affinity_masks() are thus not
- * started by irq_startup(), as per-design for managed IRQs.
- * This can be a problem with multi-queue block devices driven
- * by blk-mq : such a non-started IRQ is very likely paired
- * with the single queue enforced by blk-mq during kdump (see
- * blk_mq_alloc_tag_set()). This causes the device to remain
- * silent and likely hangs the guest at some point.
- *
- * We don't really care for fine-grained affinity when doing
- * kdump actually : simply ignore the pre-computed affinity
- * masks in this case and let the default mask with all CPUs
- * be used when creating the IRQ mappings.
- */
- if (is_kdump_kernel())
- virq = irq_create_mapping(NULL, hwirq);
- else
- virq = irq_create_mapping_affinity(NULL, hwirq,
- entry->affinity);
+static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *arg)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct msi_desc *desc = first_pci_msi_entry(pdev);
+ int type = desc->msi_attrib.is_msix ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI;
- if (!virq) {
- pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq);
- return -ENOSPC;
- }
+ return rtas_prepare_msi_irqs(pdev, nvec, type, arg);
+}
+
+/*
+ * ->msi_free() is called before irq_domain_free_irqs_top() when the
+ * handler data is still available. Use that to clear the XIVE
+ * controller data.
+ */
+static void pseries_msi_ops_msi_free(struct irq_domain *domain,
+ struct msi_domain_info *info,
+ unsigned int irq)
+{
+ if (xive_enabled())
+ xive_irq_free_data(irq);
+}
+
+/*
+ * RTAS cannot disable a single MSI at a time: it is all or nothing. Do
+ * it at the end, after all the IRQs have been freed.
+ */
+static void pseries_msi_domain_free_irqs(struct irq_domain *domain,
+ struct device *dev)
+{
+ if (WARN_ON_ONCE(!dev_is_pci(dev)))
+ return;
+
+ __msi_domain_free_irqs(domain, dev);
+
+ rtas_disable_msi(to_pci_dev(dev));
+}
+
+static struct msi_domain_ops pseries_pci_msi_domain_ops = {
+ .msi_prepare = pseries_msi_ops_prepare,
+ .msi_free = pseries_msi_ops_msi_free,
+ .domain_free_irqs = pseries_msi_domain_free_irqs,
+};
+
+static void pseries_msi_shutdown(struct irq_data *d)
+{
+ d = d->parent_data;
+ if (d->chip->irq_shutdown)
+ d->chip->irq_shutdown(d);
+}
+
+static void pseries_msi_mask(struct irq_data *d)
+{
+ pci_msi_mask_irq(d);
+ irq_chip_mask_parent(d);
+}
+
+static void pseries_msi_unmask(struct irq_data *d)
+{
+ pci_msi_unmask_irq(d);
+ irq_chip_unmask_parent(d);
+}
- dev_dbg(&pdev->dev, "rtas_msi: allocated virq %d\n", virq);
- irq_set_msi_desc(virq, entry);
+static struct irq_chip pseries_pci_msi_irq_chip = {
+ .name = "pSeries-PCI-MSI",
+ .irq_shutdown = pseries_msi_shutdown,
+ .irq_mask = pseries_msi_mask,
+ .irq_unmask = pseries_msi_unmask,
+ .irq_eoi = irq_chip_eoi_parent,
+};
+
+static struct msi_domain_info pseries_msi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX),
+ .ops = &pseries_pci_msi_domain_ops,
+ .chip = &pseries_pci_msi_irq_chip,
+};
+
+static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ __pci_read_msi_msg(irq_data_get_msi_desc(data), msg);
+}
+
+static struct irq_chip pseries_msi_irq_chip = {
+ .name = "pSeries-MSI",
+ .irq_shutdown = pseries_msi_shutdown,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_compose_msi_msg = pseries_msi_compose_msg,
+};
+
+static int pseries_irq_parent_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ irq_hw_number_t hwirq)
+{
+ struct irq_fwspec parent_fwspec;
+ int ret;
+
+ parent_fwspec.fwnode = domain->parent->fwnode;
+ parent_fwspec.param_count = 2;
+ parent_fwspec.param[0] = hwirq;
+ parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct pci_controller *phb = domain->host_data;
+ msi_alloc_info_t *info = arg;
+ struct msi_desc *desc = info->desc;
+ struct pci_dev *pdev = msi_desc_to_pci_dev(desc);
+ int hwirq;
+ int i, ret;
+
+ hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_attrib.entry_nr);
+ if (hwirq < 0) {
+ dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq);
+ return hwirq;
+ }
+
+ dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__,
+ phb->dn, virq, hwirq, nr_irqs);
+
+ for (i = 0; i < nr_irqs; i++) {
+ ret = pseries_irq_parent_domain_alloc(domain, virq + i, hwirq + i);
+ if (ret)
+ goto out;
+
+ irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+ &pseries_msi_irq_chip, domain->host_data);
+ }
+
+ return 0;
+
+out:
+ /* TODO: handle RTAS cleanup in ->msi_finish() ? */
+ irq_domain_free_irqs_parent(domain, virq, i - 1);
+ return ret;
+}
+
+static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct pci_controller *phb = irq_data_get_irq_chip_data(d);
+
+ pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs);
+
+ /* XIVE domain data is cleared through ->msi_free() */
+}
- /* Read config space back so we can restore after reset */
- __pci_read_msi_msg(entry, &msg);
- entry->msg = msg;
+static const struct irq_domain_ops pseries_irq_domain_ops = {
+ .alloc = pseries_irq_domain_alloc,
+ .free = pseries_irq_domain_free,
+};
+
+static int __pseries_msi_allocate_domains(struct pci_controller *phb,
+ unsigned int count)
+{
+ struct irq_domain *parent = irq_get_default_host();
+
+ phb->fwnode = irq_domain_alloc_named_id_fwnode("pSeries-MSI",
+ phb->global_number);
+ if (!phb->fwnode)
+ return -ENOMEM;
+
+ phb->dev_domain = irq_domain_create_hierarchy(parent, 0, count,
+ phb->fwnode,
+ &pseries_irq_domain_ops, phb);
+ if (!phb->dev_domain) {
+ pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n",
+ phb->dn, phb->global_number);
+ irq_domain_free_fwnode(phb->fwnode);
+ return -ENOMEM;
+ }
+
+ phb->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(phb->dn),
+ &pseries_msi_domain_info,
+ phb->dev_domain);
+ if (!phb->msi_domain) {
+ pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n",
+ phb->dn, phb->global_number);
+ irq_domain_free_fwnode(phb->fwnode);
+ irq_domain_remove(phb->dev_domain);
+ return -ENOMEM;
}
return 0;
}
+int pseries_msi_allocate_domains(struct pci_controller *phb)
+{
+ int count;
+
+ if (!__find_pe_total_msi(phb->dn, &count)) {
+ pr_err("PCI: failed to find MSIs for bridge %pOF (domain %d)\n",
+ phb->dn, phb->global_number);
+ return -ENOSPC;
+ }
+
+ return __pseries_msi_allocate_domains(phb, count);
+}
+
+void pseries_msi_free_domains(struct pci_controller *phb)
+{
+ if (phb->msi_domain)
+ irq_domain_remove(phb->msi_domain);
+ if (phb->dev_domain)
+ irq_domain_remove(phb->dev_domain);
+ if (phb->fwnode)
+ irq_domain_free_fwnode(phb->fwnode);
+}
+
static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
{
/* No LSI -> leave MSIs (if any) configured */
@@ -518,8 +684,6 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
static int rtas_msi_init(void)
{
- struct pci_controller *phb;
-
query_token = rtas_token("ibm,query-interrupt-source-number");
change_token = rtas_token("ibm,change-msi");
@@ -531,16 +695,6 @@ static int rtas_msi_init(void)
pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n");
- WARN_ON(pseries_pci_controller_ops.setup_msi_irqs);
- pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs;
- pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs;
-
- list_for_each_entry(phb, &hose_list, list_node) {
- WARN_ON(phb->controller_ops.setup_msi_irqs);
- phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs;
- phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs;
- }
-
WARN_ON(ppc_md.pci_irq_fixup);
ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup;
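
Taken together, the msi.c rewrite replaces the per-PHB setup_msi_irqs /
teardown_msi_irqs callbacks with a hierarchy of IRQ domains, one pair per PHB
(sketched from the code above):

    pSeries-PCI-MSI  (pci_msi_create_irq_domain + pseries_msi_domain_info)
          |
    pSeries-MSI      (irq_domain_create_hierarchy + pseries_irq_domain_ops)
          |
    default host     (irq_get_default_host(): the XIVE or XICS controller)

Allocation flows down the stack: pseries_msi_ops_prepare() negotiates the
quota with RTAS, then pseries_irq_domain_alloc() queries each hardware IRQ
number via rtas_query_irq_number() and allocates it from the parent domain.
Teardown runs in reverse, with pseries_msi_domain_free_irqs() issuing the
single all-or-nothing rtas_disable_msi() call only after every IRQ has been
freed.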
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index a8f9140a24fa..90c9d3531694 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -33,6 +33,8 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn)
pci_devs_phb_init_dynamic(phb);
+ pseries_msi_allocate_domains(phb);
+
/* Create EEH devices for the PHB */
eeh_phb_pe_create(phb);
@@ -74,6 +76,8 @@ int remove_phb_dynamic(struct pci_controller *phb)
}
}
+ pseries_msi_free_domains(phb);
+
/* Remove the PCI bus and unregister the bridge device from sysfs */
phb->bus = NULL;
pci_remove_bus(b);
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 1f051a786fb3..3544778e06d0 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -85,6 +85,8 @@ struct pci_host_bridge;
int pseries_root_bridge_prepare(struct pci_host_bridge *bridge);
extern struct pci_controller_ops pseries_pci_controller_ops;
+int pseries_msi_allocate_domains(struct pci_controller *phb);
+void pseries_msi_free_domains(struct pci_controller *phb);
unsigned long pseries_memory_block_size(void);
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 167f2e1b8d39..56092dccfdb8 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -783,7 +783,7 @@ static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
{
int recovered = 0;
- if (!(regs->msr & MSR_RI)) {
+ if (regs_is_unrecoverable(regs)) {
/* If MSR_RI isn't set, we cannot recover */
pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
recovered = 0;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 631a0d57b6cd..35724caf8a83 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -486,6 +486,8 @@ static void __init pSeries_discover_phbs(void)
/* create pci_dn's for DT nodes under this PHB */
pci_devs_phb_init_dynamic(phb);
+
+ pseries_msi_allocate_domains(phb);
}
of_node_put(root);
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index b5c1cf1bc64d..b043e3936d21 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -184,7 +184,7 @@ static int h_get_nx_fault(u32 winid, u64 buffer)
* Note: The hypervisor forwards an interrupt for each fault request.
* So one fault CRB to process for each H_GET_NX_FAULT hcall.
*/
-irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
+static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
{
struct pseries_vas_window *txwin = data;
struct coprocessor_request_block crb;