summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm/numa.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r--arch/powerpc/mm/numa.c506
1 files changed, 12 insertions, 494 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9fcf2d195830..058fee9a0835 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -221,7 +221,8 @@ static void initialize_distance_lookup_table(int nid,
}
}
-/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
+/*
+ * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
* info is found.
*/
static int associativity_to_nid(const __be32 *associativity)
@@ -235,7 +236,7 @@ static int associativity_to_nid(const __be32 *associativity)
nid = of_read_number(&associativity[min_common_depth], 1);
/* POWER4 LPAR uses 0xffff as invalid node */
- if (nid == 0xffff || nid >= MAX_NUMNODES)
+ if (nid == 0xffff || nid >= nr_node_ids)
nid = NUMA_NO_NODE;
if (nid > 0 &&
@@ -448,7 +449,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
nid = of_read_number(&aa.arrays[index], 1);
- if (nid == 0xffff || nid >= MAX_NUMNODES)
+ if (nid == 0xffff || nid >= nr_node_ids)
nid = default_nid;
if (nid > 0) {
@@ -644,8 +645,9 @@ static inline int __init read_usm_ranges(const __be32 **usm)
* Extract NUMA information from the ibm,dynamic-reconfiguration-memory
* node. This assumes n_mem_{addr,size}_cells have been set.
*/
-static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
- const __be32 **usm)
+static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+ const __be32 **usm,
+ void *data)
{
unsigned int ranges, is_kexec_kdump = 0;
unsigned long base, size, sz;
@@ -657,7 +659,7 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
*/
if ((lmb->flags & DRCONF_MEM_RESERVED)
|| !(lmb->flags & DRCONF_MEM_ASSIGNED))
- return;
+ return 0;
if (*usm)
is_kexec_kdump = 1;
@@ -669,7 +671,7 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
if (is_kexec_kdump) {
ranges = read_usm_ranges(usm);
if (!ranges) /* there are no (base, size) duple */
- return;
+ return 0;
}
do {
@@ -686,6 +688,8 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
if (sz)
memblock_set_node(base, sz, &memblock.memory, nid);
} while (--ranges);
+
+ return 0;
}
static int __init parse_numa_properties(void)
@@ -787,7 +791,7 @@ new_range:
*/
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (memory) {
- walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
+ walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
of_node_put(memory);
}
@@ -984,28 +988,6 @@ static int __init early_numa(char *p)
}
early_param("numa", early_numa);
-/*
- * The platform can inform us through one of several mechanisms
- * (post-migration device tree updates, PRRN or VPHN) that the NUMA
- * assignment of a resource has changed. This controls whether we act
- * on that. Disabled by default.
- */
-static bool topology_updates_enabled;
-
-static int __init early_topology_updates(char *p)
-{
- if (!p)
- return 0;
-
- if (!strcmp(p, "on")) {
- pr_warn("Caution: enabling topology updates\n");
- topology_updates_enabled = true;
- }
-
- return 0;
-}
-early_param("topology_updates", early_topology_updates);
-
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Find the node associated with a hot added memory section for
@@ -1144,98 +1126,9 @@ u64 memory_hotplug_max(void)
/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
-struct topology_update_data {
- struct topology_update_data *next;
- unsigned int cpu;
- int old_nid;
- int new_nid;
-};
-
-#define TOPOLOGY_DEF_TIMER_SECS 60
-
-static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
-static cpumask_t cpu_associativity_changes_mask;
-static int vphn_enabled;
-static int prrn_enabled;
-static void reset_topology_timer(void);
-static int topology_timer_secs = 1;
static int topology_inited;
/*
- * Change polling interval for associativity changes.
- */
-int timed_topology_update(int nsecs)
-{
- if (vphn_enabled) {
- if (nsecs > 0)
- topology_timer_secs = nsecs;
- else
- topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
-
- reset_topology_timer();
- }
-
- return 0;
-}
-
-/*
- * Store the current values of the associativity change counters in the
- * hypervisor.
- */
-static void setup_cpu_associativity_change_counters(void)
-{
- int cpu;
-
- /* The VPHN feature supports a maximum of 8 reference points */
- BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
-
- for_each_possible_cpu(cpu) {
- int i;
- u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
- for (i = 0; i < distance_ref_points_depth; i++)
- counts[i] = hypervisor_counts[i];
- }
-}
-
-/*
- * The hypervisor maintains a set of 8 associativity change counters in
- * the VPA of each cpu that correspond to the associativity levels in the
- * ibm,associativity-reference-points property. When an associativity
- * level changes, the corresponding counter is incremented.
- *
- * Set a bit in cpu_associativity_changes_mask for each cpu whose home
- * node associativity levels have changed.
- *
- * Returns the number of cpus with unhandled associativity changes.
- */
-static int update_cpu_associativity_changes_mask(void)
-{
- int cpu;
- cpumask_t *changes = &cpu_associativity_changes_mask;
-
- for_each_possible_cpu(cpu) {
- int i, changed = 0;
- u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
- for (i = 0; i < distance_ref_points_depth; i++) {
- if (hypervisor_counts[i] != counts[i]) {
- counts[i] = hypervisor_counts[i];
- changed = 1;
- }
- }
- if (changed) {
- cpumask_or(changes, changes, cpu_sibling_mask(cpu));
- cpu = cpu_last_thread_sibling(cpu);
- }
- }
-
- return cpumask_weight(changes);
-}
-
-/*
* Retrieve the new associativity information for a virtual processor's
* home node.
*/
@@ -1250,7 +1143,6 @@ static long vphn_get_associativity(unsigned long cpu,
switch (rc) {
case H_SUCCESS:
dbg("VPHN hcall succeeded. Reset polling...\n");
- timed_topology_update(0);
goto out;
case H_FUNCTION:
@@ -1269,8 +1161,6 @@ static long vphn_get_associativity(unsigned long cpu,
, rc);
break;
}
-
- stop_topology_update();
out:
return rc;
}
@@ -1314,380 +1204,8 @@ int find_and_online_cpu_nid(int cpu)
return new_nid;
}
-/*
- * Update the CPU maps and sysfs entries for a single CPU when its NUMA
- * characteristics change. This function doesn't perform any locking and is
- * only safe to call from stop_machine().
- */
-static int update_cpu_topology(void *data)
-{
- struct topology_update_data *update;
- unsigned long cpu;
-
- if (!data)
- return -EINVAL;
-
- cpu = smp_processor_id();
-
- for (update = data; update; update = update->next) {
- int new_nid = update->new_nid;
- if (cpu != update->cpu)
- continue;
-
- unmap_cpu_from_node(cpu);
- map_cpu_to_node(cpu, new_nid);
- set_cpu_numa_node(cpu, new_nid);
- set_cpu_numa_mem(cpu, local_memory_node(new_nid));
- vdso_getcpu_init();
- }
-
- return 0;
-}
-
-static int update_lookup_table(void *data)
-{
- struct topology_update_data *update;
-
- if (!data)
- return -EINVAL;
-
- /*
- * Upon topology update, the numa-cpu lookup table needs to be updated
- * for all threads in the core, including offline CPUs, to ensure that
- * future hotplug operations respect the cpu-to-node associativity
- * properly.
- */
- for (update = data; update; update = update->next) {
- int nid, base, j;
-
- nid = update->new_nid;
- base = cpu_first_thread_sibling(update->cpu);
-
- for (j = 0; j < threads_per_core; j++) {
- update_numa_cpu_lookup_table(base + j, nid);
- }
- }
-
- return 0;
-}
-
-/*
- * Update the node maps and sysfs entries for each cpu whose home node
- * has changed. Returns 1 when the topology has changed, and 0 otherwise.
- *
- * cpus_locked says whether we already hold cpu_hotplug_lock.
- */
-int numa_update_cpu_topology(bool cpus_locked)
-{
- unsigned int cpu, sibling, changed = 0;
- struct topology_update_data *updates, *ud;
- cpumask_t updated_cpus;
- struct device *dev;
- int weight, new_nid, i = 0;
-
- if (!prrn_enabled && !vphn_enabled && topology_inited)
- return 0;
-
- weight = cpumask_weight(&cpu_associativity_changes_mask);
- if (!weight)
- return 0;
-
- updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL);
- if (!updates)
- return 0;
-
- cpumask_clear(&updated_cpus);
-
- for_each_cpu(cpu, &cpu_associativity_changes_mask) {
- /*
- * If siblings aren't flagged for changes, updates list
- * will be too short. Skip on this update and set for next
- * update.
- */
- if (!cpumask_subset(cpu_sibling_mask(cpu),
- &cpu_associativity_changes_mask)) {
- pr_info("Sibling bits not set for associativity "
- "change, cpu%d\n", cpu);
- cpumask_or(&cpu_associativity_changes_mask,
- &cpu_associativity_changes_mask,
- cpu_sibling_mask(cpu));
- cpu = cpu_last_thread_sibling(cpu);
- continue;
- }
-
- new_nid = find_and_online_cpu_nid(cpu);
-
- if (new_nid == numa_cpu_lookup_table[cpu]) {
- cpumask_andnot(&cpu_associativity_changes_mask,
- &cpu_associativity_changes_mask,
- cpu_sibling_mask(cpu));
- dbg("Assoc chg gives same node %d for cpu%d\n",
- new_nid, cpu);
- cpu = cpu_last_thread_sibling(cpu);
- continue;
- }
-
- for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
- ud = &updates[i++];
- ud->next = &updates[i];
- ud->cpu = sibling;
- ud->new_nid = new_nid;
- ud->old_nid = numa_cpu_lookup_table[sibling];
- cpumask_set_cpu(sibling, &updated_cpus);
- }
- cpu = cpu_last_thread_sibling(cpu);
- }
-
- /*
- * Prevent processing of 'updates' from overflowing array
- * where last entry filled in a 'next' pointer.
- */
- if (i)
- updates[i-1].next = NULL;
-
- pr_debug("Topology update for the following CPUs:\n");
- if (cpumask_weight(&updated_cpus)) {
- for (ud = &updates[0]; ud; ud = ud->next) {
- pr_debug("cpu %d moving from node %d "
- "to %d\n", ud->cpu,
- ud->old_nid, ud->new_nid);
- }
- }
-
- /*
- * In cases where we have nothing to update (because the updates list
- * is too short or because the new topology is same as the old one),
- * skip invoking update_cpu_topology() via stop-machine(). This is
- * necessary (and not just a fast-path optimization) since stop-machine
- * can end up electing a random CPU to run update_cpu_topology(), and
- * thus trick us into setting up incorrect cpu-node mappings (since
- * 'updates' is kzalloc()'ed).
- *
- * And for the similar reason, we will skip all the following updating.
- */
- if (!cpumask_weight(&updated_cpus))
- goto out;
-
- if (cpus_locked)
- stop_machine_cpuslocked(update_cpu_topology, &updates[0],
- &updated_cpus);
- else
- stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
-
- /*
- * Update the numa-cpu lookup table with the new mappings, even for
- * offline CPUs. It is best to perform this update from the stop-
- * machine context.
- */
- if (cpus_locked)
- stop_machine_cpuslocked(update_lookup_table, &updates[0],
- cpumask_of(raw_smp_processor_id()));
- else
- stop_machine(update_lookup_table, &updates[0],
- cpumask_of(raw_smp_processor_id()));
-
- for (ud = &updates[0]; ud; ud = ud->next) {
- unregister_cpu_under_node(ud->cpu, ud->old_nid);
- register_cpu_under_node(ud->cpu, ud->new_nid);
-
- dev = get_cpu_device(ud->cpu);
- if (dev)
- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
- cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
- changed = 1;
- }
-
-out:
- kfree(updates);
- return changed;
-}
-
-int arch_update_cpu_topology(void)
-{
- return numa_update_cpu_topology(true);
-}
-
-static void topology_work_fn(struct work_struct *work)
-{
- rebuild_sched_domains();
-}
-static DECLARE_WORK(topology_work, topology_work_fn);
-
-static void topology_schedule_update(void)
-{
- schedule_work(&topology_work);
-}
-
-static void topology_timer_fn(struct timer_list *unused)
-{
- if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
- topology_schedule_update();
- else if (vphn_enabled) {
- if (update_cpu_associativity_changes_mask() > 0)
- topology_schedule_update();
- reset_topology_timer();
- }
-}
-static struct timer_list topology_timer;
-
-static void reset_topology_timer(void)
-{
- if (vphn_enabled)
- mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
-}
-
-#ifdef CONFIG_SMP
-
-static int dt_update_callback(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct of_reconfig_data *update = data;
- int rc = NOTIFY_DONE;
-
- switch (action) {
- case OF_RECONFIG_UPDATE_PROPERTY:
- if (of_node_is_type(update->dn, "cpu") &&
- !of_prop_cmp(update->prop->name, "ibm,associativity")) {
- u32 core_id;
- of_property_read_u32(update->dn, "reg", &core_id);
- rc = dlpar_cpu_readd(core_id);
- rc = NOTIFY_OK;
- }
- break;
- }
-
- return rc;
-}
-
-static struct notifier_block dt_update_nb = {
- .notifier_call = dt_update_callback,
-};
-
-#endif
-
-/*
- * Start polling for associativity changes.
- */
-int start_topology_update(void)
-{
- int rc = 0;
-
- if (!topology_updates_enabled)
- return 0;
-
- if (firmware_has_feature(FW_FEATURE_PRRN)) {
- if (!prrn_enabled) {
- prrn_enabled = 1;
-#ifdef CONFIG_SMP
- rc = of_reconfig_notifier_register(&dt_update_nb);
-#endif
- }
- }
- if (firmware_has_feature(FW_FEATURE_VPHN) &&
- lppaca_shared_proc(get_lppaca())) {
- if (!vphn_enabled) {
- vphn_enabled = 1;
- setup_cpu_associativity_change_counters();
- timer_setup(&topology_timer, topology_timer_fn,
- TIMER_DEFERRABLE);
- reset_topology_timer();
- }
- }
-
- pr_info("Starting topology update%s%s\n",
- (prrn_enabled ? " prrn_enabled" : ""),
- (vphn_enabled ? " vphn_enabled" : ""));
-
- return rc;
-}
-
-/*
- * Disable polling for VPHN associativity changes.
- */
-int stop_topology_update(void)
-{
- int rc = 0;
-
- if (!topology_updates_enabled)
- return 0;
-
- if (prrn_enabled) {
- prrn_enabled = 0;
-#ifdef CONFIG_SMP
- rc = of_reconfig_notifier_unregister(&dt_update_nb);
-#endif
- }
- if (vphn_enabled) {
- vphn_enabled = 0;
- rc = del_timer_sync(&topology_timer);
- }
-
- pr_info("Stopping topology update\n");
-
- return rc;
-}
-
-int prrn_is_enabled(void)
-{
- return prrn_enabled;
-}
-
-static int topology_read(struct seq_file *file, void *v)
-{
- if (vphn_enabled || prrn_enabled)
- seq_puts(file, "on\n");
- else
- seq_puts(file, "off\n");
-
- return 0;
-}
-
-static int topology_open(struct inode *inode, struct file *file)
-{
- return single_open(file, topology_read, NULL);
-}
-
-static ssize_t topology_write(struct file *file, const char __user *buf,
- size_t count, loff_t *off)
-{
- char kbuf[4]; /* "on" or "off" plus null. */
- int read_len;
-
- read_len = count < 3 ? count : 3;
- if (copy_from_user(kbuf, buf, read_len))
- return -EINVAL;
-
- kbuf[read_len] = '\0';
-
- if (!strncmp(kbuf, "on", 2)) {
- topology_updates_enabled = true;
- start_topology_update();
- } else if (!strncmp(kbuf, "off", 3)) {
- stop_topology_update();
- topology_updates_enabled = false;
- } else
- return -EINVAL;
-
- return count;
-}
-
-static const struct proc_ops topology_proc_ops = {
- .proc_read = seq_read,
- .proc_write = topology_write,
- .proc_open = topology_open,
- .proc_release = single_release,
-};
-
static int topology_update_init(void)
{
- start_topology_update();
-
- if (vphn_enabled)
- topology_schedule_update();
-
- if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops))
- return -ENOMEM;
-
topology_inited = 1;
return 0;
}