summary | refs | log | tree | commit | diff
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r-- arch/powerpc/platforms/85xx/corenet_generic.c | 2
-rw-r--r-- arch/powerpc/platforms/85xx/qemu_e500.c | 5
-rw-r--r-- arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 6
-rw-r--r-- arch/powerpc/platforms/8xx/pic.c | 1
-rw-r--r-- arch/powerpc/platforms/Kconfig.cputype | 3
-rw-r--r-- arch/powerpc/platforms/book3s/vas-api.c | 145
-rw-r--r-- arch/powerpc/platforms/cell/cbe_thermal.c | 2
-rw-r--r-- arch/powerpc/platforms/cell/spu_base.c | 4
-rw-r--r-- arch/powerpc/platforms/cell/spufs/sched.c | 7
-rw-r--r-- arch/powerpc/platforms/powermac/pmac.h | 2
-rw-r--r-- arch/powerpc/platforms/powernv/idle.c | 1
-rw-r--r-- arch/powerpc/platforms/powernv/opal-core.c | 2
-rw-r--r-- arch/powerpc/platforms/powernv/opal-dump.c | 2
-rw-r--r-- arch/powerpc/platforms/powernv/opal-flash.c | 2
-rw-r--r-- arch/powerpc/platforms/powernv/opal-tracepoints.c | 1
-rw-r--r-- arch/powerpc/platforms/powernv/pci.c | 2
-rw-r--r-- arch/powerpc/platforms/powernv/rng.c | 6
-rw-r--r-- arch/powerpc/platforms/ps3/system-bus.c | 4
-rw-r--r-- arch/powerpc/platforms/pseries/Makefile | 5
-rw-r--r-- arch/powerpc/platforms/pseries/firmware.c | 1
-rw-r--r-- arch/powerpc/platforms/pseries/lpar.c | 1
-rw-r--r-- arch/powerpc/platforms/pseries/lparcfg.c | 87
-rw-r--r-- arch/powerpc/platforms/pseries/mobility.c | 7
-rw-r--r-- arch/powerpc/platforms/pseries/papr_platform_attributes.c | 361
-rw-r--r-- arch/powerpc/platforms/pseries/papr_scm.c | 321
-rw-r--r-- arch/powerpc/platforms/pseries/pci_dlpar.c | 4
-rw-r--r-- arch/powerpc/platforms/pseries/power.c | 2
-rw-r--r-- arch/powerpc/platforms/pseries/pseries.h | 1
-rw-r--r-- arch/powerpc/platforms/pseries/ras.c | 68
-rw-r--r-- arch/powerpc/platforms/pseries/setup.c | 1
-rw-r--r-- arch/powerpc/platforms/pseries/vas-sysfs.c | 268
-rw-r--r-- arch/powerpc/platforms/pseries/vas.c | 500
-rw-r--r-- arch/powerpc/platforms/pseries/vas.h | 30
33 files changed, 1748 insertions, 106 deletions
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index 8d6029099848..17ae75d62518 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -37,7 +37,7 @@ void __init corenet_gen_pic_init(void)
unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU |
MPIC_NO_RESET;
- if (ppc_md.get_irq == mpic_get_coreint_irq)
+ if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) && !IS_ENABLED(CONFIG_KEXEC_CORE))
flags |= MPIC_ENABLE_COREINT;
mpic = mpic_alloc(NULL, 0, flags, 0, 512, " OpenPIC ");
diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
index a4127b0b161f..4c4d577effd9 100644
--- a/arch/powerpc/platforms/85xx/qemu_e500.c
+++ b/arch/powerpc/platforms/85xx/qemu_e500.c
@@ -67,4 +67,9 @@ define_machine(qemu_e500) {
.get_irq = mpic_get_coreint_irq,
.calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
+#ifdef CONFIG_PPC64
+ .power_save = book3e_idle,
+#else
+ .power_save = e500_idle,
+#endif
};
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index b697918b727d..a6b8ffcbf01a 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -95,12 +95,6 @@ static int __init mpc86xx_hpcn_probe(void)
if (of_machine_is_compatible("fsl,mpc8641hpcn"))
return 1; /* Looks good */
- /* Be nice and don't give silent boot death. Delete this in 2.6.27 */
- if (of_machine_is_compatible("mpc86xx")) {
- pr_warn("WARNING: your dts/dtb is old. You must update before the next kernel release.\n");
- return 1;
- }
-
return 0;
}
diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index f2ba837249d6..04a6abf14c29 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -153,6 +153,7 @@ int __init mpc8xx_pic_init(void)
if (mpc8xx_pic_host == NULL) {
printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
ret = -ENOMEM;
+ goto out;
}
ret = 0;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 87bc1929ee5a..e2e1fec91c6e 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -107,6 +107,7 @@ config PPC_BOOK3S_64
config PPC_BOOK3E_64
bool "Embedded processors"
+ select PPC_FSL_BOOK3E
select PPC_FPU # Make it a choice ?
select PPC_SMP_MUXED_IPI
select PPC_DOORBELL
@@ -295,7 +296,7 @@ config FSL_BOOKE
config PPC_FSL_BOOK3E
bool
select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64
- select FSL_EMB_PERFMON
+ imply FSL_EMB_PERFMON
select PPC_SMP_MUXED_IPI
select PPC_DOORBELL
select PPC_KUEP
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index 4d82c92ddd52..f9a1615b74da 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -316,6 +316,7 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
return PTR_ERR(txwin);
}
+ mutex_init(&txwin->task_ref.mmap_mutex);
cp_inst->txwin = txwin;
return 0;
@@ -350,6 +351,124 @@ static int coproc_release(struct inode *inode, struct file *fp)
return 0;
}
+/*
+ * Fail a faulting paste instruction on behalf of the current task.
+ *
+ * Returns 0 after clearing CR0[0-2] and stepping NIP past the paste,
+ * -EINVAL when there are no user-mode registers to operate on, -EAGAIN
+ * when the instruction word at NIP cannot be read, and -ENOENT when that
+ * word is not a paste instruction.
+ */
+static int do_fail_paste(void)
+{
+	struct pt_regs *regs = current->thread.regs;
+	u32 insn;
+
+	/* Each condition warns once on its own; either one rejects the call. */
+	if (WARN_ON_ONCE(!regs) || WARN_ON_ONCE(!user_mode(regs)))
+		return -EINVAL;
+
+	/*
+	 * The instruction could not be translated. The driver should then
+	 * report success without handling the fault: the access will be
+	 * retried or the instruction fetch itself will fault.
+	 */
+	if (get_user(insn, (u32 __user *)(regs->nip)))
+		return -EAGAIN;
+
+	/* Anything but a paste: the driver may fail the fault. */
+	if ((insn & PPC_INST_PASTE_MASK) != PPC_INST_PASTE)
+		return -ENOENT;
+
+	/* Clear CR0[0-2] to fail the paste, then emulate it by skipping it. */
+	regs->ccr &= ~0xe0000000;
+	regs_add_return_ip(regs, 4);
+
+	return 0;
+}
+
+/*
+ * Page fault handler for the paste address mapping.
+ *
+ * The core raises a page fault on the paste address when the kernel has
+ * closed the window in the hypervisor (on pseries) because of lost credit,
+ * or when the paste address is simply not mapped.
+ *
+ * Returns the result of vmf_insert_pfn() when the window is active and has
+ * a paste address, VM_FAULT_NOPAGE when the paste was failed (or must be
+ * retried), and VM_FAULT_SIGBUS otherwise.
+ */
+static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct file *fp = vma->vm_file;
+	struct coproc_instance *cp_inst = fp->private_data;
+	struct vas_window *txwin;
+	vm_fault_t fault;
+	u64 paste_addr = 0;
+	int ret;
+
+	/* A fault without an open window is not expected. */
+	if (!cp_inst || !cp_inst->txwin) {
+		pr_err("%s(): Unexpected fault on paste address with TX window closed\n",
+			__func__);
+		return VM_FAULT_SIGBUS;
+	}
+
+	txwin = cp_inst->txwin;
+	/*
+	 * When the LPAR loses credits (core removal) or migrates, the
+	 * existing paste address mapping is invalidated and the windows are
+	 * set in-active (zap_page_range in reconfig_close_windows()). The
+	 * mapping is redone later, after migration or once credits are
+	 * available again, so faults keep arriving if user space issues NX
+	 * requests in the meantime.
+	 */
+	if (txwin->task_ref.vma != vmf->vma) {
+		pr_err("%s(): No previous mapping with paste address\n",
+			__func__);
+		return VM_FAULT_SIGBUS;
+	}
+
+	mutex_lock(&txwin->task_ref.mmap_mutex);
+	/*
+	 * The window may have been inactive due to lost credit (Ex: core
+	 * removal with DLPAR). If it is active again now that the credit is
+	 * back, map the new paste address at the window virtual address.
+	 */
+	if (txwin->status == VAS_WIN_ACTIVE)
+		paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
+	if (paste_addr) {
+		fault = vmf_insert_pfn(vma, vma->vm_start,
+				       (paste_addr >> PAGE_SHIFT));
+		mutex_unlock(&txwin->task_ref.mmap_mutex);
+		return fault;
+	}
+	mutex_unlock(&txwin->task_ref.mmap_mutex);
+
+	/*
+	 * The fault comes from the actual window having been closed, which
+	 * happens during migration or on lost credits. With no mapping to
+	 * offer, report a paste instruction failure to user space.
+	 */
+	ret = do_fail_paste();
+
+	/*
+	 * User space can retry several times until success (needed for
+	 * migration), fall back to SW compression, or manage with its other
+	 * open windows if any. The sysfs interface tells whether the
+	 * failures come from migration or from core removal:
+	 * nr_used_credits > nr_total_credits when credits were lost.
+	 */
+	if (ret && ret != -EAGAIN)
+		return VM_FAULT_SIGBUS;
+
+	return VM_FAULT_NOPAGE;
+}
+
+/* VMA operations for the paste address mapping; only .fault is provided. */
+static const struct vm_operations_struct vas_vm_ops = {
+ .fault = vas_mmap_fault,
+};
+
static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
{
struct coproc_instance *cp_inst = fp->private_data;
@@ -378,10 +497,29 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
return -EACCES;
}
+ /*
+ * The initial mmap is done after the window is opened
+ * with ioctl. But before mmap(), this window can be closed in
+ * the hypervisor due to lost credit (core removal on pseries).
+ * So if the window is not active, return mmap() failure with
+ * -EACCES and expects the user space reissue mmap() when it
+ * is active again or open new window when the credit is available.
+ * mmap_mutex protects the paste address mmap() with DLPAR
+ * close/open event and allows mmap() only when the window is
+ * active.
+ */
+ mutex_lock(&txwin->task_ref.mmap_mutex);
+ if (txwin->status != VAS_WIN_ACTIVE) {
+ pr_err("%s(): Window is not active\n", __func__);
+ rc = -EACCES;
+ goto out;
+ }
+
paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
if (!paste_addr) {
pr_err("%s(): Window paste address failed\n", __func__);
- return -EINVAL;
+ rc = -EINVAL;
+ goto out;
}
pfn = paste_addr >> PAGE_SHIFT;
@@ -398,6 +536,11 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__,
paste_addr, vma->vm_start, rc);
+ txwin->task_ref.vma = vma;
+ vma->vm_ops = &vas_vm_ops;
+
+out:
+ mutex_unlock(&txwin->task_ref.mmap_mutex);
return rc;
}
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
index 2ece77f49bc3..abb5e527b4db 100644
--- a/arch/powerpc/platforms/cell/cbe_thermal.c
+++ b/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -255,7 +255,7 @@ static struct attribute *spu_attributes[] = {
NULL,
};
-static struct attribute_group spu_attribute_group = {
+static const struct attribute_group spu_attribute_group = {
.name = "thermal",
.attrs = spu_attributes,
};
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 83cea9e7ee72..2eecba3345c3 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -490,7 +490,7 @@ int spu_add_dev_attr(struct device_attribute *attr)
}
EXPORT_SYMBOL_GPL(spu_add_dev_attr);
-int spu_add_dev_attr_group(struct attribute_group *attrs)
+int spu_add_dev_attr_group(const struct attribute_group *attrs)
{
struct spu *spu;
int rc = 0;
@@ -529,7 +529,7 @@ void spu_remove_dev_attr(struct device_attribute *attr)
}
EXPORT_SYMBOL_GPL(spu_remove_dev_attr);
-void spu_remove_dev_attr_group(struct attribute_group *attrs)
+void spu_remove_dev_attr_group(const struct attribute_group *attrs)
{
struct spu *spu;
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 369206489895..99bd027a7f7c 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -340,8 +340,7 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
static void aff_set_ref_point_location(struct spu_gang *gang)
{
int mem_aff, gs, lowest_offset;
- struct spu_context *ctx;
- struct spu *tmp;
+ struct spu_context *tmp, *ctx;
mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
lowest_offset = 0;
@@ -1053,6 +1052,7 @@ void spuctx_switch_state(struct spu_context *ctx,
}
}
+#ifdef CONFIG_PROC_FS
static int show_spu_loadavg(struct seq_file *s, void *private)
{
int a, b, c;
@@ -1074,7 +1074,8 @@ static int show_spu_loadavg(struct seq_file *s, void *private)
atomic_read(&nr_spu_contexts),
idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
return 0;
-};
+}
+#endif
int __init spu_sched_init(void)
{
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
index 29d2036dcc9d..ba8d4e97095b 100644
--- a/arch/powerpc/platforms/powermac/pmac.h
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -5,6 +5,8 @@
#include <linux/pci.h>
#include <linux/irq.h>
+#include <asm/pmac_feature.h>
+
/*
* Declaration for the various functions exported by the
* pmac_* files. Mostly for use by pmac_setup
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 9942289f379b..a6677a111aca 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -12,7 +12,6 @@
#include <linux/device.h>
#include <linux/cpu.h>
-#include <asm/asm-prototypes.h>
#include <asm/firmware.h>
#include <asm/interrupt.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c
index 0331f1973f0e..b97bc179f65a 100644
--- a/arch/powerpc/platforms/powernv/opal-core.c
+++ b/arch/powerpc/platforms/powernv/opal-core.c
@@ -603,7 +603,7 @@ static struct bin_attribute *mpipl_bin_attr[] = {
};
-static struct attribute_group mpipl_group = {
+static const struct attribute_group mpipl_group = {
.attrs = mpipl_attr,
.bin_attrs = mpipl_bin_attr,
};
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
index 410ed5b9de29..16c5860f1372 100644
--- a/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -150,7 +150,7 @@ static struct attribute *initiate_attrs[] = {
NULL,
};
-static struct attribute_group initiate_attr_group = {
+static const struct attribute_group initiate_attr_group = {
.attrs = initiate_attrs,
};
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
index 7e7d38b17420..18481a8c52fa 100644
--- a/arch/powerpc/platforms/powernv/opal-flash.c
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -512,7 +512,7 @@ static struct attribute *image_op_attrs[] = {
NULL /* need to NULL terminate the list of attributes */
};
-static struct attribute_group image_op_attr_group = {
+static const struct attribute_group image_op_attr_group = {
.attrs = image_op_attrs,
};
diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c
index f16a43540e30..91b36541b9e5 100644
--- a/arch/powerpc/platforms/powernv/opal-tracepoints.c
+++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c
@@ -2,7 +2,6 @@
#include <linux/percpu.h>
#include <linux/jump_label.h>
#include <asm/trace.h>
-#include <asm/asm-prototypes.h>
#ifdef CONFIG_JUMP_LABEL
struct static_key opal_tracepoint_key = STATIC_KEY_INIT;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 9a8391b983d1..f7054879ecd4 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -815,7 +815,7 @@ void pnv_pci_shutdown(void)
/* Fixup wrong class code in p7ioc and p8 root complex */
static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
{
- dev->class = PCI_CLASS_BRIDGE_PCI << 8;
+ dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL;
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index b4386714494a..e3d44b36ae98 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -43,7 +43,11 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
unsigned long parity;
/* Calculate the parity of the value */
- asm ("popcntd %0,%1" : "=r" (parity) : "r" (val));
+ asm (".machine push; \
+ .machine power7; \
+ popcntd %0,%1; \
+ .machine pop;"
+ : "=r" (parity) : "r" (val));
/* xor our value with the previous mask */
val ^= rng->mask;
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index c8b50fec56bf..b637bf292047 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -603,7 +603,7 @@ static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page,
default:
/* not happned */
BUG();
- };
+ }
result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size,
&bus_addr, iopte_flag);
@@ -762,7 +762,7 @@ int ps3_system_bus_device_register(struct ps3_system_bus_device *dev)
break;
default:
BUG();
- };
+ }
dev->core.of_node = NULL;
set_dev_node(&dev->core, 0);
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index ee60b59024b4..9764e1a2ed5c 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -6,7 +6,8 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
of_helpers.o \
setup.o iommu.o event_sources.o ras.o \
firmware.o power.o dlpar.o mobility.o rng.o \
- pci.o pci_dlpar.o eeh_pseries.o msi.o
+ pci.o pci_dlpar.o eeh_pseries.o msi.o \
+ papr_platform_attributes.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_KEXEC_CORE) += kexec.o
obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
@@ -29,6 +30,6 @@ obj-$(CONFIG_PPC_SVM) += svm.o
obj-$(CONFIG_FA_DUMP) += rtas-fadump.o
obj-$(CONFIG_SUSPEND) += suspend.o
-obj-$(CONFIG_PPC_VAS) += vas.o
+obj-$(CONFIG_PPC_VAS) += vas.o vas-sysfs.o
obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index f162156b7b68..09c119b2f623 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -66,6 +66,7 @@ hypertas_fw_features_table[] = {
{FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"},
{FW_FEATURE_PAPR_SCM, "hcall-scm"},
{FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"},
+ {FW_FEATURE_ENERGY_SCALE_INFO, "hcall-energy-scale-info"},
};
/* Build up the firmware features bitmask using the contents of
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index f8899d506ea4..760581c5752f 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -40,7 +40,6 @@
#include <asm/plpar_wrappers.h>
#include <asm/kexec.h>
#include <asm/fadump.h>
-#include <asm/asm-prototypes.h>
#include <asm/dtl.h>
#include "pseries.h"
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index c7940fcfc911..2119c003fcf9 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -311,6 +311,92 @@ static void parse_mpp_x_data(struct seq_file *m)
seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles);
}
+/*
+ * PAPR defines, in section "7.3.16 System Parameters Option", the token 55 to
+ * read the LPAR name, and the largest output data to 4000 + 2 bytes length.
+ */
+#define SPLPAR_LPAR_NAME_TOKEN 55
+#define GET_SYS_PARM_BUF_SIZE 4002
+#if GET_SYS_PARM_BUF_SIZE > RTAS_DATA_BUF_SIZE
+#error "GET_SYS_PARM_BUF_SIZE is larger than RTAS_DATA_BUF_SIZE"
+#endif
+
+/*
+ * Fetch the LPAR name with the RTAS ibm,get-system-parameter call and emit
+ * it as "partition_name=<name>".
+ *
+ * The name obtained this way follows the changes made by the end user on the
+ * hypervisor side. Some hypervisors (like Qemu) may not provide this value.
+ *
+ * Returns 0 on success, -EINVAL when the RTAS service is unknown, -ENOMEM
+ * when the local buffer cannot be allocated and -ENODATA when the RTAS call
+ * ends with a non-zero status.
+ */
+static int read_rtas_lpar_name(struct seq_file *m)
+{
+	int rc, len, token;
+	union {
+		char raw_buffer[GET_SYS_PARM_BUF_SIZE];
+		struct {
+			__be16 len;
+			char name[GET_SYS_PARM_BUF_SIZE-2];
+		};
+	} *parm;
+
+	token = rtas_token("ibm,get-system-parameter");
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -EINVAL;
+
+	parm = kmalloc(sizeof(*parm), GFP_KERNEL);
+	if (!parm)
+		return -ENOMEM;
+
+	do {
+		spin_lock(&rtas_data_buf_lock);
+		memset(rtas_data_buf, 0, sizeof(*parm));
+		rc = rtas_call(token, 3, 1, NULL, SPLPAR_LPAR_NAME_TOKEN,
+			       __pa(rtas_data_buf), sizeof(*parm));
+		/* Copy the answer out while rtas_data_buf is still locked */
+		if (!rc)
+			memcpy(parm->raw_buffer, rtas_data_buf,
+			       sizeof(parm->raw_buffer));
+		spin_unlock(&rtas_data_buf_lock);
+	} while (rtas_busy_delay(rc));
+
+	if (rc) {
+		rc = -ENODATA;
+		goto out_free;
+	}
+
+	/* Force end of string */
+	len = min((int) be16_to_cpu(parm->len), (int) sizeof(parm->name)-1);
+	parm->name[len] = '\0';
+
+	seq_printf(m, "partition_name=%s\n", parm->name);
+
+out_free:
+	kfree(parm);
+	return rc;
+}
+
+/*
+ * Emit the LPAR name stored in the "ibm,partition-name" property of the
+ * device tree root as "partition_name=<name>".
+ *
+ * The device tree value is not refreshed when the end user changes the LPAR
+ * name on the hypervisor side.
+ *
+ * Returns 0 on success, -ENOENT when the property cannot be read.
+ */
+static int read_dt_lpar_name(struct seq_file *m)
+{
+	const char *lpar_name;
+	int err;
+
+	err = of_property_read_string(of_root, "ibm,partition-name", &lpar_name);
+	if (err)
+		return -ENOENT;
+
+	seq_printf(m, "partition_name=%s\n", lpar_name);
+	return 0;
+}
+
+/*
+ * Emit the LPAR name, trying the RTAS source first and the device tree
+ * second. If both fail, an error is logged (once).
+ */
+static void read_lpar_name(struct seq_file *m)
+{
+	/* The message ends with a newline so the log line is terminated */
+	if (read_rtas_lpar_name(m) && read_dt_lpar_name(m))
+		pr_err_once("Error can't get the LPAR name\n");
+}
+
#define SPLPAR_CHARACTERISTICS_TOKEN 20
#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
@@ -496,6 +582,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
/* this call handles the ibm,get-system-parameter contents */
+ read_lpar_name(m);
parse_system_parameter_string(m);
parse_ppp_data(m);
parse_mpp_data(m);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 85033f392c78..78f3f74c7056 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -26,6 +26,7 @@
#include <asm/machdep.h>
#include <asm/rtas.h>
#include "pseries.h"
+#include "vas.h" /* vas_migration_handler() */
#include "../../kernel/cacheinfo.h"
static struct kobject *mobility_kobj;
@@ -265,7 +266,7 @@ static int add_dt_node(struct device_node *parent_dn, __be32 drc_index)
return rc;
}
-int pseries_devicetree_update(s32 scope)
+static int pseries_devicetree_update(s32 scope)
{
char *rtas_buf;
__be32 *data;
@@ -669,12 +670,16 @@ static int pseries_migrate_partition(u64 handle)
if (ret)
return ret;
+ vas_migration_handler(VAS_SUSPEND);
+
ret = pseries_suspend(handle);
if (ret == 0)
post_mobility_fixup();
else
pseries_cancel_migration(handle, ret);
+ vas_migration_handler(VAS_RESUME);
+
return ret;
}
diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c
new file mode 100644
index 000000000000..515150417bb3
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Platform energy and frequency attributes driver
+ *
+ * This driver creates a sys file at /sys/firmware/papr/ which encapsulates a
+ * directory structure containing files in keyword - value pairs that specify
+ * energy and frequency configuration of the system.
+ *
+ * The format of exposing the sysfs information is as follows:
+ * /sys/firmware/papr/energy_scale_info/
+ * |-- <id>/
+ * |-- desc
+ * |-- value
+ * |-- value_desc (if exists)
+ * |-- <id>/
+ * |-- desc
+ * |-- value
+ * |-- value_desc (if exists)
+ *
+ * Copyright 2022 IBM Corp.
+ */
+
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+
+#include "pseries.h"
+
+/*
+ * Flag attributes to fetch either all or one attribute from the HCALL
+ * flag = BE(0) => fetch all attributes with firstAttributeId = 0
+ * flag = BE(1) => fetch a single attribute with firstAttributeId = id
+ */
+#define ESI_FLAGS_ALL 0
+#define ESI_FLAGS_SINGLE (1ull << 63)
+
+#define KOBJ_MAX_ATTRS 3
+
+#define ESI_HDR_SIZE sizeof(struct h_energy_scale_info_hdr)
+#define ESI_ATTR_SIZE sizeof(struct energy_scale_attribute)
+#define CURR_MAX_ESI_ATTRS 8
+
+/*
+ * One attribute entry of the H_GET_ENERGY_SCALE_INFO output buffer.
+ * id and val are big-endian; desc and value_desc are printed as strings by
+ * the sysfs show handlers, and an empty value_desc means no "value_desc"
+ * file is created for the attribute.
+ */
+struct energy_scale_attribute {
+ __be64 id;
+ __be64 val;
+ u8 desc[64];
+ u8 value_desc[64];
+} __packed;
+
+/*
+ * Header located at the start of the H_GET_ENERGY_SCALE_INFO output buffer.
+ * num_attrs is the number of attribute entries, array_offset the byte offset
+ * from the start of the buffer to the first entry; both are big-endian.
+ */
+struct h_energy_scale_info_hdr {
+ __be64 num_attrs;
+ __be64 array_offset;
+ u8 data_header_version;
+} __packed;
+
+/*
+ * One sysfs file of an energy scale attribute. The show handlers recover
+ * this structure from kobj_attr with container_of() to learn which attribute
+ * id to query.
+ */
+struct papr_attr {
+ u64 id;
+ struct kobj_attribute kobj_attr;
+};
+
+/*
+ * The sysfs attribute group of one energy scale attribute, along with the
+ * storage for the (at most KOBJ_MAX_ATTRS) files it contains.
+ */
+struct papr_group {
+ struct attribute_group pg;
+ struct papr_attr pgattrs[KOBJ_MAX_ATTRS];
+};
+
+static struct papr_group *papr_groups;
+/* /sys/firmware/papr */
+static struct kobject *papr_kobj;
+/* /sys/firmware/papr/energy_scale_info */
+static struct kobject *esi_kobj;
+
+/*
+ * Fetch a single energy scale attribute from the hypervisor.
+ *
+ * Energy modes can change dynamically hence making a new hcall each time the
+ * information needs to be retrieved.
+ *
+ * @id:  attribute id, passed to the hcall together with ESI_FLAGS_SINGLE
+ * @esi: receives a copy of the first attribute entry of the hcall output
+ *
+ * Returns the hcall status H_SUCCESS (tested as zero by the callers) on
+ * success, -ENOMEM when the hcall buffer cannot be allocated or grown, and
+ * -EIO when the hcall fails or describes more data than the buffer holds.
+ */
+static int papr_get_attr(u64 id, struct energy_scale_attribute *esi)
+{
+	int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE);
+	int ret, max_esi_attrs = CURR_MAX_ESI_ATTRS;
+	struct energy_scale_attribute *curr_esi;
+	struct h_energy_scale_info_hdr *hdr;
+	char *buf;
+
+	buf = kmalloc(esi_buf_size, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+retry:
+	ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_SINGLE,
+				 id, virt_to_phys(buf),
+				 esi_buf_size);
+
+	/*
+	 * If the hcall fails with not enough memory for either the
+	 * header or data, attempt to allocate more
+	 */
+	if (ret == H_PARTIAL || ret == H_P4) {
+		char *temp_buf;
+
+		/* Make room for four more attribute entries than last time */
+		max_esi_attrs += 4;
+		esi_buf_size = ESI_HDR_SIZE + (max_esi_attrs * ESI_ATTR_SIZE);
+
+		temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL);
+		if (!temp_buf) {
+			/* buf is still owned here: free it on the way out */
+			ret = -ENOMEM;
+			goto out_buf;
+		}
+		buf = temp_buf;
+
+		goto retry;
+	}
+
+	if (ret != H_SUCCESS) {
+		pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO\n");
+		ret = -EIO;
+		goto out_buf;
+	}
+
+	hdr = (struct h_energy_scale_info_hdr *) buf;
+
+	/* Reject an output that claims more entries than the buffer holds */
+	if (esi_buf_size <
+	    be64_to_cpu(hdr->array_offset) + (be64_to_cpu(hdr->num_attrs)
+	    * sizeof(struct energy_scale_attribute))) {
+		ret = -EIO;
+		goto out_buf;
+	}
+
+	curr_esi = (struct energy_scale_attribute *)
+		(buf + be64_to_cpu(hdr->array_offset));
+	*esi = *curr_esi;
+
+out_buf:
+	kfree(buf);
+
+	return ret;
+}
+
+/*
+ * sysfs show handler of the "desc" file: queries the attribute this file
+ * belongs to and emits its description string.
+ */
+static ssize_t desc_show(struct kobject *kobj,
+			 struct kobj_attribute *kobj_attr,
+			 char *buf)
+{
+	struct energy_scale_attribute attr;
+	struct papr_attr *pa;
+	int err;
+
+	pa = container_of(kobj_attr, struct papr_attr, kobj_attr);
+
+	err = papr_get_attr(pa->id, &attr);
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%s\n", attr.desc);
+}
+
+/*
+ * sysfs show handler of the "value" file: queries the attribute this file
+ * belongs to and emits its numeric value in decimal.
+ */
+static ssize_t val_show(struct kobject *kobj,
+			struct kobj_attribute *kobj_attr,
+			char *buf)
+{
+	struct energy_scale_attribute attr;
+	struct papr_attr *pa;
+	int err;
+
+	pa = container_of(kobj_attr, struct papr_attr, kobj_attr);
+
+	err = papr_get_attr(pa->id, &attr);
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%llu\n", be64_to_cpu(attr.val));
+}
+
+/*
+ * sysfs show handler of the "value_desc" file: queries the attribute this
+ * file belongs to and emits the string describing its value.
+ */
+static ssize_t val_desc_show(struct kobject *kobj,
+			     struct kobj_attribute *kobj_attr,
+			     char *buf)
+{
+	struct energy_scale_attribute attr;
+	struct papr_attr *pa;
+	int err;
+
+	pa = container_of(kobj_attr, struct papr_attr, kobj_attr);
+
+	err = papr_get_attr(pa->id, &attr);
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%s\n", attr.value_desc);
+}
+
+/*
+ * File name and show handler of each per-attribute sysfs file. The position
+ * in this table is also the position of the file in a group's attribute
+ * array (see add_attr_group()).
+ */
+static struct papr_ops_info {
+ const char *attr_name;
+ ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *kobj_attr,
+ char *buf);
+} ops_info[KOBJ_MAX_ATTRS] = {
+ { "desc", desc_show },
+ { "value", val_show },
+ { "value_desc", val_desc_show },
+};
+
+/*
+ * Set up @attr as the read-only (0444) sysfs file described by
+ * ops_info[@index] for the energy scale attribute @id.
+ */
+static void add_attr(u64 id, int index, struct papr_attr *attr)
+{
+	struct kobj_attribute *ka = &attr->kobj_attr;
+	const struct papr_ops_info *ops = &ops_info[index];
+
+	attr->id = id;
+	sysfs_attr_init(&ka->attr);
+	ka->attr.name = ops->attr_name;
+	ka->attr.mode = 0444;
+	ka->show = ops->show;
+}
+
+/*
+ * Fill the files of group @pg for attribute @id and register the group under
+ * esi_kobj. The "value_desc" file is left out unless @show_val_desc is set.
+ *
+ * Returns the result of sysfs_create_group().
+ */
+static int add_attr_group(u64 id, struct papr_group *pg, bool show_val_desc)
+{
+	int i;
+
+	for (i = 0; i < KOBJ_MAX_ATTRS; i++) {
+		bool is_val_desc = strcmp(ops_info[i].attr_name, "value_desc") == 0;
+		struct papr_attr *pa = &pg->pgattrs[i];
+
+		if (show_val_desc || !is_val_desc) {
+			add_attr(id, i, pa);
+			pg->pg.attrs[i] = &pa->kobj_attr.attr;
+		}
+	}
+
+	return sysfs_create_group(esi_kobj, &pg->pg);
+}
+
+
+/*
+ * Create /sys/firmware/papr/energy_scale_info/<id>/ for every attribute
+ * reported by H_GET_ENERGY_SCALE_INFO.
+ *
+ * Returns 0 on success, -ENXIO when the firmware lacks the LPAR or the
+ * energy scale info feature, and -ENOMEM on any other failure.
+ */
+static int __init papr_init(void)
+{
+	int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE);
+	int ret, idx, i, max_esi_attrs = CURR_MAX_ESI_ATTRS;
+	struct h_energy_scale_info_hdr *esi_hdr;
+	struct energy_scale_attribute *esi_attrs;
+	uint64_t num_attrs;
+	char *esi_buf;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR) ||
+	    !firmware_has_feature(FW_FEATURE_ENERGY_SCALE_INFO)) {
+		return -ENXIO;
+	}
+
+	esi_buf = kmalloc(esi_buf_size, GFP_KERNEL);
+	if (esi_buf == NULL)
+		return -ENOMEM;
+	/*
+	 * hcall(
+	 * uint64 H_GET_ENERGY_SCALE_INFO, // Get energy scale info
+	 * uint64 flags, // Per the flag request
+	 * uint64 firstAttributeId, // The attribute id
+	 * uint64 bufferAddress, // Guest physical address of the output buffer
+	 * uint64 bufferSize); // The size in bytes of the output buffer
+	 */
+retry:
+
+	ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_ALL, 0,
+				 virt_to_phys(esi_buf), esi_buf_size);
+
+	/*
+	 * If the hcall fails with not enough memory for either the
+	 * header or data, attempt to allocate more
+	 */
+	if (ret == H_PARTIAL || ret == H_P4) {
+		char *temp_esi_buf;
+
+		/* Make room for four more attribute entries than last time */
+		max_esi_attrs += 4;
+		esi_buf_size = ESI_HDR_SIZE + (max_esi_attrs * ESI_ATTR_SIZE);
+
+		temp_esi_buf = krealloc(esi_buf, esi_buf_size, GFP_KERNEL);
+		/* On failure esi_buf is still owned here and must be freed */
+		if (!temp_esi_buf)
+			goto out_free_esi_buf;
+		esi_buf = temp_esi_buf;
+
+		goto retry;
+	}
+
+	if (ret != H_SUCCESS) {
+		pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO, ret: %d\n", ret);
+		goto out_free_esi_buf;
+	}
+
+	esi_hdr = (struct h_energy_scale_info_hdr *) esi_buf;
+	num_attrs = be64_to_cpu(esi_hdr->num_attrs);
+
+	/* Reject an output that claims more entries than the buffer holds */
+	if (esi_buf_size <
+	    be64_to_cpu(esi_hdr->array_offset) +
+	    (num_attrs * sizeof(struct energy_scale_attribute))) {
+		goto out_free_esi_buf;
+	}
+
+	esi_attrs = (struct energy_scale_attribute *)
+		(esi_buf + be64_to_cpu(esi_hdr->array_offset));
+
+	papr_groups = kcalloc(num_attrs, sizeof(*papr_groups), GFP_KERNEL);
+	if (!papr_groups)
+		goto out_free_esi_buf;
+
+	papr_kobj = kobject_create_and_add("papr", firmware_kobj);
+	if (!papr_kobj) {
+		pr_warn("kobject_create_and_add papr failed\n");
+		goto out_papr_groups;
+	}
+
+	esi_kobj = kobject_create_and_add("energy_scale_info", papr_kobj);
+	if (!esi_kobj) {
+		pr_warn("kobject_create_and_add energy_scale_info failed\n");
+		goto out_kobj;
+	}
+
+	/* Allocate the groups before registering */
+	for (idx = 0; idx < num_attrs; idx++) {
+		papr_groups[idx].pg.attrs = kcalloc(KOBJ_MAX_ATTRS + 1,
+					    sizeof(*papr_groups[idx].pg.attrs),
+					    GFP_KERNEL);
+		if (!papr_groups[idx].pg.attrs)
+			goto out_pgattrs;
+
+		papr_groups[idx].pg.name = kasprintf(GFP_KERNEL, "%lld",
+					     be64_to_cpu(esi_attrs[idx].id));
+		if (papr_groups[idx].pg.name == NULL)
+			goto out_pgattrs;
+	}
+
+	for (idx = 0; idx < num_attrs; idx++) {
+		bool show_val_desc = true;
+
+		/* Do not add the value desc attr if it does not exist */
+		if (strnlen(esi_attrs[idx].value_desc,
+			    sizeof(esi_attrs[idx].value_desc)) == 0)
+			show_val_desc = false;
+
+		if (add_attr_group(be64_to_cpu(esi_attrs[idx].id),
+				   &papr_groups[idx],
+				   show_val_desc)) {
+			pr_warn("Failed to create papr attribute group %s\n",
+				papr_groups[idx].pg.name);
+			goto out_pgattrs;
+		}
+	}
+
+	kfree(esi_buf);
+	return 0;
+out_pgattrs:
+	/*
+	 * Walk every entry, not only those before the failing index: the
+	 * array came zeroed from kcalloc(), so entries never set up hold NULL
+	 * (which kfree() accepts), and an entry whose attrs array was
+	 * allocated just before its name allocation failed is released too.
+	 */
+	for (i = 0; i < num_attrs; i++) {
+		kfree(papr_groups[i].pg.attrs);
+		kfree(papr_groups[i].pg.name);
+	}
+	kobject_put(esi_kobj);
+out_kobj:
+	kobject_put(papr_kobj);
+out_papr_groups:
+	kfree(papr_groups);
+out_free_esi_buf:
+	kfree(esi_buf);
+
+	return -ENOMEM;
+}
+
+machine_device_initcall(pseries, papr_init);
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index f48e87ac89c9..f58728d5f10d 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -19,6 +19,7 @@
#include <asm/papr_pdsm.h>
#include <asm/mce.h>
#include <asm/unaligned.h>
+#include <linux/perf_event.h>
#define BIND_ANY_ADDR (~0ul)
@@ -120,6 +121,12 @@ struct papr_scm_priv {
/* length of the stat buffer as expected by phyp */
size_t stat_buffer_len;
+
+ /* The bits which need to be overridden */
+ u64 health_bitmap_inject_mask;
+
+ /* array to have event_code and stat_id mappings */
+ char **nvdimm_events_map;
};
static int papr_scm_pmem_flush(struct nd_region *nd_region,
@@ -340,6 +347,225 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
return 0;
}
+#ifdef CONFIG_PERF_EVENTS
+#define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu)
+
+/*
+ * Query the hypervisor for the current value of the single performance
+ * statistic selected by @event->attr.config and store it in @count.
+ *
+ * Returns 0 on success, -EINVAL when the driver data, the event map or the
+ * selected map entry is missing, -ENOMEM on allocation failure, or the
+ * negative error reported by drc_pmem_query_stats().
+ */
+static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count)
+{
+	struct papr_scm_priv *p = (struct papr_scm_priv *)dev->driver_data;
+	struct papr_scm_perf_stats *stats;
+	struct papr_scm_perf_stat *stat;
+	const char *stat_id;
+	int rc;
+
+	if (!p || !p->nvdimm_events_map)
+		return -EINVAL;
+
+	stat_id = p->nvdimm_events_map[event->attr.config];
+	if (!stat_id)
+		return -EINVAL;
+
+	/* Allocate request buffer enough to hold single performance stat */
+	stats = kzalloc(sizeof(*stats) + sizeof(*stat), GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	/*
+	 * Map entries are NUL-terminated strings which may be shorter than the
+	 * stat_id field, so never copy more than the string itself holds. The
+	 * rest of the field stays zeroed because the buffer came from kzalloc().
+	 */
+	stat = &stats->scm_statistic[0];
+	memcpy(&stat->stat_id, stat_id,
+	       min_t(size_t, strlen(stat_id), sizeof(stat->stat_id)));
+	stat->stat_val = 0;
+
+	rc = drc_pmem_query_stats(p, stats, 1);
+	if (rc >= 0)
+		*count = be64_to_cpu(stat->stat_val);
+
+	kfree(stats);
+	return rc < 0 ? rc : 0;
+}
+
+/*
+ * ->event_init() callback of the nvdimm PMU: accept only counting events
+ * that target this PMU and carry an event code in the range 1..16.
+ */
+static int papr_scm_pmu_event_init(struct perf_event *event)
+{
+	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+
+	if (!nd_pmu)
+		return -EINVAL;
+
+	/* test the event attr type for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* neither event sampling mode nor branch sampling is supported */
+	if (is_sampling_event(event) || has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	/* the PMU's device must carry the papr_scm private data */
+	if (!nd_pmu->dev->driver_data)
+		return -EINVAL;
+
+	/* Invalid eventcode */
+	if (!event->attr.config || event->attr.config > 16)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * ->add() callback of the nvdimm PMU. When the event is to be started right
+ * away (PERF_EF_START), read the current statistic value and record it as
+ * the baseline in hw.prev_count.
+ */
+static int papr_scm_pmu_add(struct perf_event *event, int flags)
+{
+	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+	u64 baseline;
+	int err;
+
+	if (!nd_pmu)
+		return -EINVAL;
+
+	if (!(flags & PERF_EF_START))
+		return 0;
+
+	err = papr_scm_pmu_get_value(event, nd_pmu->dev, &baseline);
+	if (err)
+		return err;
+
+	local64_set(&event->hw.prev_count, baseline);
+	return 0;
+}
+
+/*
+ * ->read() callback of the nvdimm PMU: fetch the current statistic value,
+ * make it the new hw.prev_count and add the difference from the previous
+ * value to the event count. The event is left untouched when the value
+ * cannot be read.
+ */
+static void papr_scm_pmu_read(struct perf_event *event)
+{
+	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+	u64 cur, last;
+
+	if (!nd_pmu)
+		return;
+
+	if (papr_scm_pmu_get_value(event, nd_pmu->dev, &cur))
+		return;
+
+	last = local64_xchg(&event->hw.prev_count, cur);
+	local64_add(cur - last, &event->count);
+}
+
+static void papr_scm_pmu_del(struct perf_event *event, int flags)
+{
+ papr_scm_pmu_read(event); /* take a final reading so the last delta lands in event->count */
+}
+
+/*
+ * Build p->nvdimm_events_map: a NULL-terminated array holding one
+ * NUL-terminated copy of every statistic id returned by the hypervisor.
+ *
+ * Returns 0 on success, -ENOENT when no stat buffer length is known,
+ * -ENOMEM on allocation failure, or the error from drc_pmem_query_stats().
+ * p->nvdimm_events_map is only assigned on success.
+ */
+static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu)
+{
+	struct papr_scm_perf_stat *stat;
+	struct papr_scm_perf_stats *stats;
+	u32 available_events, index;
+	char **map;
+	int rc;
+
+	if (!p->stat_buffer_len)
+		return -ENOENT;
+
+	available_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats))
+			/ sizeof(struct papr_scm_perf_stat);
+
+	/* Allocate the buffer for phyp where stats are written */
+	stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	/* One extra, zeroed slot serves as the NULL terminator of the map. */
+	map = kcalloc(available_events + 1, sizeof(*map), GFP_KERNEL);
+	if (!map) {
+		rc = -ENOMEM;
+		goto out_stats;
+	}
+
+	/* Called to get list of events supported */
+	rc = drc_pmem_query_stats(p, stats, 0);
+	if (rc)
+		goto out_map;
+
+	stat = stats->scm_statistic;
+	for (index = 0; index < available_events; index++, stat++) {
+		/*
+		 * stat_id is a fixed-size field that may not be NUL-terminated,
+		 * so bound the copy by the field size.
+		 */
+		map[index] = kstrndup((const char *)stat->stat_id,
+				      sizeof(stat->stat_id), GFP_KERNEL);
+		if (!map[index]) {
+			rc = -ENOMEM;
+			goto out_map;
+		}
+	}
+
+	p->nvdimm_events_map = map;
+	kfree(stats);
+	return 0;
+
+out_map:
+	/* Slots that were never filled are NULL and kfree(NULL) is a no-op. */
+	for (index = 0; index < available_events; index++)
+		kfree(map[index]);
+	kfree(map);
+out_stats:
+	kfree(stats);
+	return rc;
+}
+
+/*
+ * Allocate an nvdimm PMU for @p, discover the events it supports and
+ * register it through register_nvdimm_pmu(). Nothing is returned to the
+ * caller; a failure is only reported via dev_info().
+ */
+static void papr_scm_pmu_register(struct papr_scm_priv *p)
+{
+	struct nvdimm_pmu *nd_pmu;
+	int rc, nodeid;
+
+	nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL);
+	if (!nd_pmu) {
+		rc = -ENOMEM;
+		goto pmu_err_print;
+	}
+
+	rc = papr_scm_pmu_check_events(p, nd_pmu);
+	if (rc)
+		goto pmu_check_events_err;
+
+	nd_pmu->pmu.task_ctx_nr = perf_invalid_context;
+	nd_pmu->pmu.name = nvdimm_name(p->nvdimm);
+	nd_pmu->pmu.event_init = papr_scm_pmu_event_init;
+	nd_pmu->pmu.read = papr_scm_pmu_read;
+	nd_pmu->pmu.add = papr_scm_pmu_add;
+	nd_pmu->pmu.del = papr_scm_pmu_del;
+
+	nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT |
+				PERF_PMU_CAP_NO_EXCLUDE;
+
+	/* Use the CPUs of the device's online NUMA node as arch_cpumask */
+	nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev));
+	nd_pmu->arch_cpumask = *cpumask_of_node(nodeid);
+
+	rc = register_nvdimm_pmu(nd_pmu, p->pdev);
+	if (rc)
+		goto pmu_register_err;
+
+	/*
+	 * Set archdata.priv value to nvdimm_pmu structure, to handle the
+	 * unregistering of pmu device.
+	 */
+	p->pdev->archdata.priv = nd_pmu;
+	return;
+
+pmu_register_err:
+	kfree(p->nvdimm_events_map);
+pmu_check_events_err:
+	/*
+	 * By the time this label is reached the map has either been freed or
+	 * was never allocated. Clear the pointer so that the kfree() in
+	 * papr_scm_remove() cannot free the same memory a second time.
+	 */
+	p->nvdimm_events_map = NULL;
+	kfree(nd_pmu);
+pmu_err_print:
+	dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc);
+}
+
+#else
+static void papr_scm_pmu_register(struct papr_scm_priv *p) { } /* stub for the #else branch of CONFIG_PERF_EVENTS */
+#endif
+
/*
* Issue hcall to retrieve dimm health info and populate papr_scm_priv with the
* health information.
@@ -347,19 +573,29 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
static int __drc_pmem_query_health(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
+ u64 bitmap = 0;
long rc;
/* issue the hcall */
rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index);
- if (rc != H_SUCCESS) {
+ if (rc == H_SUCCESS)
+ bitmap = ret[0] & ret[1];
+ else if (rc == H_FUNCTION)
+ dev_info_once(&p->pdev->dev,
+ "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap");
+ else {
+
dev_err(&p->pdev->dev,
"Failed to query health information, Err:%ld\n", rc);
return -ENXIO;
}
p->lasthealth_jiffies = jiffies;
- p->health_bitmap = ret[0] & ret[1];
-
+ /* Allow injecting specific health bits via inject mask. */
+ if (p->health_bitmap_inject_mask)
+ bitmap = (bitmap & ~p->health_bitmap_inject_mask) |
+ p->health_bitmap_inject_mask;
+ WRITE_ONCE(p->health_bitmap, bitmap);
dev_dbg(&p->pdev->dev,
"Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n",
ret[0], ret[1]);
@@ -669,6 +905,56 @@ out:
return rc;
}
+/*
+ * Service the smart-inject PDSM: update health_bitmap_inject_mask from the
+ * inject flags found in @payload and report the handled flags back in
+ * @payload.
+ *
+ * Returns the size of the smart-inject payload on success, or the error
+ * from mutex_lock_interruptible() if taking the health mutex failed.
+ */
+static int papr_pdsm_smart_inject(struct papr_scm_priv *p,
+				  union nd_pdsm_payload *payload)
+{
+	int rc;
+	u32 supported_flags = 0;
+	u64 inject_mask = 0, clear_mask = 0;
+	u64 mask;
+
+	/* Check for individual smart error flags and update inject/clear masks */
+	if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) {
+		supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL;
+		if (payload->smart_inject.fatal_enable)
+			inject_mask |= PAPR_PMEM_HEALTH_FATAL;
+		else
+			clear_mask |= PAPR_PMEM_HEALTH_FATAL;
+	}
+
+	if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) {
+		supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN;
+		if (payload->smart_inject.unsafe_shutdown_enable)
+			inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
+		else
+			clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
+	}
+
+	dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n",
+		inject_mask, clear_mask);
+
+	/* Prevent concurrent access to dimm health bitmap related members */
+	rc = mutex_lock_interruptible(&p->health_mutex);
+	if (rc)
+		return rc;
+
+	/* Use inject/clear masks to set health_bitmap_inject_mask */
+	mask = READ_ONCE(p->health_bitmap_inject_mask);
+	mask = (mask & ~clear_mask) | inject_mask;
+	WRITE_ONCE(p->health_bitmap_inject_mask, mask);
+
+	/* Invalidate cached health bitmap */
+	p->lasthealth_jiffies = 0;
+
+	mutex_unlock(&p->health_mutex);
+
+	/* Return the supported flags back to userspace */
+	payload->smart_inject.flags = supported_flags;
+
+	/* Report the size of the payload this PDSM actually fills in */
+	return sizeof(struct nd_papr_pdsm_smart_inject);
+}
+
/*
* 'struct pdsm_cmd_desc'
* Identifies supported PDSMs' expected length of in/out payloads
@@ -702,6 +988,12 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = {
.size_out = sizeof(struct nd_papr_pdsm_health),
.service = papr_pdsm_health,
},
+
+ [PAPR_PDSM_SMART_INJECT] = {
+ .size_in = sizeof(struct nd_papr_pdsm_smart_inject),
+ .size_out = sizeof(struct nd_papr_pdsm_smart_inject),
+ .service = papr_pdsm_smart_inject,
+ },
/* Empty */
[PAPR_PDSM_MAX] = {
.size_in = 0,
@@ -838,6 +1130,19 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
return 0;
}
+/* sysfs show handler: print the current health_bitmap_inject_mask in hex. */
+static ssize_t health_bitmap_inject_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct nvdimm *dimm = to_nvdimm(dev);
+	struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+
+	/* sysfs_emit() bounds the output to the sysfs page, unlike sprintf() */
+	return sysfs_emit(buf, "%#llx\n",
+			  READ_ONCE(p->health_bitmap_inject_mask));
+}
+
+static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject);
+
static ssize_t perf_stats_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -952,10 +1257,11 @@ static struct attribute *papr_nd_attributes[] = {
&dev_attr_flags.attr,
&dev_attr_perf_stats.attr,
&dev_attr_dirty_shutdown.attr,
+ &dev_attr_health_bitmap_inject.attr,
NULL,
};
-static struct attribute_group papr_nd_attribute_group = {
+static const struct attribute_group papr_nd_attribute_group = {
.name = "papr",
.is_visible = papr_nd_attribute_visible,
.attrs = papr_nd_attributes,
@@ -1236,6 +1542,7 @@ static int papr_scm_probe(struct platform_device *pdev)
goto err2;
platform_set_drvdata(pdev, p);
+ papr_scm_pmu_register(p);
return 0;
@@ -1254,6 +1561,12 @@ static int papr_scm_remove(struct platform_device *pdev)
nvdimm_bus_unregister(p->bus);
drc_pmem_unbind(p);
+
+ if (pdev->archdata.priv)
+ unregister_nvdimm_pmu(pdev->archdata.priv);
+
+ pdev->archdata.priv = NULL;
+ kfree(p->nvdimm_events_map);
kfree(p->bus_desc.provider_name);
kfree(p);
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 90c9d3531694..4ba824568119 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -78,6 +78,9 @@ int remove_phb_dynamic(struct pci_controller *phb)
pseries_msi_free_domains(phb);
+ /* Keep a reference so phb isn't freed yet */
+ get_device(&host_bridge->dev);
+
/* Remove the PCI bus and unregister the bridge device from sysfs */
phb->bus = NULL;
pci_remove_bus(b);
@@ -101,6 +104,7 @@ int remove_phb_dynamic(struct pci_controller *phb)
* the pcibios_free_controller_deferred() callback;
* see pseries_root_bridge_prepare().
*/
+ put_device(&host_bridge->dev);
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c
index ee343ec6ab94..3676cb297767 100644
--- a/arch/powerpc/platforms/pseries/power.c
+++ b/arch/powerpc/platforms/pseries/power.c
@@ -51,7 +51,7 @@ static struct attribute *g[] = {
NULL,
};
-static struct attribute_group attr_group = {
+static const struct attribute_group attr_group = {
.attrs = g,
};
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 56c9ef9052e9..af162aeeae86 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -21,6 +21,7 @@ struct pt_regs;
extern int pSeries_system_reset_exception(struct pt_regs *regs);
extern int pSeries_machine_check_exception(struct pt_regs *regs);
extern long pseries_machine_check_realmode(struct pt_regs *regs);
+void pSeries_machine_check_log_err(void);
#ifdef CONFIG_SMP
extern void smp_init_pseries(void);
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 74c9b1b5bc66..f12516c3998c 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -23,11 +23,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock);
static int ras_check_exception_token;
-static void mce_process_errlog_event(struct irq_work *work);
-static struct irq_work mce_errlog_process_work = {
- .func = mce_process_errlog_event,
-};
-
#define EPOW_SENSOR_TOKEN 9
#define EPOW_SENSOR_INDEX 0
@@ -60,11 +55,17 @@ struct pseries_mc_errorlog {
* XX 2: Reserved.
* XXX 3: Type of UE error.
*
- * For error_type != MC_ERROR_TYPE_UE
+ * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB
* XXXXXXXX
* X 1: Effective address provided.
* XXXXX 5: Reserved.
* XX 2: Type of SLB/ERAT/TLB error.
+ *
+ * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS
+ * XXXXXXXX
+ * X 1: Error causing address provided.
+ * XXX 3: Type of error.
+ * XXXX 4: Reserved.
*/
u8 sub_err_type;
u8 reserved_1[6];
@@ -80,6 +81,7 @@ struct pseries_mc_errorlog {
#define MC_ERROR_TYPE_TLB 0x04
#define MC_ERROR_TYPE_D_CACHE 0x05
#define MC_ERROR_TYPE_I_CACHE 0x07
+#define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08
/* RTAS pseries MCE error sub types */
#define MC_ERROR_UE_INDETERMINATE 0
@@ -90,6 +92,7 @@ struct pseries_mc_errorlog {
#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
#define UE_LOGICAL_ADDR_PROVIDED 0x20
+#define MC_EFFECTIVE_ADDR_PROVIDED 0x80
#define MC_ERROR_SLB_PARITY 0
#define MC_ERROR_SLB_MULTIHIT 1
@@ -103,6 +106,9 @@ struct pseries_mc_errorlog {
#define MC_ERROR_TLB_MULTIHIT 2
#define MC_ERROR_TLB_INDETERMINATE 3
+#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0
+#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1
+
static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
{
switch (mlog->error_type) {
@@ -112,6 +118,8 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
case MC_ERROR_TYPE_ERAT:
case MC_ERROR_TYPE_TLB:
return (mlog->sub_err_type & 0x03);
+ case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
+ return (mlog->sub_err_type & 0x70) >> 4;
default:
return 0;
}
@@ -658,7 +666,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
break;
}
- if (mce_log->sub_err_type & 0x80)
+ if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
eaddr = be64_to_cpu(mce_log->effective_address);
break;
case MC_ERROR_TYPE_ERAT:
@@ -675,7 +683,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
break;
}
- if (mce_log->sub_err_type & 0x80)
+ if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
eaddr = be64_to_cpu(mce_log->effective_address);
break;
case MC_ERROR_TYPE_TLB:
@@ -692,7 +700,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
break;
}
- if (mce_log->sub_err_type & 0x80)
+ if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
eaddr = be64_to_cpu(mce_log->effective_address);
break;
case MC_ERROR_TYPE_D_CACHE:
@@ -701,6 +709,21 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
case MC_ERROR_TYPE_I_CACHE:
mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
break;
+ case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
+ mce_err.error_type = MCE_ERROR_TYPE_RA;
+ switch (err_sub_type) {
+ case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK:
+ mce_err.u.ra_error_type =
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
+ break;
+ case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS:
+ mce_err.u.ra_error_type =
+ MCE_RA_ERROR_LOAD_STORE_FOREIGN;
+ break;
+ }
+ if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
+ eaddr = be64_to_cpu(mce_log->effective_address);
+ break;
case MC_ERROR_TYPE_UNKNOWN:
default:
mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
@@ -717,7 +740,6 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
struct pseries_errorlog *pseries_log;
struct pseries_mc_errorlog *mce_log = NULL;
int disposition = rtas_error_disposition(errp);
- unsigned long msr;
u8 error_type;
if (!rtas_error_extended(errp))
@@ -731,40 +753,16 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
error_type = mce_log->error_type;
disposition = mce_handle_err_realmode(disposition, error_type);
-
- /*
- * Enable translation as we will be accessing per-cpu variables
- * in save_mce_event() which may fall outside RMO region, also
- * leave it enabled because subsequently we will be queuing work
- * to workqueues where again per-cpu variables accessed, besides
- * fwnmi_release_errinfo() crashes when called in realmode on
- * pseries.
- * Note: All the realmode handling like flushing SLB entries for
- * SLB multihit is done by now.
- */
out:
- msr = mfmsr();
- mtmsr(msr | MSR_IR | MSR_DR);
-
disposition = mce_handle_err_virtmode(regs, errp, mce_log,
disposition);
-
- /*
- * Queue irq work to log this rtas event later.
- * irq_work_queue uses per-cpu variables, so do this in virt
- * mode as well.
- */
- irq_work_queue(&mce_errlog_process_work);
-
- mtmsr(msr);
-
return disposition;
}
/*
* Process MCE rtas errlog event.
*/
-static void mce_process_errlog_event(struct irq_work *work)
+void pSeries_machine_check_log_err(void)
{
struct rtas_error_log *err;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 83a04d967a59..069d7b3bb142 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -1086,6 +1086,7 @@ define_machine(pseries) {
.system_reset_exception = pSeries_system_reset_exception,
.machine_check_early = pseries_machine_check_realmode,
.machine_check_exception = pSeries_machine_check_exception,
+ .machine_check_log_err = pSeries_machine_check_log_err,
#ifdef CONFIG_KEXEC_CORE
.machine_kexec = pSeries_machine_kexec,
.kexec_cpu_down = pseries_kexec_cpu_down,
diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c
new file mode 100644
index 000000000000..4a7fcde5afc0
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas-sysfs.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2022-23 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include "vas.h"
+
+#ifdef CONFIG_SYSFS
+static struct kobject *pseries_vas_kobj;
+static struct kobject *gzip_caps_kobj;
+
+struct vas_caps_entry {
+ struct kobject kobj; /* embedded kobject; to_caps_entry() maps it back to this entry */
+ struct vas_cop_feat_caps *caps; /* capabilities handed to the attribute show/store handlers */
+};
+
+#define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj)
+
+/*
+ * Store handler through which drmgr notifies the kernel that the QoS
+ * credits changed. The written value (the target total QoS credits) is only
+ * validated as a u16; the authoritative QoS capabilities are then fetched
+ * via vas_reconfig_capabilties(). Any failure is reported as -EINVAL.
+ */
+static ssize_t update_total_credits_trigger(struct vas_cop_feat_caps *caps,
+						const char *buf, size_t count)
+{
+	u16 creds;
+
+	if (kstrtou16(buf, 0, &creds))
+		return -EINVAL;
+
+	if (vas_reconfig_capabilties(caps->win_type))
+		return -EINVAL;
+
+	return count;
+}
+
+/* Generate <_name>_show(), which prints the atomic counter caps-><_name>. */
+#define sysfs_caps_entry_read(_name)					\
+static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf)	\
+{									\
+	return sysfs_emit(buf, "%d\n", atomic_read(&caps->_name));	\
+}
+
+struct vas_sysfs_entry {
+ struct attribute attr; /* embedded sysfs attribute; container_of() recovers the entry from it */
+ ssize_t (*show)(struct vas_cop_feat_caps *, char *); /* read handler; reads fail with -EIO when NULL */
+ ssize_t (*store)(struct vas_cop_feat_caps *, const char *, size_t); /* write handler; writes fail with -EIO when NULL */
+};
+
+/*
+ * Define a read-only (0444) VAS attribute <_name>_attribute together with
+ * its <_name>_show() handler. The macro deliberately does not end in a
+ * semicolon: it is supplied at the point of use, e.g.
+ * VAS_ATTR_RO(nr_total_credits);
+ */
+#define VAS_ATTR_RO(_name)	\
+	sysfs_caps_entry_read(_name)		\
+	static struct vas_sysfs_entry _name##_attribute = __ATTR(_name,	\
+				0444, _name##_show, NULL)
+
+/*
+ * Create sysfs interface:
+ * /sys/devices/vas/vas0/gzip/default_capabilities
+ * This directory contains the following VAS GZIP capabilities
+ * for the default credit type.
+ * /sys/devices/vas/vas0/gzip/default_capabilities/nr_total_credits
+ * Total number of default credits assigned to the LPAR which
+ * can be changed with DLPAR operation.
+ * /sys/devices/vas/vas0/gzip/default_capabilities/nr_used_credits
+ * Number of credits used by the user space. One credit will
+ * be assigned for each window open.
+ *
+ * /sys/devices/vas/vas0/gzip/qos_capabilities
+ * This directory contains the following VAS GZIP capabilities
+ * for the Quality of Service (QoS) credit type.
+ * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_total_credits
+ * Total number of QoS credits assigned to the LPAR. The user
+ * has to define this value using HMC interface. It can be
+ * changed dynamically by the user.
+ * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_used_credits
+ * Number of credits used by the user space.
+ * /sys/devices/vas/vas0/gzip/qos_capabilities/update_total_credits
+ * Update total QoS credits dynamically
+ */
+
+VAS_ATTR_RO(nr_total_credits);
+VAS_ATTR_RO(nr_used_credits);
+
+static struct vas_sysfs_entry update_total_credits_attribute =
+ __ATTR(update_total_credits, 0200, NULL, update_total_credits_trigger); /* mode 0200 with a store handler only */
+
+static struct attribute *vas_def_capab_attrs[] = { /* default_attrs of vas_def_attr_type */
+ &nr_total_credits_attribute.attr,
+ &nr_used_credits_attribute.attr,
+ NULL, /* terminator */
+};
+
+static struct attribute *vas_qos_capab_attrs[] = { /* default_attrs of vas_qos_attr_type */
+ &nr_total_credits_attribute.attr,
+ &nr_used_credits_attribute.attr,
+ &update_total_credits_attribute.attr, /* only the QoS type gets the update trigger */
+ NULL, /* terminator */
+};
+
+/*
+ * sysfs_ops ->show(): recover the capability entry from @kobj and the VAS
+ * attribute from @attr, then delegate to the attribute's show handler.
+ * Returns -EIO for attributes that have no show handler.
+ */
+static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct vas_sysfs_entry *sattr =
+		container_of(attr, struct vas_sysfs_entry, attr);
+	struct vas_caps_entry *owner = to_caps_entry(kobj);
+
+	return sattr->show ? sattr->show(owner->caps, buf) : -EIO;
+}
+
+/*
+ * sysfs_ops ->store(): recover the capability entry from @kobj and the VAS
+ * attribute from @attr, then delegate to the attribute's store handler.
+ * Returns -EIO for attributes that have no store handler.
+ */
+static ssize_t vas_type_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct vas_sysfs_entry *sattr =
+		container_of(attr, struct vas_sysfs_entry, attr);
+	struct vas_caps_entry *owner = to_caps_entry(kobj);
+
+	return sattr->store ? sattr->store(owner->caps, buf, count) : -EIO;
+}
+
+/* kobj_type ->release(): free the vas_caps_entry that embeds @kobj. */
+static void vas_type_release(struct kobject *kobj)
+{
+	kfree(to_caps_entry(kobj));
+}
+
+static const struct sysfs_ops vas_sysfs_ops = { /* used by both capability kobj_types below */
+ .show = vas_type_show,
+ .store = vas_type_store,
+};
+
+static struct kobj_type vas_def_attr_type = { /* ktype of the "default_capabilities" kobject */
+ .release = vas_type_release, /* frees the enclosing vas_caps_entry */
+ .sysfs_ops = &vas_sysfs_ops,
+ .default_attrs = vas_def_capab_attrs,
+};
+
+static struct kobj_type vas_qos_attr_type = { /* ktype of the "qos_capabilities" kobject */
+ .release = vas_type_release, /* frees the enclosing vas_caps_entry */
+ .sysfs_ops = &vas_sysfs_ops,
+ .default_attrs = vas_qos_capab_attrs,
+};
+
+/*
+ * Pick the sysfs directory name for @centry from its capability descriptor.
+ * For the two known GZIP descriptors the embedded kobject is initialised
+ * with the matching ktype and *@kobj is set to the "gzip" parent kobject.
+ * For any other descriptor "Unknown" is returned and neither the kobject
+ * nor *@kobj is touched.
+ */
+static char *vas_caps_kobj_name(struct vas_caps_entry *centry,
+				struct kobject **kobj)
+{
+	struct vas_cop_feat_caps *feat = centry->caps;
+	struct kobj_type *ktype;
+	char *dirname;
+
+	if (feat->descriptor == VAS_GZIP_QOS_CAPABILITIES) {
+		ktype = &vas_qos_attr_type;
+		dirname = "qos_capabilities";
+	} else if (feat->descriptor == VAS_GZIP_DEFAULT_CAPABILITIES) {
+		ktype = &vas_def_attr_type;
+		dirname = "default_capabilities";
+	} else {
+		return "Unknown";
+	}
+
+	kobject_init(&centry->kobj, ktype);
+	*kobj = gzip_caps_kobj;
+	return dirname;
+}
+
+/*
+ * Add feature specific capability dir entry.
+ * Ex: VDefGzip or VQosGzip
+ *
+ * Returns 0 on success or when there is no parent kobject to attach the
+ * entry to, -ENOMEM if the entry cannot be allocated, or the error from
+ * kobject_add().
+ */
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps)
+{
+	struct vas_caps_entry *centry;
+	struct kobject *kobj = NULL;
+	char *name;
+	int ret;
+
+	centry = kzalloc(sizeof(*centry), GFP_KERNEL);
+	if (!centry)
+		return -ENOMEM;
+
+	centry->caps = caps;
+	name = vas_caps_kobj_name(centry, &kobj);
+
+	if (!kobj) {
+		/*
+		 * Nothing will be added to sysfs, so the entry must not be
+		 * leaked. An initialised kobject is dropped through its
+		 * release(); otherwise the zeroed allocation is freed directly.
+		 */
+		if (centry->kobj.state_initialized)
+			kobject_put(&centry->kobj);
+		else
+			kfree(centry);
+		return 0;
+	}
+
+	ret = kobject_add(&centry->kobj, kobj, "%s", name);
+	if (ret) {
+		pr_err("VAS: sysfs kobject add / event failed %d\n",
+				ret);
+		/* Dropping the reference frees centry via vas_type_release() */
+		kobject_put(&centry->kobj);
+	}
+
+	return ret;
+}
+
+static struct miscdevice vas_miscdev = { /* its device kobject is the parent of the "vas0" directory */
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "vas",
+};
+
+/*
+ * Add VAS and VasCaps (overall capabilities) dir entries.
+ *
+ * Registers the "vas" misc device, creates the "vas0" directory below it
+ * and, when a GZIP feature bit is set in @vas_caps, the "gzip" directory.
+ * Returns 0 on success, the misc_register() error, or -ENOMEM when a
+ * kobject cannot be created. Everything set up so far is undone on failure.
+ */
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps)
+{
+	int ret;
+
+	ret = misc_register(&vas_miscdev);
+	if (ret < 0) {
+		pr_err("%s: register vas misc device failed\n", __func__);
+		return ret;
+	}
+
+	/*
+	 * The hypervisor does not expose multiple VAS instances to the
+	 * partition, whereas multiple instances can be seen on PowerNV.
+	 * So create only the 'vas0' directory on pseries.
+	 */
+	pseries_vas_kobj = kobject_create_and_add("vas0",
+					&vas_miscdev.this_device->kobj);
+	if (!pseries_vas_kobj) {
+		pr_err("Failed to create VAS sysfs entry\n");
+		ret = -ENOMEM;
+		goto out_deregister;
+	}
+
+	if ((vas_caps->feat_type & VAS_GZIP_QOS_FEAT_BIT) ||
+		(vas_caps->feat_type & VAS_GZIP_DEF_FEAT_BIT)) {
+		gzip_caps_kobj = kobject_create_and_add("gzip",
+						       pseries_vas_kobj);
+		if (!gzip_caps_kobj) {
+			pr_err("Failed to create VAS GZIP capability entry\n");
+			ret = -ENOMEM;
+			goto out_put_vas;
+		}
+	}
+
+	return 0;
+
+out_put_vas:
+	kobject_put(pseries_vas_kobj);
+	/* Do not keep a pointer to the kobject that was just released */
+	pseries_vas_kobj = NULL;
+out_deregister:
+	/* Do not leave the misc device registered without its sysfs tree */
+	misc_deregister(&vas_miscdev);
+	return ret;
+}
+
+#else
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps)
+{
+ return 0; /* stub for the #else branch of CONFIG_SYSFS: nothing to add */
+}
+
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps)
+{
+ return 0; /* stub for the #else branch of CONFIG_SYSFS: nothing to set up */
+}
+#endif
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index d243ddc58827..1f59d78c77a1 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -26,9 +26,11 @@
static struct vas_all_caps caps_all;
static bool copypaste_feat;
+static struct hv_vas_cop_feat_caps hv_cop_caps;
static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
static DEFINE_MUTEX(vas_pseries_mutex);
+static bool migration_in_progress;
static long hcall_return_busy_check(long rc)
{
@@ -107,7 +109,6 @@ static int h_deallocate_vas_window(u64 winid)
static int h_modify_vas_window(struct pseries_vas_window *win)
{
long rc;
- u32 lpid = mfspr(SPRN_PID);
/*
* AMR value is not supported in Linux VAS implementation.
@@ -115,7 +116,7 @@ static int h_modify_vas_window(struct pseries_vas_window *win)
*/
do {
rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
- win->vas_win.winid, lpid, 0,
+ win->vas_win.winid, win->pid, 0,
VAS_MOD_WIN_FLAGS, 0);
rc = hcall_return_busy_check(rc);
@@ -124,8 +125,8 @@ static int h_modify_vas_window(struct pseries_vas_window *win)
if (rc == H_SUCCESS)
return 0;
- pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n",
- rc, win->vas_win.winid, lpid);
+ pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
+ rc, win->vas_win.winid, win->pid);
return -EIO;
}
@@ -310,8 +311,8 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
cop_feat_caps = &caps->caps;
- if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) >
- atomic_read(&cop_feat_caps->target_lpar_creds)) {
+ if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
+ atomic_read(&cop_feat_caps->nr_total_credits)) {
pr_err("Credits are not available to allocate window\n");
rc = -EINVAL;
goto out;
@@ -338,6 +339,8 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
}
}
+ txwin->pid = mfspr(SPRN_PID);
+
/*
* Allocate / Deallocate window hcalls and setup / free IRQs
* have to be protected with mutex.
@@ -354,7 +357,10 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
* same fault IRQ is not freed by the OS before.
*/
mutex_lock(&vas_pseries_mutex);
- rc = allocate_setup_window(txwin, (u64 *)&domain[0],
+ if (migration_in_progress)
+ rc = -EBUSY;
+ else
+ rc = allocate_setup_window(txwin, (u64 *)&domain[0],
cop_feat_caps->win_type);
mutex_unlock(&vas_pseries_mutex);
if (rc)
@@ -369,13 +375,28 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
if (rc)
goto out_free;
- vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
txwin->win_type = cop_feat_caps->win_type;
mutex_lock(&vas_pseries_mutex);
- list_add(&txwin->win_list, &caps->list);
+ /*
+ * Possible to lose the acquired credit with DLPAR core
+ * removal after the window is opened. So if there are any
+ * closed windows (means with lost credits), do not give new
+ * window to user space. New windows will be opened only
+ * after the existing windows are reopened when credits are
+ * available.
+ */
+ if (!caps->nr_close_wins) {
+ list_add(&txwin->win_list, &caps->list);
+ caps->nr_open_windows++;
+ mutex_unlock(&vas_pseries_mutex);
+ vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
+ return &txwin->vas_win;
+ }
mutex_unlock(&vas_pseries_mutex);
- return &txwin->vas_win;
+ put_vas_user_win_ref(&txwin->vas_win.task_ref);
+ rc = -EBUSY;
+ pr_err("No credit is available to allocate window\n");
out_free:
/*
@@ -385,7 +406,7 @@ out_free:
free_irq_setup(txwin);
h_deallocate_vas_window(txwin->vas_win.winid);
out:
- atomic_dec(&cop_feat_caps->used_lpar_creds);
+ atomic_dec(&cop_feat_caps->nr_used_credits);
kfree(txwin);
return ERR_PTR(rc);
}
@@ -438,14 +459,25 @@ static int vas_deallocate_window(struct vas_window *vwin)
caps = &vascaps[win->win_type].caps;
mutex_lock(&vas_pseries_mutex);
- rc = deallocate_free_window(win);
- if (rc) {
- mutex_unlock(&vas_pseries_mutex);
- return rc;
- }
+ /*
+ * The VAS window is already closed in the hypervisor if it
+ * lost its credit or was closed for migration. So just remove
+ * the entry from the list, drop the task references and free
+ * the vas_window struct.
+ */
+ if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
+ !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
+ rc = deallocate_free_window(win);
+ if (rc) {
+ mutex_unlock(&vas_pseries_mutex);
+ return rc;
+ }
+ } else
+ vascaps[win->win_type].nr_close_wins--;
list_del(&win->win_list);
- atomic_dec(&caps->used_lpar_creds);
+ atomic_dec(&caps->nr_used_credits);
+ vascaps[win->win_type].nr_open_windows--;
mutex_unlock(&vas_pseries_mutex);
put_vas_user_win_ref(&vwin->task_ref);
@@ -500,6 +532,7 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
memset(vcaps, 0, sizeof(*vcaps));
INIT_LIST_HEAD(&vcaps->list);
+ vcaps->feat = feat;
caps = &vcaps->caps;
rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
@@ -521,7 +554,7 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
}
caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
- atomic_set(&caps->target_lpar_creds,
+ atomic_set(&caps->nr_total_credits,
be16_to_cpu(hv_caps->target_lpar_creds));
if (feat == VAS_GZIP_DEF_FEAT) {
caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
@@ -533,16 +566,409 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
}
}
+ rc = sysfs_add_vas_caps(caps);
+ if (rc)
+ return rc;
+
copypaste_feat = true;
return 0;
}
+/*
+ * VAS windows can be closed due to lost credits when the core is
+ * removed. So reopen them if credits are available due to DLPAR
+ * core add and set the window active status. When NX sees the page
+ * fault on the unmapped paste address, the kernel handles the fault
+ * by setting the remapping to new paste address if the window is
+ * active.
+ */
+static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
+ bool migrate)
+{
+ long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
+ struct vas_cop_feat_caps *caps = &vcaps->caps;
+ struct pseries_vas_window *win = NULL, *tmp;
+ int rc, mv_ents = 0;
+ int flag;
+
+ /*
+ * Nothing to do if there are no closed windows.
+ */
+ if (!vcaps->nr_close_wins)
+ return 0;
+
+ /*
+ * For core removal, the hypervisor reduces the credits
+ * assigned to the LPAR and the kernel closes VAS windows
+ * in the hypervisor depending on the reduced credits. The kernel
+ * uses LIFO (the last windows that are opened will be closed
+ * first) and expects to reopen in the same order when credits
+ * are available.
+ * For example, 40 windows are closed when the LPAR lost 2 cores
+ * (dedicated). If 1 core is added, this LPAR can have 20 more
+ * credits. It means the kernel can reopen 20 windows. So skip
+ * 20 entries in the VAS windows list and reopen the next 20 windows.
+ * For partition migration, reopen all closed windows during
+ * resume.
+ */
+ if ((vcaps->nr_close_wins > creds) && !migrate)
+ mv_ents = vcaps->nr_close_wins - creds;
+
+ list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
+ if (!mv_ents)
+ break;
+
+ mv_ents--;
+ }
+
+ /*
+ * Open windows if they are closed only with migration or
+ * DLPAR (lost credit) before.
+ */
+ if (migrate)
+ flag = VAS_WIN_MIGRATE_CLOSE;
+ else
+ flag = VAS_WIN_NO_CRED_CLOSE;
+
+ list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
+ /*
+ * This window is closed with DLPAR and migration events.
+ * So reopen the window with the last event.
+ * The user space is not suspended with the current
+ * migration notifier. So the user space can issue DLPAR
+ * CPU hotplug while migration in progress. In this case
+ * this window will be opened with the last event.
+ */
+ if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
+ (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
+ win->vas_win.status &= ~flag;
+ continue;
+ }
+
+ /*
+ * Nothing to do on this window if it is not closed
+ * with this flag
+ */
+ if (!(win->vas_win.status & flag))
+ continue;
+
+ rc = allocate_setup_window(win, (u64 *)&domain[0],
+ caps->win_type);
+ if (rc)
+ return rc;
+
+ rc = h_modify_vas_window(win);
+ if (rc)
+ goto out;
+
+ mutex_lock(&win->vas_win.task_ref.mmap_mutex);
+ /*
+ * Set window status to active
+ */
+ win->vas_win.status &= ~flag;
+ mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
+ win->win_type = caps->win_type;
+ if (!--vcaps->nr_close_wins)
+ break;
+ }
+
+ return 0;
+out:
+ /*
+ * Window modify HCALL failed. So close the window to the
+ * hypervisor and return.
+ */
+ free_irq_setup(win);
+ h_deallocate_vas_window(win->vas_win.winid);
+ return rc;
+}
+
+/*
+ * The hypervisor reduces the available credits if the LPAR lost a core.
+ * It means the excess windows should not be active and the user space
+ * should not be using these windows to send compression requests to NX.
+ * So the kernel closes the excess windows and unmaps the paste address
+ * such that the user space receives paste instruction failure. It is
+ * then up to the user space to fall back to SW compression and manage
+ * with the existing windows.
+ */
+static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
+ bool migrate)
+{
+ struct pseries_vas_window *win, *tmp;
+ struct vas_user_win_ref *task_ref;
+ struct vm_area_struct *vma;
+ int rc = 0, flag;
+
+ if (migrate)
+ flag = VAS_WIN_MIGRATE_CLOSE;
+ else
+ flag = VAS_WIN_NO_CRED_CLOSE;
+
+ list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
+ /*
+ * This window is already closed due to lost credit
+ * or for migration before. Go for next window.
+ * For migration, nothing to do since this window
+ * closed for DLPAR and will be reopened even on
+ * the destination system with other DLPAR operation.
+ */
+ if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
+ (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
+ win->vas_win.status |= flag;
+ continue;
+ }
+
+ task_ref = &win->vas_win.task_ref;
+ mutex_lock(&task_ref->mmap_mutex);
+ vma = task_ref->vma;
+ /*
+ * The number of available credits is reduced, so select
+ * and close windows.
+ */
+ win->vas_win.status |= flag;
+
+ mmap_write_lock(task_ref->mm);
+ /*
+ * vma is set in the original mapping. But this mapping
+ * is done with mmap() after the window is opened with ioctl.
+ * so we may not see the original mapping if the core remove
+ * is done before the original mmap() and after the ioctl.
+ */
+ if (vma)
+ zap_page_range(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start);
+
+ mmap_write_unlock(task_ref->mm);
+ mutex_unlock(&task_ref->mmap_mutex);
+ /*
+ * Close the VAS window in the hypervisor, but do not
+ * free the vas_window struct since it may be reused
+ * when the credit is available later (DLPAR with
+ * adding cores). This struct will also be used
+ * later when the process issues close(FD).
+ */
+ rc = deallocate_free_window(win);
+ /*
+ * This failure is from the hypervisor.
+ * No way to stop migration for these failures.
+ * So ignore error and continue closing other windows.
+ */
+ if (rc && !migrate)
+ return rc;
+
+ vcap->nr_close_wins++;
+
+ /*
+ * For migration, do not depend on lpar_creds in case it
+ * mismatches the hypervisor value (should not happen).
+ * So close all active windows in the list; they will be
+ * reopened based on the new lpar_creds on the
+ * destination system during resume.
+ */
+ if (!migrate && !--excess_creds)
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Get new VAS capabilities when the core add/removal configuration
+ * changes. Reconfig window configurations based on the credits
+ * availability from this new capabilities.
+ */
+int vas_reconfig_capabilties(u8 type)
+{
+ struct vas_cop_feat_caps *caps;
+ int old_nr_creds, new_nr_creds;
+ struct vas_caps *vcaps;
+ int rc = 0, nr_active_wins;
+
+ if (type >= VAS_MAX_FEAT_TYPE) {
+ pr_err("Invalid credit type %d\n", type);
+ return -EINVAL;
+ }
+
+ vcaps = &vascaps[type];
+ caps = &vcaps->caps;
+
+ mutex_lock(&vas_pseries_mutex);
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps->feat,
+ (u64)virt_to_phys(&hv_cop_caps));
+ if (rc)
+ goto out;
+
+ new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
+
+ old_nr_creds = atomic_read(&caps->nr_total_credits);
+
+ atomic_set(&caps->nr_total_credits, new_nr_creds);
+ /*
+ * The total number of available credits may be decreased or
+ * increased with DLPAR operation. This means some windows have to
+ * be closed / reopened. Hold the vas_pseries_mutex so that
+ * the user space can not open new windows.
+ */
+ if (old_nr_creds < new_nr_creds) {
+ /*
+ * If the existing target credits is less than the new
+ * target, reopen windows if they are closed due to
+ * the previous DLPAR (core removal).
+ */
+ rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
+ false);
+ } else {
+ /*
+ * # active windows is more than new LPAR available
+ * credits. So close the excessive windows.
+ * On pseries, each window will have 1 credit.
+ */
+ nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
+ if (nr_active_wins > new_nr_creds)
+ rc = reconfig_close_windows(vcaps,
+ nr_active_wins - new_nr_creds,
+ false);
+ }
+
+out:
+ mutex_unlock(&vas_pseries_mutex);
+ return rc;
+}
+/*
+ * Total number of default credits available (target_credits)
+ * in LPAR depends on number of cores configured. It varies based on
+ * whether processors are in shared mode or dedicated mode.
+ * Get notified when the CPU configuration is changed with a DLPAR
+ * operation so that the new target_credits (vas default capabilities)
+ * can be read and the existing windows usage updated if needed.
+ */
+static int pseries_vas_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct of_reconfig_data *rd = data;
+ struct device_node *dn = rd->dn;
+ const __be32 *intserv = NULL;
+ int len, rc = 0;
+
+ if ((action == OF_RECONFIG_ATTACH_NODE) ||
+ (action == OF_RECONFIG_DETACH_NODE))
+ intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
+ &len);
+ /*
+ * Processor config is not changed
+ */
+ if (!intserv)
+ return NOTIFY_OK;
+
+ rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE);
+ if (rc)
+ pr_err("Failed reconfig VAS capabilities with DLPAR\n");
+
+ return rc;
+}
+
+static struct notifier_block pseries_vas_nb = {
+ .notifier_call = pseries_vas_notifier,
+};
+
+/*
+ * For LPM, all windows have to be closed on the source partition
+ * before migration and reopened on the destination partition
+ * after migration. So close windows during suspend and
+ * reopen them during resume.
+ */
+int vas_migration_handler(int action)
+{
+ struct vas_cop_feat_caps *caps;
+ int old_nr_creds, new_nr_creds = 0;
+ struct vas_caps *vcaps;
+ int i, rc = 0;
+
+ /*
+ * NX-GZIP is not enabled. Nothing to do for migration.
+ */
+ if (!copypaste_feat)
+ return rc;
+
+ mutex_lock(&vas_pseries_mutex);
+
+ if (action == VAS_SUSPEND)
+ migration_in_progress = true;
+ else
+ migration_in_progress = false;
+
+ for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
+ vcaps = &vascaps[i];
+ caps = &vcaps->caps;
+ old_nr_creds = atomic_read(&caps->nr_total_credits);
+
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
+ vcaps->feat,
+ (u64)virt_to_phys(&hv_cop_caps));
+ if (!rc) {
+ new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
+ /*
+ * Should not happen. But in case it does, print messages,
+ * close all windows in the list during suspend and reopen
+ * windows based on new lpar_creds on the destination
+ * system.
+ */
+ if (old_nr_creds != new_nr_creds) {
+ pr_err("Target credits mismatch with the hypervisor\n");
+ pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
+ action, old_nr_creds, new_nr_creds);
+ pr_err("Used creds: %d, Active creds: %d\n",
+ atomic_read(&caps->nr_used_credits),
+ vcaps->nr_open_windows - vcaps->nr_close_wins);
+ }
+ } else {
+ pr_err("state(%d): Get VAS capabilities failed with %d\n",
+ action, rc);
+ /*
+ * We can not stop migration with the current lpm
+ * implementation. So continue closing all windows in
+ * the list (during suspend) and return without
+ * opening windows (during resume) if VAS capabilities
+ * HCALL failed.
+ */
+ if (action == VAS_RESUME)
+ goto out;
+ }
+
+ switch (action) {
+ case VAS_SUSPEND:
+ rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
+ true);
+ break;
+ case VAS_RESUME:
+ atomic_set(&caps->nr_total_credits, new_nr_creds);
+ rc = reconfig_open_windows(vcaps, new_nr_creds, true);
+ break;
+ default:
+ /* should not happen */
+ pr_err("Invalid migration action %d\n", action);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Ignore errors during suspend and return for resume.
+ */
+ if (rc && (action == VAS_RESUME))
+ goto out;
+ }
+
+out:
+ mutex_unlock(&vas_pseries_mutex);
+ return rc;
+}
+
static int __init pseries_vas_init(void)
{
- struct hv_vas_cop_feat_caps *hv_cop_caps;
struct hv_vas_all_caps *hv_caps;
- int rc;
+ int rc = 0;
/*
* Linux supports user space COPY/PASTE only with Radix
@@ -566,35 +992,39 @@ static int __init pseries_vas_init(void)
caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
- hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
- if (!hv_cop_caps) {
- rc = -ENOMEM;
- goto out;
- }
+ sysfs_pseries_vas_init(&caps_all);
+
/*
* QOS capabilities available
*/
if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
- VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
+ VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
if (rc)
- goto out_cop;
+ goto out;
}
/*
* Default capabilities available
*/
- if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
+ if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
- VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
- if (rc)
- goto out_cop;
- }
+ VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
+
+ if (!rc && copypaste_feat) {
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ of_reconfig_notifier_register(&pseries_vas_nb);
- pr_info("GZIP feature is available\n");
+ pr_info("GZIP feature is available\n");
+ } else {
+ /*
+ * Should not happen, but only when get default
+ * capabilities HCALL failed. So disable copy paste
+ * feature.
+ */
+ copypaste_feat = false;
+ }
-out_cop:
- kfree(hv_cop_caps);
out:
kfree(hv_caps);
return rc;
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
index 4ecb3fcabd10..34177881e998 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -30,6 +30,14 @@
#define VAS_COPY_PASTE_USER_MODE 0x00000001
#define VAS_COP_OP_USER_MODE 0x00000010
+#define VAS_GZIP_QOS_CAPABILITIES 0x56516F73477A6970
+#define VAS_GZIP_DEFAULT_CAPABILITIES 0x56446566477A6970
+
+enum vas_migrate_action {
+ VAS_SUSPEND,
+ VAS_RESUME,
+};
+
/*
* Co-processor feature - GZIP QoS windows or GZIP default windows
*/
@@ -72,9 +80,8 @@ struct vas_cop_feat_caps {
};
/* Total LPAR available credits. Can be different from max LPAR */
/* credits due to DLPAR operation */
- atomic_t target_lpar_creds;
- atomic_t used_lpar_creds; /* Used credits so far */
- u16 avail_lpar_creds; /* Remaining available credits */
+ atomic_t nr_total_credits; /* Total credits assigned to LPAR */
+ atomic_t nr_used_credits; /* Used credits so far */
};
/*
@@ -84,6 +91,9 @@ struct vas_cop_feat_caps {
struct vas_caps {
struct vas_cop_feat_caps caps;
struct list_head list; /* List of open windows */
+ int nr_close_wins; /* closed windows in the hypervisor for DLPAR */
+ int nr_open_windows; /* Number of successful open windows */
+ u8 feat; /* Feature type */
};
/*
@@ -115,6 +125,7 @@ struct pseries_vas_window {
u64 domain[6]; /* Associativity domain Ids */
/* this window is allocated */
u64 util;
+ u32 pid; /* PID associated with this window */
/* List of windows opened which is used for LPM */
struct list_head win_list;
@@ -122,4 +133,17 @@ struct pseries_vas_window {
char *name;
int fault_virq;
};
+
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
+int vas_reconfig_capabilties(u8 type);
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
+
+#ifdef CONFIG_PPC_VAS
+int vas_migration_handler(int action);
+#else
+static inline int vas_migration_handler(int action)
+{
+ return 0;
+}
+#endif
#endif /* _VAS_H */