summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig7
-rw-r--r--arch/s390/crypto/aes_s390.c31
-rw-r--r--arch/s390/include/asm/barrier.h15
-rw-r--r--arch/s390/include/asm/compat.h3
-rw-r--r--arch/s390/include/asm/cpu_mf.h181
-rw-r--r--arch/s390/include/asm/css_chars.h2
-rw-r--r--arch/s390/include/asm/pci.h1
-rw-r--r--arch/s390/include/asm/perf_event.h80
-rw-r--r--arch/s390/include/asm/qdio.h35
-rw-r--r--arch/s390/include/asm/sclp.h7
-rw-r--r--arch/s390/include/asm/smp.h2
-rw-r--r--arch/s390/include/uapi/asm/zcrypt.h65
-rw-r--r--arch/s390/kernel/Makefile3
-rw-r--r--arch/s390/kernel/asm-offsets.c1
-rw-r--r--arch/s390/kernel/compat_signal.c5
-rw-r--r--arch/s390/kernel/entry64.S8
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c1
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c322
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c1641
-rw-r--r--arch/s390/kernel/perf_event.c174
-rw-r--r--arch/s390/kernel/process.c14
-rw-r--r--arch/s390/kernel/ptrace.c27
-rw-r--r--arch/s390/kernel/s390_ksyms.c2
-rw-r--r--arch/s390/kernel/setup.c3
-rw-r--r--arch/s390/kernel/smp.c44
-rw-r--r--arch/s390/kernel/vdso.c2
-rw-r--r--arch/s390/kernel/vdso32/clock_gettime.S9
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S4
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S10
-rw-r--r--arch/s390/kvm/priv.c2
-rw-r--r--arch/s390/lib/uaccess_pt.c4
-rw-r--r--arch/s390/mm/pgtable.c4
-rw-r--r--arch/s390/net/bpf_jit_comp.c29
-rw-r--r--arch/s390/oprofile/hwsampler.c78
-rw-r--r--arch/s390/oprofile/hwsampler.h52
-rw-r--r--arch/s390/oprofile/init.c23
-rw-r--r--arch/s390/pci/pci.c20
-rw-r--r--arch/s390/pci/pci_dma.c13
-rw-r--r--arch/s390/pci/pci_event.c28
39 files changed, 2698 insertions, 254 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 5877e71901b3..e9f312532526 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -135,7 +135,6 @@ config S390
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UID16 if 32BIT
select HAVE_VIRT_CPU_ACCOUNTING
- select INIT_ALL_POSSIBLE
select KTIME_SCALAR if 32BIT
select MODULES_USE_ELF_RELA
select OLD_SIGACTION
@@ -347,14 +346,14 @@ config SMP
Even if you don't know what to do here, say Y.
config NR_CPUS
- int "Maximum number of CPUs (2-64)"
- range 2 64
+ int "Maximum number of CPUs (2-256)"
+ range 2 256
depends on SMP
default "32" if !64BIT
default "64" if 64BIT
help
This allows you to specify the maximum number of CPUs which this
- kernel will support. The maximum supported value is 64 and the
+ kernel will support. The maximum supported value is 256 and the
minimum value which makes sense is 2.
This is purely to save memory - each supported CPU adds
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 4363528dc8fd..b3feabd39f31 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -55,8 +55,7 @@ struct pcc_param {
struct s390_xts_ctx {
u8 key[32];
- u8 xts_param[16];
- struct pcc_param pcc;
+ u8 pcc_key[32];
long enc;
long dec;
int key_len;
@@ -591,7 +590,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
xts_ctx->enc = KM_XTS_128_ENCRYPT;
xts_ctx->dec = KM_XTS_128_DECRYPT;
memcpy(xts_ctx->key + 16, in_key, 16);
- memcpy(xts_ctx->pcc.key + 16, in_key + 16, 16);
+ memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16);
break;
case 48:
xts_ctx->enc = 0;
@@ -602,7 +601,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
xts_ctx->enc = KM_XTS_256_ENCRYPT;
xts_ctx->dec = KM_XTS_256_DECRYPT;
memcpy(xts_ctx->key, in_key, 32);
- memcpy(xts_ctx->pcc.key, in_key + 32, 32);
+ memcpy(xts_ctx->pcc_key, in_key + 32, 32);
break;
default:
*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
@@ -621,29 +620,33 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
unsigned int nbytes = walk->nbytes;
unsigned int n;
u8 *in, *out;
- void *param;
+ struct pcc_param pcc_param;
+ struct {
+ u8 key[32];
+ u8 init[16];
+ } xts_param;
if (!nbytes)
goto out;
- memset(xts_ctx->pcc.block, 0, sizeof(xts_ctx->pcc.block));
- memset(xts_ctx->pcc.bit, 0, sizeof(xts_ctx->pcc.bit));
- memset(xts_ctx->pcc.xts, 0, sizeof(xts_ctx->pcc.xts));
- memcpy(xts_ctx->pcc.tweak, walk->iv, sizeof(xts_ctx->pcc.tweak));
- param = xts_ctx->pcc.key + offset;
- ret = crypt_s390_pcc(func, param);
+ memset(pcc_param.block, 0, sizeof(pcc_param.block));
+ memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
+ memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
+ memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ memcpy(pcc_param.key, xts_ctx->pcc_key, 32);
+ ret = crypt_s390_pcc(func, &pcc_param.key[offset]);
if (ret < 0)
return -EIO;
- memcpy(xts_ctx->xts_param, xts_ctx->pcc.xts, 16);
- param = xts_ctx->key + offset;
+ memcpy(xts_param.key, xts_ctx->key, 32);
+ memcpy(xts_param.init, pcc_param.xts, 16);
do {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
out = walk->dst.virt.addr;
in = walk->src.virt.addr;
- ret = crypt_s390_km(func, param, out, in, n);
+ ret = crypt_s390_km(func, &xts_param.key[offset], out, in, n);
if (ret < 0 || ret != n)
return -EIO;
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 16760eeb79b0..578680f6207a 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -32,4 +32,19 @@
#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ___p1; \
+})
+
#endif /* __ASM_BARRIER_H */
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 4bf9da03591e..5d7e8cf83bd6 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -38,7 +38,8 @@
#define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
- PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | PSW32_ASC_HOME)
+ PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
+ PSW32_ASC_PRIMARY)
#define COMPAT_USER_HZ 100
#define COMPAT_UTS_MACHINE "s390\0\0\0\0"
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index c879fad404c8..cb700d54bd83 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -56,6 +56,96 @@ struct cpumf_ctr_info {
u32 reserved2[12];
} __packed;
+/* QUERY SAMPLING INFORMATION block */
+struct hws_qsi_info_block { /* Bit(s) */
+ unsigned int b0_13:14; /* 0-13: zeros */
+ unsigned int as:1; /* 14: basic-sampling authorization */
+ unsigned int ad:1; /* 15: diag-sampling authorization */
+ unsigned int b16_21:6; /* 16-21: zeros */
+ unsigned int es:1; /* 22: basic-sampling enable control */
+ unsigned int ed:1; /* 23: diag-sampling enable control */
+ unsigned int b24_29:6; /* 24-29: zeros */
+ unsigned int cs:1; /* 30: basic-sampling activation control */
+ unsigned int cd:1; /* 31: diag-sampling activation control */
+ unsigned int bsdes:16; /* 4-5: size of basic sampling entry */
+ unsigned int dsdes:16; /* 6-7: size of diagnostic sampling entry */
+ unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+ unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+ unsigned long tear; /* 24-31: TEAR contents */
+ unsigned long dear; /* 32-39: DEAR contents */
+ unsigned int rsvrd0; /* 40-43: reserved */
+ unsigned int cpu_speed; /* 44-47: CPU speed */
+ unsigned long long rsvrd1; /* 48-55: reserved */
+ unsigned long long rsvrd2; /* 56-63: reserved */
+} __packed;
+
+/* SET SAMPLING CONTROLS request block */
+struct hws_lsctl_request_block {
+ unsigned int s:1; /* 0: maximum buffer indicator */
+ unsigned int h:1; /* 1: part. level reserved for VM use*/
+ unsigned long long b2_53:52;/* 2-53: zeros */
+ unsigned int es:1; /* 54: basic-sampling enable control */
+ unsigned int ed:1; /* 55: diag-sampling enable control */
+ unsigned int b56_61:6; /* 56-61: - zeros */
+ unsigned int cs:1; /* 62: basic-sampling activation control */
+ unsigned int cd:1; /* 63: diag-sampling activation control */
+ unsigned long interval; /* 8-15: sampling interval */
+ unsigned long tear; /* 16-23: TEAR contents */
+ unsigned long dear; /* 24-31: DEAR contents */
+ /* 32-63: */
+ unsigned long rsvrd1; /* reserved */
+ unsigned long rsvrd2; /* reserved */
+ unsigned long rsvrd3; /* reserved */
+ unsigned long rsvrd4; /* reserved */
+} __packed;
+
+struct hws_basic_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:4; /* 16-19 reserved */
+ unsigned int U:4; /* 20-23 Number of unique instruct. */
+ unsigned int z:2; /* zeros */
+ unsigned int T:1; /* 26 PSW DAT mode */
+ unsigned int W:1; /* 27 PSW wait state */
+ unsigned int P:1; /* 28 PSW Problem state */
+ unsigned int AS:2; /* 29-30 PSW address-space control */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ unsigned int:16;
+ unsigned int prim_asn:16; /* primary ASN */
+ unsigned long long ia; /* Instruction Address */
+ unsigned long long gpp; /* Guest Program Parameter */
+ unsigned long long hpp; /* Host Program Parameter */
+} __packed;
+
+struct hws_diag_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:14; /* 16-19 and 20-30 reserved */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ u8 data[]; /* Machine-dependent sample data */
+} __packed;
+
+struct hws_combined_entry {
+ struct hws_basic_entry basic; /* Basic-sampling data entry */
+ struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
+} __packed;
+
+struct hws_trailer_entry {
+ union {
+ struct {
+ unsigned int f:1; /* 0 - Block Full Indicator */
+ unsigned int a:1; /* 1 - Alert request control */
+ unsigned int t:1; /* 2 - Timestamp format */
+ unsigned long long:61; /* 3 - 63: Reserved */
+ };
+ unsigned long long flags; /* 0 - 63: All indicators */
+ };
+ unsigned long long overflow; /* 64 - sample Overflow count */
+ unsigned char timestamp[16]; /* 16 - 31 timestamp */
+ unsigned long long reserved1; /* 32 -Reserved */
+ unsigned long long reserved2; /* */
+ unsigned long long progusage1; /* 48 - reserved for programming use */
+ unsigned long long progusage2; /* */
+} __packed;
+
/* Query counter information */
static inline int qctri(struct cpumf_ctr_info *info)
{
@@ -99,4 +189,95 @@ static inline int ecctr(u64 ctr, u64 *val)
return cc;
}
+/* Query sampling information */
+static inline int qsi(struct hws_qsi_info_block *info)
+{
+ int cc;
+ cc = 1;
+
+ asm volatile(
+ "0: .insn s,0xb2860000,0(%1)\n"
+ "1: lhi %0,0\n"
+ "2:\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+ : "=d" (cc), "+a" (info)
+ : "m" (*info)
+ : "cc", "memory");
+
+ return cc ? -EINVAL : 0;
+}
+
+/* Load sampling controls */
+static inline int lsctl(struct hws_lsctl_request_block *req)
+{
+ int cc;
+
+ cc = 1;
+ asm volatile(
+ "0: .insn s,0xb2870000,0(%1)\n"
+ "1: ipm %0\n"
+ " srl %0,28\n"
+ "2:\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+ : "+d" (cc), "+a" (req)
+ : "m" (*req)
+ : "cc", "memory");
+
+ return cc ? -EINVAL : 0;
+}
+
+/* Sampling control helper functions */
+
+#include <linux/time.h>
+
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+ unsigned long freq)
+{
+ return (USEC_PER_SEC / freq) * qsi->cpu_speed;
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+ unsigned long rate)
+{
+ return USEC_PER_SEC * qsi->cpu_speed / rate;
+}
+
+#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL
+#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
+
+/* Return TOD timestamp contained in an trailer entry */
+static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+ /* TOD in STCKE format */
+ if (te->t)
+ return *((unsigned long long *) &te->timestamp[1]);
+
+ /* TOD in STCK format */
+ return *((unsigned long long *) &te->timestamp[0]);
+}
+
+/* Return pointer to trailer entry of an sample data block */
+static inline unsigned long *trailer_entry_ptr(unsigned long v)
+{
+ void *ret;
+
+ ret = (void *) v;
+ ret += PAGE_SIZE;
+ ret -= sizeof(struct hws_trailer_entry);
+
+ return (unsigned long *) ret;
+}
+
+/* Return if the entry in the sample data block table (sdbt)
+ * is a link to the next sdbt */
+static inline int is_link_entry(unsigned long *s)
+{
+ return *s & 0x1ul ? 1 : 0;
+}
+
+/* Return pointer to the linked sdbt */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+ return (unsigned long *) (*s & ~0x1ul);
+}
#endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 7e1c917bbba2..09d1dd46bd57 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -29,6 +29,8 @@ struct css_general_char {
u32 fcx : 1; /* bit 88 */
u32 : 19;
u32 alt_ssi : 1; /* bit 108 */
+ u32:1;
+ u32 narf:1; /* bit 110 */
} __packed;
extern struct css_general_char css_general_characteristics;
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c129ab2ac731..2583466f576b 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -144,6 +144,7 @@ int clp_disable_fh(struct zpci_dev *);
void zpci_event_error(void *);
void zpci_event_availability(void *);
void zpci_rescan(void);
+bool zpci_is_enabled(void);
#else /* CONFIG_PCI */
static inline void zpci_event_error(void *e) {}
static inline void zpci_event_availability(void *e) {}
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 1141fb3e7b21..159a8ec6da9a 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -1,21 +1,40 @@
/*
* Performance event support - s390 specific definitions.
*
- * Copyright IBM Corp. 2009, 2012
+ * Copyright IBM Corp. 2009, 2013
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
-#include <asm/cpu_mf.h>
+#ifndef _ASM_S390_PERF_EVENT_H
+#define _ASM_S390_PERF_EVENT_H
-/* CPU-measurement counter facility */
-#define PERF_CPUM_CF_MAX_CTR 256
+#ifdef CONFIG_64BIT
+
+#include <linux/perf_event.h>
+#include <linux/device.h>
+#include <asm/cpu_mf.h>
/* Per-CPU flags for PMU states */
#define PMU_F_RESERVED 0x1000
#define PMU_F_ENABLED 0x2000
+#define PMU_F_IN_USE 0x4000
+#define PMU_F_ERR_IBE 0x0100
+#define PMU_F_ERR_LSDA 0x0200
+#define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
+
+/* Perf defintions for PMU event attributes in sysfs */
+extern __init const struct attribute_group **cpumf_cf_event_group(void);
+extern ssize_t cpumf_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page);
+#define EVENT_VAR(_cat, _name) event_attr_##_cat##_##_name
+#define EVENT_PTR(_cat, _name) (&EVENT_VAR(_cat, _name).attr.attr)
+
+#define CPUMF_EVENT_ATTR(cat, name, id) \
+ PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show)
+#define CPUMF_EVENT_PTR(cat, name) EVENT_PTR(cat, name)
-#ifdef CONFIG_64BIT
/* Perf callbacks */
struct pt_regs;
@@ -23,4 +42,55 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
+/* Perf pt_regs extension for sample-data-entry indicators */
+struct perf_sf_sde_regs {
+ unsigned char in_guest:1; /* guest sample */
+ unsigned long reserved:63; /* reserved */
+};
+
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR 256
+
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR 2
+#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \
+ PERF_CPUM_SF_DIAG_MODE)
+#define PERF_CPUM_SF_FULL_BLOCKS 0x0004 /* Process full SDBs only */
+
+#define REG_NONE 0
+#define REG_OVERFLOW 1
+#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc)
+#define RAWSAMPLE_REG(hwc) ((hwc)->config)
+#define TEAR_REG(hwc) ((hwc)->last_tag)
+#define SAMPL_RATE(hwc) ((hwc)->event_base)
+#define SAMPL_FLAGS(hwc) ((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
+
+/* Structure for sampling data entries to be passed as perf raw sample data
+ * to user space. Note that raw sample data must be aligned and, thus, might
+ * be padded with zeros.
+ */
+struct sf_raw_sample {
+#define SF_RAW_SAMPLE_BASIC PERF_CPUM_SF_BASIC_MODE
+#define SF_RAW_SAMPLE_DIAG PERF_CPUM_SF_DIAG_MODE
+ u64 format;
+ u32 size; /* Size of sf_raw_sample */
+ u16 bsdes; /* Basic-sampling data entry size */
+ u16 dsdes; /* Diagnostic-sampling data entry size */
+ struct hws_basic_entry basic; /* Basic-sampling data entry */
+ struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
+ u8 padding[]; /* Padding to next multiple of 8 */
+} __packed;
+
+/* Perf hardware reserve and release functions */
+int perf_reserve_sampling(void);
+void perf_release_sampling(void);
+
#endif /* CONFIG_64BIT */
+#endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 57d0d7e794b1..d786c634e052 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -336,7 +336,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
#define QDIO_FLAG_CLEANUP_USING_HALT 0x02
/**
- * struct qdio_initialize - qdio initalization data
+ * struct qdio_initialize - qdio initialization data
* @cdev: associated ccw device
* @q_format: queue format
* @adapter_name: name for the adapter
@@ -378,6 +378,34 @@ struct qdio_initialize {
struct qdio_outbuf_state *output_sbal_state_array;
};
+/**
+ * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc()
+ * @l3_ipv6_addr: entry contains IPv6 address
+ * @l3_ipv4_addr: entry contains IPv4 address
+ * @l2_addr_lnid: entry contains MAC address and VLAN ID
+ */
+enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid};
+
+/**
+ * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc()
+ * @nit: Network interface token
+ * @addr: Address of one of the three types
+ *
+ * The struct is passed to the callback function by qdio_brinfo_desc()
+ */
+struct qdio_brinfo_entry_l3_ipv6 {
+ u64 nit;
+ struct { unsigned char _s6_addr[16]; } addr;
+} __packed;
+struct qdio_brinfo_entry_l3_ipv4 {
+ u64 nit;
+ struct { uint32_t _s_addr; } addr;
+} __packed;
+struct qdio_brinfo_entry_l2 {
+ u64 nit;
+ struct { u8 mac[6]; u16 lnid; } addr_lnid;
+} __packed;
+
#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */
#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */
#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */
@@ -399,5 +427,10 @@ extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *);
extern int qdio_shutdown(struct ccw_device *, int);
extern int qdio_free(struct ccw_device *);
extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
+extern int qdio_pnso_brinfo(struct subchannel_id schid,
+ int cnc, u16 *response,
+ void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
+ void *entry),
+ void *priv);
#endif /* __QDIO_H__ */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 30ef748bc161..220e171413f8 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -8,6 +8,7 @@
#include <linux/types.h>
#include <asm/chpid.h>
+#include <asm/cpu.h>
#define SCLP_CHP_INFO_MASK_SIZE 32
@@ -37,7 +38,7 @@ struct sclp_cpu_info {
unsigned int standby;
unsigned int combined;
int has_cpu_type;
- struct sclp_cpu_entry cpu[255];
+ struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1];
};
int sclp_get_cpu_info(struct sclp_cpu_info *info);
@@ -51,8 +52,8 @@ int sclp_chp_configure(struct chp_id chpid);
int sclp_chp_deconfigure(struct chp_id chpid);
int sclp_chp_read_info(struct sclp_chp_info *info);
void sclp_get_ipl_info(struct sclp_ipl_info *info);
-bool sclp_has_linemode(void);
-bool sclp_has_vt220(void);
+bool __init sclp_has_linemode(void);
+bool __init sclp_has_vt220(void);
int sclp_pci_configure(u32 fid);
int sclp_pci_deconfigure(u32 fid);
int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index ac9bed8e103f..160779394096 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -31,6 +31,7 @@ extern void smp_yield(void);
extern void smp_stop_cpu(void);
extern void smp_cpu_set_polarization(int cpu, int val);
extern int smp_cpu_get_polarization(int cpu);
+extern void smp_fill_possible_mask(void);
#else /* CONFIG_SMP */
@@ -50,6 +51,7 @@ static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
static inline void smp_yield(void) { }
static inline void smp_stop_cpu(void) { }
+static inline void smp_fill_possible_mask(void) { }
#endif /* CONFIG_SMP */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index e83fc116f5bf..f2b18eacaca8 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -154,6 +154,67 @@ struct ica_xcRB {
unsigned short priority_window;
unsigned int status;
} __attribute__((packed));
+
+/**
+ * struct ep11_cprb - EP11 connectivity programming request block
+ * @cprb_len: CPRB header length [0x0020]
+ * @cprb_ver_id: CPRB version id. [0x04]
+ * @pad_000: Alignment pad bytes
+ * @flags: Admin cmd [0x80] or functional cmd [0x00]
+ * @func_id: Function id / subtype [0x5434]
+ * @source_id: Source id [originator id]
+ * @target_id: Target id [usage/ctrl domain id]
+ * @ret_code: Return code
+ * @reserved1: Reserved
+ * @reserved2: Reserved
+ * @payload_len: Payload length
+ */
+struct ep11_cprb {
+ uint16_t cprb_len;
+ unsigned char cprb_ver_id;
+ unsigned char pad_000[2];
+ unsigned char flags;
+ unsigned char func_id[2];
+ uint32_t source_id;
+ uint32_t target_id;
+ uint32_t ret_code;
+ uint32_t reserved1;
+ uint32_t reserved2;
+ uint32_t payload_len;
+} __attribute__((packed));
+
+/**
+ * struct ep11_target_dev - EP11 target device list
+ * @ap_id: AP device id
+ * @dom_id: Usage domain id
+ */
+struct ep11_target_dev {
+ uint16_t ap_id;
+ uint16_t dom_id;
+};
+
+/**
+ * struct ep11_urb - EP11 user request block
+ * @targets_num: Number of target adapters
+ * @targets: Addr to target adapter list
+ * @weight: Level of request priority
+ * @req_no: Request id/number
+ * @req_len: Request length
+ * @req: Addr to request block
+ * @resp_len: Response length
+ * @resp: Addr to response block
+ */
+struct ep11_urb {
+ uint16_t targets_num;
+ uint64_t targets;
+ uint64_t weight;
+ uint64_t req_no;
+ uint64_t req_len;
+ uint64_t req;
+ uint64_t resp_len;
+ uint64_t resp;
+} __attribute__((packed));
+
#define AUTOSELECT ((unsigned int)0xFFFFFFFF)
#define ZCRYPT_IOCTL_MAGIC 'z'
@@ -183,6 +244,9 @@ struct ica_xcRB {
* ZSECSENDCPRB
* Send an arbitrary CPRB to a crypto card.
*
+ * ZSENDEP11CPRB
+ * Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card.
+ *
* Z90STAT_STATUS_MASK
* Return an 64 element array of unsigned chars for the status of
* all devices.
@@ -256,6 +320,7 @@ struct ica_xcRB {
#define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
#define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
#define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+#define ZSENDEP11CPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
/* New status calls */
#define Z90STAT_TOTALCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int)
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 2403303cfed7..1b3ac09c11b6 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -60,7 +60,8 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
ifdef CONFIG_64BIT
-obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \
+ perf_cpum_cf_events.o
obj-y += runtime_instr.o cache.o
endif
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 496116cd65ec..e4c99a183651 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -72,6 +72,7 @@ int main(void)
/* constants used by the vdso */
DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
BLANK();
/* idle data offsets */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 95e7ba0fbb7e..8b84bc373e94 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -412,8 +412,9 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
- err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
- (u16 __force __user *)(frame->retcode));
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
+ (u16 __force __user *)(frame->retcode)))
+ goto give_sigsegv;
}
/* Set up backchain. */
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index e5b43c97a834..384e609b4711 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -74,7 +74,7 @@ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
.endm
.macro LPP newpp
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP
jz .+8
.insn s,0xb2800000,\newpp
@@ -82,7 +82,7 @@ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
.endm
.macro HANDLE_SIE_INTERCEPT scratch,reason
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
tmhh %r8,0x0001 # interrupting from user ?
jnz .+62
lgr \scratch,%r9
@@ -946,7 +946,7 @@ cleanup_idle_insn:
.quad __critical_end - __critical_start
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
/*
* sie64a calling convention:
* %r2 pointer to sie control block
@@ -975,7 +975,7 @@ sie_done:
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
# some program checks are suppressing. C code (e.g. do_protection_exception)
# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions beween sie64a and sie_done should not cause program
+# instructions between sie64a and sie_done should not cause program
# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
# See also HANDLE_SIE_INTERCEPT
rewind_pad:
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 1105502bf6e9..f51214c04858 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -680,6 +680,7 @@ static int __init cpumf_pmu_init(void)
goto out;
}
+ cpumf_pmu.attr_groups = cpumf_cf_event_group();
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
if (rc) {
pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
new file mode 100644
index 000000000000..4554a4bae39e
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -0,0 +1,322 @@
+/*
+ * Perf PMU sysfs events attributes for available CPU-measurement counters
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+
+
+/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */
+
+CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025);
+CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+
+static struct attribute *cpumcf_pmu_event_attr[] = {
+ CPUMF_EVENT_PTR(cf, CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf, INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, PRNG_CYCLES),
+ CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, SHA_CYCLES),
+ CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, DEA_CYCLES),
+ CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf, AES_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, AES_CYCLES),
+ CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES),
+ NULL,
+};
+
+static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES),
+ CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT),
+ NULL,
+};
+
+static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES),
+ NULL,
+};
+
+static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL),
+ NULL,
+};
+
+/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
+
+static struct attribute_group cpumsf_pmu_events_group = {
+ .name = "events",
+ .attrs = cpumcf_pmu_event_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cpumsf_pmu_format_group = {
+ .name = "format",
+ .attrs = cpumsf_pmu_format_attr,
+};
+
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+ &cpumsf_pmu_events_group,
+ &cpumsf_pmu_format_group,
+ NULL,
+};
+
+
+static __init struct attribute **merge_attr(struct attribute **a,
+ struct attribute **b)
+{
+ struct attribute **new;
+ int j, i;
+
+ for (j = 0; a[j]; j++)
+ ;
+ for (i = 0; b[i]; i++)
+ j++;
+ j++;
+
+ new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+ if (!new)
+ return NULL;
+ j = 0;
+ for (i = 0; a[i]; i++)
+ new[j++] = a[i];
+ for (i = 0; b[i]; i++)
+ new[j++] = b[i];
+ new[j] = NULL;
+
+ return new;
+}
+
+__init const struct attribute_group **cpumf_cf_event_group(void)
+{
+ struct attribute **combined, **model;
+ struct cpuid cpu_id;
+
+ get_cpu_id(&cpu_id);
+ switch (cpu_id.machine) {
+ case 0x2097:
+ case 0x2098:
+ model = cpumcf_z10_pmu_event_attr;
+ break;
+ case 0x2817:
+ case 0x2818:
+ model = cpumcf_z196_pmu_event_attr;
+ break;
+ case 0x2827:
+ case 0x2828:
+ model = cpumcf_zec12_pmu_event_attr;
+ break;
+ default:
+ model = NULL;
+ break;
+ };
+
+ if (!model)
+ goto out;
+
+ combined = merge_attr(cpumcf_pmu_event_attr, model);
+ if (combined)
+ cpumsf_pmu_events_group.attrs = combined;
+out:
+ return cpumsf_pmu_attr_groups;
+}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
new file mode 100644
index 000000000000..6c0d29827cb6
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -0,0 +1,1641 @@
+/*
+ * Performance event support for the System z CPU-measurement Sampling Facility
+ *
+ * Copyright IBM Corp. 2013
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT "cpum_sf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+#include <asm/debug.h>
+#include <asm/timex.h>
+
+/* Minimum number of sample-data-block-tables:
+ * At least one table is required for the sampling buffer structure.
+ * A single table contains up to 511 pointers to sample-data-blocks.
+ */
+#define CPUM_SF_MIN_SDBT 1
+
+/* Number of sample-data-blocks per sample-data-block-table (SDBT):
+ * A table contains SDB pointers (8 bytes) and one table-link entry
+ * that points to the origin of the next SDBT.
+ */
+#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8)
+
+/* Maximum page offset for an SDBT table-link entry:
+ * If this page offset is reached, a table-link entry to the next SDBT
+ * must be added.
+ */
+#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8)
+static inline int require_table_link(const void *sdbt)
+{
+ return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+}
+
+/* Minimum and maximum sampling buffer sizes:
+ *
+ * This number represents the maximum size of the sampling buffer taking
+ * the number of sample-data-block-tables into account. Note that these
+ * numbers apply to the basic-sampling function only.
+ * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if
+ * the diagnostic-sampling function is active.
+ *
+ * Sampling buffer size Buffer characteristics
+ * ---------------------------------------------------
+ * 64KB == 16 pages (4KB per page)
+ * 1 page for SDB-tables
+ * 15 pages for SDBs
+ *
+ * 32MB == 8192 pages (4KB per page)
+ * 16 pages for SDB-tables
+ * 8176 pages for SDBs
+ */
+static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
+static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
+static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
+
+struct sf_buffer {
+ unsigned long *sdbt; /* Sample-data-block-table origin */
+ /* buffer characteristics (required for buffer increments) */
+ unsigned long num_sdb; /* Number of sample-data-blocks */
+ unsigned long num_sdbt; /* Number of sample-data-block-tables */
+ unsigned long *tail; /* last sample-data-block-table */
+};
+
+struct cpu_hw_sf {
+ /* CPU-measurement sampling information block */
+ struct hws_qsi_info_block qsi;
+ /* CPU-measurement sampling control block */
+ struct hws_lsctl_request_block lsctl;
+ struct sf_buffer sfb; /* Sampling buffer */
+ unsigned int flags; /* Status flags */
+ struct perf_event *event; /* Scheduled perf event */
+};
+static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
+
+/* Debug feature */
+static debug_info_t *sfdbg;
+
+/*
+ * sf_disable() - Switch off sampling facility
+ */
+static int sf_disable(void)
+{
+ struct hws_lsctl_request_block sreq;
+
+ memset(&sreq, 0, sizeof(sreq));
+ return lsctl(&sreq);
+}
+
+/*
+ * sf_buffer_available() - Check for an allocated sampling buffer
+ */
+static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
+{
+ return !!cpuhw->sfb.sdbt;
+}
+
+/*
+ * deallocate sampling facility buffer
+ */
+static void free_sampling_buffer(struct sf_buffer *sfb)
+{
+ unsigned long *sdbt, *curr;
+
+ if (!sfb->sdbt)
+ return;
+
+ sdbt = sfb->sdbt;
+ curr = sdbt;
+
+ /* Free the SDBT after all SDBs are processed... */
+ while (1) {
+ if (!*curr || !sdbt)
+ break;
+
+ /* Process table-link entries */
+ if (is_link_entry(curr)) {
+ curr = get_next_sdbt(curr);
+ if (sdbt)
+ free_page((unsigned long) sdbt);
+
+ /* If the origin is reached, sampling buffer is freed */
+ if (curr == sfb->sdbt)
+ break;
+ else
+ sdbt = curr;
+ } else {
+ /* Process SDB pointer */
+ if (*curr) {
+ free_page(*curr);
+ curr++;
+ }
+ }
+ }
+
+ debug_sprintf_event(sfdbg, 5,
+ "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
+ memset(sfb, 0, sizeof(*sfb));
+}
+
+static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+{
+ unsigned long sdb, *trailer;
+
+ /* Allocate and initialize sample-data-block */
+ sdb = get_zeroed_page(gfp_flags);
+ if (!sdb)
+ return -ENOMEM;
+ trailer = trailer_entry_ptr(sdb);
+ *trailer = SDB_TE_ALERT_REQ_MASK;
+
+ /* Link SDB into the sample-data-block-table */
+ *sdbt = sdb;
+
+ return 0;
+}
+
+/*
+ * realloc_sampling_buffer() - extend sampler memory
+ *
+ * Allocates new sample-data-blocks and adds them to the specified sampling
+ * buffer memory.
+ *
+ * Important: This modifies the sampling buffer and must be called when the
+ * sampling facility is disabled.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int realloc_sampling_buffer(struct sf_buffer *sfb,
+ unsigned long num_sdb, gfp_t gfp_flags)
+{
+ int i, rc;
+ unsigned long *new, *tail;
+
+ if (!sfb->sdbt || !sfb->tail)
+ return -EINVAL;
+
+ if (!is_link_entry(sfb->tail))
+ return -EINVAL;
+
+ /* Append to the existing sampling buffer, overwriting the table-link
+ * register.
+ * The tail variables always points to the "tail" (last and table-link)
+ * entry in an SDB-table.
+ */
+ tail = sfb->tail;
+
+ /* Do a sanity check whether the table-link entry points to
+ * the sampling buffer origin.
+ */
+ if (sfb->sdbt != get_next_sdbt(tail)) {
+ debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
+ "sampling buffer is not linked: origin=%p"
+ "tail=%p\n",
+ (void *) sfb->sdbt, (void *) tail);
+ return -EINVAL;
+ }
+
+ /* Allocate remaining SDBs */
+ rc = 0;
+ for (i = 0; i < num_sdb; i++) {
+ /* Allocate a new SDB-table if it is full. */
+ if (require_table_link(tail)) {
+ new = (unsigned long *) get_zeroed_page(gfp_flags);
+ if (!new) {
+ rc = -ENOMEM;
+ break;
+ }
+ sfb->num_sdbt++;
+ /* Link current page to tail of chain */
+ *tail = (unsigned long)(void *) new + 1;
+ tail = new;
+ }
+
+ /* Allocate a new sample-data-block.
+ * If there is not enough memory, stop the realloc process
+ * and simply use what was allocated. If this is a temporary
+ * issue, a new realloc call (if required) might succeed.
+ */
+ rc = alloc_sample_data_block(tail, gfp_flags);
+ if (rc)
+ break;
+ sfb->num_sdb++;
+ tail++;
+ }
+
+ /* Link sampling buffer to its origin */
+ *tail = (unsigned long) sfb->sdbt + 1;
+ sfb->tail = tail;
+
+ debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
+ " settings: sdbt=%lu sdb=%lu\n",
+ sfb->num_sdbt, sfb->num_sdb);
+ return rc;
+}
+
+/*
+ * allocate_sampling_buffer() - allocate sampler memory
+ *
+ * Allocates and initializes a sampling buffer structure using the
+ * specified number of sample-data-blocks (SDB). For each allocation,
+ * a 4K page is used. The number of sample-data-block-tables (SDBT)
+ * are calculated from SDBs.
+ * Also set the ALERT_REQ mask in each SDBs trailer.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
+{
+ int rc;
+
+ if (sfb->sdbt)
+ return -EINVAL;
+
+ /* Allocate the sample-data-block-table origin */
+ sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+ if (!sfb->sdbt)
+ return -ENOMEM;
+ sfb->num_sdb = 0;
+ sfb->num_sdbt = 1;
+
+ /* Link the table origin to point to itself to prepare for
+ * realloc_sampling_buffer() invocation.
+ */
+ sfb->tail = sfb->sdbt;
+ *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
+
+ /* Allocate requested number of sample-data-blocks */
+ rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
+ if (rc) {
+ free_sampling_buffer(sfb);
+ debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
+ "realloc_sampling_buffer failed with rc=%i\n", rc);
+ } else
+ debug_sprintf_event(sfdbg, 4,
+ "alloc_sampling_buffer: tear=%p dear=%p\n",
+ sfb->sdbt, (void *) *sfb->sdbt);
+ return rc;
+}
+
+static void sfb_set_limits(unsigned long min, unsigned long max)
+{
+ struct hws_qsi_info_block si;
+
+ CPUM_SF_MIN_SDB = min;
+ CPUM_SF_MAX_SDB = max;
+
+ memset(&si, 0, sizeof(si));
+ if (!qsi(&si))
+ CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+}
+
+static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
+{
+ return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR
+ : CPUM_SF_MAX_SDB;
+}
+
+static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ if (!sfb->sdbt)
+ return SFB_ALLOC_REG(hwc);
+ if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
+ return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
+ return 0;
+}
+
+static int sfb_has_pending_allocs(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ return sfb_pending_allocs(sfb, hwc) > 0;
+}
+
+static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+ /* Limit the number of SDBs to not exceed the maximum */
+ num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc));
+ if (num)
+ SFB_ALLOC_REG(hwc) += num;
+}
+
+static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+ SFB_ALLOC_REG(hwc) = 0;
+ sfb_account_allocs(num, hwc);
+}
+
+static size_t event_sample_size(struct hw_perf_event *hwc)
+{
+ struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+ size_t sample_size;
+
+ /* The sample size depends on the sampling function: The basic-sampling
+ * function must be always enabled, diagnostic-sampling function is
+ * optional.
+ */
+ sample_size = sfr->bsdes;
+ if (SAMPL_DIAG_MODE(hwc))
+ sample_size += sfr->dsdes;
+
+ return sample_size;
+}
+
+static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
+{
+ if (cpuhw->sfb.sdbt)
+ free_sampling_buffer(&cpuhw->sfb);
+}
+
+static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
+{
+ unsigned long n_sdb, freq, factor;
+ size_t sfr_size, sample_size;
+ struct sf_raw_sample *sfr;
+
+ /* Allocate raw sample buffer
+ *
+ * The raw sample buffer is used to temporarily store sampling data
+ * entries for perf raw sample processing. The buffer size mainly
+ * depends on the size of diagnostic-sampling data entries which is
+ * machine-specific. The exact size calculation includes:
+ * 1. The first 4 bytes of diagnostic-sampling data entries are
+ * already reflected in the sf_raw_sample structure. Subtract
+ * these bytes.
+ * 2. The perf raw sample data must be 8-byte aligned (u64) and
+ * perf's internal data size must be considered too. So add
+ * an additional u32 for correct alignment and subtract before
+ * allocating the buffer.
+ * 3. Store the raw sample buffer pointer in the perf event
+ * hardware structure.
+ */
+ sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) +
+ sizeof(u32), sizeof(u64));
+ sfr_size -= sizeof(u32);
+ sfr = kzalloc(sfr_size, GFP_KERNEL);
+ if (!sfr)
+ return -ENOMEM;
+ sfr->size = sfr_size;
+ sfr->bsdes = cpuhw->qsi.bsdes;
+ sfr->dsdes = cpuhw->qsi.dsdes;
+ RAWSAMPLE_REG(hwc) = (unsigned long) sfr;
+
+ /* Calculate sampling buffers using 4K pages
+ *
+ * 1. Determine the sample data size which depends on the used
+ * sampling functions, for example, basic-sampling or
+ * basic-sampling with diagnostic-sampling.
+ *
+ * 2. Use the sampling frequency as input. The sampling buffer is
+ * designed for almost one second. This can be adjusted through
+ * the "factor" variable.
+ * In any case, alloc_sampling_buffer() sets the Alert Request
+ * Control indicator to trigger a measurement-alert to harvest
+ * sample-data-blocks (sdb).
+ *
+ * 3. Compute the number of sample-data-blocks and ensure a minimum
+ * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not
+ * exceed a "calculated" maximum. The symbolic maximum is
+ * designed for basic-sampling only and needs to be increased if
+ * diagnostic-sampling is active.
+ * See also the remarks for these symbolic constants.
+ *
+ * 4. Compute the number of sample-data-block-tables (SDBT) and
+ * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
+ * to 511 SDBs).
+ */
+ sample_size = event_sample_size(hwc);
+ freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
+ factor = 1;
+ n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
+ if (n_sdb < CPUM_SF_MIN_SDB)
+ n_sdb = CPUM_SF_MIN_SDB;
+
+ /* If there is already a sampling buffer allocated, it is very likely
+ * that the sampling facility is enabled too. If the event to be
+ * initialized requires a greater sampling buffer, the allocation must
+ * be postponed. Changing the sampling buffer requires the sampling
+ * facility to be in the disabled state. So, account the number of
+ * required SDBs and let cpumsf_pmu_enable() resize the buffer just
+ * before the event is started.
+ */
+ sfb_init_allocs(n_sdb, hwc);
+ if (sf_buffer_available(cpuhw))
+ return 0;
+
+ debug_sprintf_event(sfdbg, 3,
+ "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
+ " sample_size=%lu cpuhw=%p\n",
+ SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
+ sample_size, cpuhw);
+
+ return alloc_sampling_buffer(&cpuhw->sfb,
+ sfb_pending_allocs(&cpuhw->sfb, hwc));
+}
+
+static unsigned long min_percent(unsigned int percent, unsigned long base,
+ unsigned long min)
+{
+ return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
+}
+
+static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
+{
+ /* Use a percentage-based approach to extend the sampling facility
+ * buffer. Accept up to 5% sample data loss.
+ * Vary the extents between 1% to 5% of the current number of
+ * sample-data-blocks.
+ */
+ if (ratio <= 5)
+ return 0;
+ if (ratio <= 25)
+ return min_percent(1, base, 1);
+ if (ratio <= 50)
+ return min_percent(1, base, 1);
+ if (ratio <= 75)
+ return min_percent(2, base, 2);
+ if (ratio <= 100)
+ return min_percent(3, base, 3);
+ if (ratio <= 250)
+ return min_percent(4, base, 4);
+
+ return min_percent(5, base, 8);
+}
+
+static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
+ struct hw_perf_event *hwc)
+{
+ unsigned long ratio, num;
+
+ if (!OVERFLOW_REG(hwc))
+ return;
+
+ /* The sample_overflow contains the average number of sample data
+ * that has been lost because sample-data-blocks were full.
+ *
+ * Calculate the total number of sample data entries that has been
+ * discarded. Then calculate the ratio of lost samples to total samples
+ * per second in percent.
+ */
+ ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
+ sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
+
+ /* Compute number of sample-data-blocks */
+ num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
+ if (num)
+ sfb_account_allocs(num, hwc);
+
+ debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
+ " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+ OVERFLOW_REG(hwc) = 0;
+}
+
+/* extend_sampling_buffer() - Extend sampling buffer
+ * @sfb: Sampling buffer structure (for local CPU)
+ * @hwc: Perf event hardware structure
+ *
+ * Use this function to extend the sampling buffer based on the overflow counter
+ * and postponed allocation extents stored in the specified Perf event hardware.
+ *
+ * Important: This function disables the sampling facility in order to safely
+ * change the sampling buffer structure. Do not call this function
+ * when the PMU is active.
+ */
+static void extend_sampling_buffer(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ unsigned long num, num_old;
+ int rc;
+
+ num = sfb_pending_allocs(sfb, hwc);
+ if (!num)
+ return;
+ num_old = sfb->num_sdb;
+
+ /* Disable the sampling facility to reset any states and also
+ * clear pending measurement alerts.
+ */
+ sf_disable();
+
+ /* Extend the sampling buffer.
+ * This memory allocation typically happens in an atomic context when
+ * called by perf. Because this is a reallocation, it is fine if the
+ * new SDB-request cannot be satisfied immediately.
+ */
+ rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
+ if (rc)
+ debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
+ "failed with rc=%i\n", rc);
+
+ if (sfb_has_pending_allocs(sfb, hwc))
+ debug_sprintf_event(sfdbg, 5, "sfb: extend: "
+ "req=%lu alloc=%lu remaining=%lu\n",
+ num, sfb->num_sdb - num_old,
+ sfb_pending_allocs(sfb, hwc));
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+#define PMC_INIT 0
+#define PMC_RELEASE 1
+#define PMC_FAILURE 2
+static void setup_pmc_cpu(void *flags)
+{
+ int err;
+ struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);
+
+ err = 0;
+ switch (*((int *) flags)) {
+ case PMC_INIT:
+ memset(cpusf, 0, sizeof(*cpusf));
+ err = qsi(&cpusf->qsi);
+ if (err)
+ break;
+ cpusf->flags |= PMU_F_RESERVED;
+ err = sf_disable();
+ if (err)
+ pr_err("Switching off the sampling facility failed "
+ "with rc=%i\n", err);
+ debug_sprintf_event(sfdbg, 5,
+ "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+ break;
+ case PMC_RELEASE:
+ cpusf->flags &= ~PMU_F_RESERVED;
+ err = sf_disable();
+ if (err) {
+ pr_err("Switching off the sampling facility failed "
+ "with rc=%i\n", err);
+ } else
+ deallocate_buffers(cpusf);
+ debug_sprintf_event(sfdbg, 5,
+ "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+ break;
+ }
+ if (err)
+ *((int *) flags) |= PMC_FAILURE;
+}
+
+static void release_pmc_hardware(void)
+{
+ int flags = PMC_RELEASE;
+
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ perf_release_sampling();
+}
+
+static int reserve_pmc_hardware(void)
+{
+ int flags = PMC_INIT;
+ int err;
+
+ err = perf_reserve_sampling();
+ if (err)
+ return err;
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ if (flags & PMC_FAILURE) {
+ release_pmc_hardware();
+ return -ENODEV;
+ }
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+ return 0;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ /* Free raw sample buffer */
+ if (RAWSAMPLE_REG(&event->hw))
+ kfree((void *) RAWSAMPLE_REG(&event->hw));
+
+ /* Release PMC if this is the last perf event */
+ if (!atomic_add_unless(&num_events, -1, 1)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_events) == 0)
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
+static void hw_init_period(struct hw_perf_event *hwc, u64 period)
+{
+ hwc->sample_period = period;
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+}
+
+static void hw_reset_registers(struct hw_perf_event *hwc,
+ unsigned long *sdbt_origin)
+{
+ struct sf_raw_sample *sfr;
+
+ /* (Re)set to first sample-data-block-table */
+ TEAR_REG(hwc) = (unsigned long) sdbt_origin;
+
+ /* (Re)set raw sampling buffer register */
+ sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+ memset(&sfr->basic, 0, sizeof(sfr->basic));
+ memset(&sfr->diag, 0, sfr->dsdes);
+}
+
+static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
+ unsigned long rate)
+{
+ return clamp_t(unsigned long, rate,
+ si->min_sampl_rate, si->max_sampl_rate);
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+ struct cpu_hw_sf *cpuhw;
+ struct hws_qsi_info_block si;
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long rate;
+ int cpu, err;
+
+ /* Reserve CPU-measurement sampling facility */
+ err = 0;
+ if (!atomic_inc_not_zero(&num_events)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+ err = -EBUSY;
+ else
+ atomic_inc(&num_events);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+ event->destroy = hw_perf_event_destroy;
+
+ if (err)
+ goto out;
+
+ /* Access per-CPU sampling information (query sampling info) */
+ /*
+ * The event->cpu value can be -1 to count on every CPU, for example,
+ * when attaching to a task. If this is specified, use the query
+ * sampling info from the current CPU, otherwise use event->cpu to
+ * retrieve the per-CPU information.
+ * Later, cpuhw indicates whether to allocate sampling buffers for a
+ * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL).
+ */
+ memset(&si, 0, sizeof(si));
+ cpuhw = NULL;
+ if (event->cpu == -1)
+ qsi(&si);
+ else {
+ /* Event is pinned to a particular CPU, retrieve the per-CPU
+ * sampling structure for accessing the CPU-specific QSI.
+ */
+ cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+ si = cpuhw->qsi;
+ }
+
+ /* Check sampling facility authorization and, if not authorized,
+ * fall back to other PMUs. It is safe to check any CPU because
+ * the authorization is identical for all configured CPUs.
+ */
+ if (!si.as) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ /* Always enable basic sampling */
+ SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
+
+ /* Check if diagnostic sampling is requested. Deny if the required
+ * sampling authorization is missing.
+ */
+ if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
+ if (!si.ad) {
+ err = -EPERM;
+ goto out;
+ }
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
+ }
+
+ /* Check and set other sampling flags */
+ if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
+
+ /* The sampling information (si) contains information about the
+ * min/max sampling intervals and the CPU speed. So calculate the
+ * correct sampling interval and avoid the whole period adjust
+ * feedback loop.
+ */
+ rate = 0;
+ if (attr->freq) {
+ rate = freq_to_sample_rate(&si, attr->sample_freq);
+ rate = hw_limit_rate(&si, rate);
+ attr->freq = 0;
+ attr->sample_period = rate;
+ } else {
+ /* The min/max sampling rates specifies the valid range
+ * of sample periods. If the specified sample period is
+ * out of range, limit the period to the range boundary.
+ */
+ rate = hw_limit_rate(&si, hwc->sample_period);
+
+ /* The perf core maintains a maximum sample rate that is
+ * configurable through the sysctl interface. Ensure the
+ * sampling rate does not exceed this value. This also helps
+ * to avoid throttling when pushing samples with
+ * perf_event_overflow().
+ */
+ if (sample_rate_to_freq(&si, rate) >
+ sysctl_perf_event_sample_rate) {
+ err = -EINVAL;
+ debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
+ goto out;
+ }
+ }
+ SAMPL_RATE(hwc) = rate;
+ hw_init_period(hwc, SAMPL_RATE(hwc));
+
+ /* Initialize sample data overflow accounting */
+ hwc->extra_reg.reg = REG_OVERFLOW;
+ OVERFLOW_REG(hwc) = 0;
+
+ /* Allocate the per-CPU sampling buffer using the CPU information
+ * from the event. If the event is not pinned to a particular
+ * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
+ * buffers for each online CPU.
+ */
+ if (cpuhw)
+ /* Event is pinned to a particular CPU */
+ err = allocate_buffers(cpuhw, hwc);
+ else {
+ /* Event is not pinned, allocate sampling buffer on
+ * each online CPU
+ */
+ for_each_online_cpu(cpu) {
+ cpuhw = &per_cpu(cpu_hw_sf, cpu);
+ err = allocate_buffers(cpuhw, hwc);
+ if (err)
+ break;
+ }
+ }
+out:
+ return err;
+}
+
+static int cpumsf_pmu_event_init(struct perf_event *event)
+{
+ int err;
+
+ /* No support for taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
+ (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
+ return -ENOENT;
+ break;
+ case PERF_TYPE_HARDWARE:
+ /* Support sampling of CPU cycles in addition to the
+ * counter facility. However, the counter facility
+ * is more precise and, hence, restrict this PMU to
+ * sampling events only.
+ */
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
+ return -ENOENT;
+ if (!is_sampling_event(event))
+ return -ENOENT;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ /* Check online status of the CPU to which the event is pinned */
+ if (event->cpu >= nr_cpumask_bits ||
+ (event->cpu >= 0 && !cpu_online(event->cpu)))
+ return -ENODEV;
+
+ /* Force reset of idle/hv excludes regardless of what the
+ * user requested.
+ */
+ if (event->attr.exclude_hv)
+ event->attr.exclude_hv = 0;
+ if (event->attr.exclude_idle)
+ event->attr.exclude_idle = 0;
+
+ err = __hw_perf_event_init(event);
+ if (unlikely(err))
+ if (event->destroy)
+ event->destroy(event);
+ return err;
+}
+
+static void cpumsf_pmu_enable(struct pmu *pmu)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ struct hw_perf_event *hwc;
+ int err;
+
+ if (cpuhw->flags & PMU_F_ENABLED)
+ return;
+
+ if (cpuhw->flags & PMU_F_ERR_MASK)
+ return;
+
+ /* Check whether to extent the sampling buffer.
+ *
+ * Two conditions trigger an increase of the sampling buffer for a
+ * perf event:
+ * 1. Postponed buffer allocations from the event initialization.
+ * 2. Sampling overflows that contribute to pending allocations.
+ *
+ * Note that the extend_sampling_buffer() function disables the sampling
+ * facility, but it can be fully re-enabled using sampling controls that
+ * have been saved in cpumsf_pmu_disable().
+ */
+ if (cpuhw->event) {
+ hwc = &cpuhw->event->hw;
+ /* Account number of overflow-designated buffer extents */
+ sfb_account_overflows(cpuhw, hwc);
+ if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
+ extend_sampling_buffer(&cpuhw->sfb, hwc);
+ }
+
+ /* (Re)enable the PMU and sampling facility */
+ cpuhw->flags |= PMU_F_ENABLED;
+ barrier();
+
+ err = lsctl(&cpuhw->lsctl);
+ if (err) {
+ cpuhw->flags &= ~PMU_F_ENABLED;
+ pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ 1, err);
+ return;
+ }
+
+ debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
+ "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
+ cpuhw->lsctl.ed, cpuhw->lsctl.cd,
+ (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
+}
+
+static void cpumsf_pmu_disable(struct pmu *pmu)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ struct hws_lsctl_request_block inactive;
+ struct hws_qsi_info_block si;
+ int err;
+
+ if (!(cpuhw->flags & PMU_F_ENABLED))
+ return;
+
+ if (cpuhw->flags & PMU_F_ERR_MASK)
+ return;
+
+ /* Switch off sampling activation control */
+ inactive = cpuhw->lsctl;
+ inactive.cs = 0;
+ inactive.cd = 0;
+
+ err = lsctl(&inactive);
+ if (err) {
+ pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ 2, err);
+ return;
+ }
+
+ /* Save state of TEAR and DEAR register contents */
+ if (!qsi(&si)) {
+ /* TEAR/DEAR values are valid only if the sampling facility is
+ * enabled. Note that cpumsf_pmu_disable() might be called even
+ * for a disabled sampling facility because cpumsf_pmu_enable()
+ * controls the enable/disable state.
+ */
+ if (si.es) {
+ cpuhw->lsctl.tear = si.tear;
+ cpuhw->lsctl.dear = si.dear;
+ }
+ } else
+ debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
+ "qsi() failed with err=%i\n", err);
+
+ cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+/* perf_exclude_event() - Filter event
+ * @event: The perf event
+ * @regs: pt_regs structure
+ * @sde_regs: Sample-data-entry (sde) regs structure
+ *
+ * Filter perf events according to their exclude specification.
+ *
+ * Return non-zero if the event shall be excluded.
+ */
+static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+ struct perf_sf_sde_regs *sde_regs)
+{
+ if (event->attr.exclude_user && user_mode(regs))
+ return 1;
+ if (event->attr.exclude_kernel && !user_mode(regs))
+ return 1;
+ if (event->attr.exclude_guest && sde_regs->in_guest)
+ return 1;
+ if (event->attr.exclude_host && !sde_regs->in_guest)
+ return 1;
+ return 0;
+}
+
+/* perf_push_sample() - Push samples to perf
+ * @event: The perf event
+ * @sample: Hardware sample data
+ *
+ * Use the hardware sample data to create perf event sample. The sample
+ * is the pushed to the event subsystem and the function checks for
+ * possible event overflows. If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
+{
+ int overflow;
+ struct pt_regs regs;
+ struct perf_sf_sde_regs *sde_regs;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+
+ /* Setup perf sample */
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ raw.size = sfr->size;
+ raw.data = sfr;
+ data.raw = &raw;
+
+ /* Setup pt_regs to look like an CPU-measurement external interrupt
+ * using the Program Request Alert code. The regs.int_parm_long
+ * field which is unused contains additional sample-data-entry related
+ * indicators.
+ */
+ memset(&regs, 0, sizeof(regs));
+ regs.int_code = 0x1407;
+ regs.int_parm = CPU_MF_INT_SF_PRA;
+ sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
+
+ regs.psw.addr = sfr->basic.ia;
+ if (sfr->basic.T)
+ regs.psw.mask |= PSW_MASK_DAT;
+ if (sfr->basic.W)
+ regs.psw.mask |= PSW_MASK_WAIT;
+ if (sfr->basic.P)
+ regs.psw.mask |= PSW_MASK_PSTATE;
+ switch (sfr->basic.AS) {
+ case 0x0:
+ regs.psw.mask |= PSW_ASC_PRIMARY;
+ break;
+ case 0x1:
+ regs.psw.mask |= PSW_ASC_ACCREG;
+ break;
+ case 0x2:
+ regs.psw.mask |= PSW_ASC_SECONDARY;
+ break;
+ case 0x3:
+ regs.psw.mask |= PSW_ASC_HOME;
+ break;
+ }
+
+ /* The host-program-parameter (hpp) contains the sie control
+ * block that is set by sie64a() in entry64.S. Check if hpp
+ * refers to a valid control block and set sde_regs flags
+ * accordingly. This would allow to use hpp values for other
+ * purposes too.
+ * For now, simply use a non-zero value as guest indicator.
+ */
+ if (sfr->basic.hpp)
+ sde_regs->in_guest = 1;
+
+ overflow = 0;
+ if (perf_exclude_event(event, &regs, sde_regs))
+ goto out;
+ if (perf_event_overflow(event, &data, &regs)) {
+ overflow = 1;
+ event->pmu->stop(event, 0);
+ }
+ perf_event_update_userpage(event);
+out:
+ return overflow;
+}
+
+static void perf_event_count_update(struct perf_event *event, u64 count)
+{
+ local64_add(count, &event->count);
+}
+
+static int sample_format_is_valid(struct hws_combined_entry *sample,
+ unsigned int flags)
+{
+ if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+ /* Only basic-sampling data entries with data-entry-format
+ * version of 0x0001 can be processed.
+ */
+ if (sample->basic.def != 0x0001)
+ return 0;
+ if (flags & PERF_CPUM_SF_DIAG_MODE)
+ /* The data-entry-format number of diagnostic-sampling data
+ * entries can vary. Because diagnostic data is just passed
+ * through, do only a sanity check on the DEF.
+ */
+ if (sample->diag.def < 0x8001)
+ return 0;
+ return 1;
+}
+
+static int sample_is_consistent(struct hws_combined_entry *sample,
+ unsigned long flags)
+{
+ /* This check applies only to basic-sampling data entries of potentially
+ * combined-sampling data entries. Invalid entries cannot be processed
+ * by the PMU and, thus, do not deliver an associated
+ * diagnostic-sampling data entry.
+ */
+ if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE)))
+ return 0;
+ /*
+ * Samples are skipped, if they are invalid or for which the
+ * instruction address is not predictable, i.e., the wait-state bit is
+ * set.
+ */
+ if (sample->basic.I || sample->basic.W)
+ return 0;
+ return 1;
+}
+
+static void reset_sample_slot(struct hws_combined_entry *sample,
+ unsigned long flags)
+{
+ if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+ sample->basic.def = 0;
+ if (flags & PERF_CPUM_SF_DIAG_MODE)
+ sample->diag.def = 0;
+}
+
+static void sfr_store_sample(struct sf_raw_sample *sfr,
+ struct hws_combined_entry *sample)
+{
+ if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE))
+ sfr->basic = sample->basic;
+ if (sfr->format & PERF_CPUM_SF_DIAG_MODE)
+ memcpy(&sfr->diag, &sample->diag, sfr->dsdes);
+}
+
+static void debug_sample_entry(struct hws_combined_entry *sample,
+ struct hws_trailer_entry *te,
+ unsigned long flags)
+{
+ debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
+ "sampling data entry: te->f=%i basic.def=%04x (%p)"
+ " diag.def=%04x (%p)\n", te->f,
+ sample->basic.def, &sample->basic,
+ (flags & PERF_CPUM_SF_DIAG_MODE)
+ ? sample->diag.def : 0xFFFF,
+ (flags & PERF_CPUM_SF_DIAG_MODE)
+ ? &sample->diag : NULL);
+}
+
+/* hw_collect_samples() - Walk through a sample-data-block and collect samples
+ * @event: The perf event
+ * @sdbt: Sample-data-block table
+ * @overflow: Event overflow counter
+ *
+ * Walks through a sample-data-block and collects sampling data entries that are
+ * then pushed to the perf event subsystem. Depending on the sampling function,
+ * there can be either basic-sampling or combined-sampling data entries. A
+ * combined-sampling data entry consists of a basic- and a diagnostic-sampling
+ * data entry. The sampling function is determined by the flags in the perf
+ * event hardware structure. The function always works with a combined-sampling
+ * data entry but ignores the the diagnostic portion if it is not available.
+ *
+ * Note that the implementation focuses on basic-sampling data entries and, if
+ * such an entry is not valid, the entire combined-sampling data entry is
+ * ignored.
+ *
+ * The overflow variables counts the number of samples that has been discarded
+ * due to a perf event overflow.
+ */
+static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+ unsigned long long *overflow)
+{
+ unsigned long flags = SAMPL_FLAGS(&event->hw);
+ struct hws_combined_entry *sample;
+ struct hws_trailer_entry *te;
+ struct sf_raw_sample *sfr;
+ size_t sample_size;
+
+ /* Prepare and initialize raw sample data */
+ sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw);
+ sfr->format = flags & PERF_CPUM_SF_MODE_MASK;
+
+ sample_size = event_sample_size(&event->hw);
+ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+ sample = (struct hws_combined_entry *) *sdbt;
+ while ((unsigned long *) sample < (unsigned long *) te) {
+ /* Check for an empty sample */
+ if (!sample->basic.def)
+ break;
+
+ /* Update perf event period */
+ perf_event_count_update(event, SAMPL_RATE(&event->hw));
+
+ /* Check sampling data entry */
+ if (sample_format_is_valid(sample, flags)) {
+ /* If an event overflow occurred, the PMU is stopped to
+ * throttle event delivery. Remaining sample data is
+ * discarded.
+ */
+ if (!*overflow) {
+ if (sample_is_consistent(sample, flags)) {
+ /* Deliver sample data to perf */
+ sfr_store_sample(sfr, sample);
+ *overflow = perf_push_sample(event, sfr);
+ }
+ } else
+ /* Count discarded samples */
+ *overflow += 1;
+ } else {
+ debug_sample_entry(sample, te, flags);
+ /* Sample slot is not yet written or other record.
+ *
+ * This condition can occur if the buffer was reused
+ * from a combined basic- and diagnostic-sampling.
+ * If only basic-sampling is then active, entries are
+ * written into the larger diagnostic entries.
+ * This is typically the case for sample-data-blocks
+ * that are not full. Stop processing if the first
+ * invalid format was detected.
+ */
+ if (!te->f)
+ break;
+ }
+
+ /* Reset sample slot and advance to next sample */
+ reset_sample_slot(sample, flags);
+ sample += sample_size;
+ }
+}
+
+/* hw_perf_event_update() - Process sampling buffer
+ * @event: The perf event
+ * @flush_all: Flag to also flush partially filled sample-data-blocks
+ *
+ * Processes the sampling buffer and create perf event samples.
+ * The sampling buffer position are retrieved and saved in the TEAR_REG
+ * register of the specified perf event.
+ *
+ * Only full sample-data-blocks are processed. Specify the flash_all flag
+ * to also walk through partially filled sample-data-blocks. It is ignored
+ * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag
+ * enforces the processing of full sample-data-blocks only (trailer entries
+ * with the block-full-indicator bit set).
+ */
+static void hw_perf_event_update(struct perf_event *event, int flush_all)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hws_trailer_entry *te;
+ unsigned long *sdbt;
+ unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
+ int done;
+
+ if (flush_all && SDB_FULL_BLOCKS(hwc))
+ flush_all = 0;
+
+ sdbt = (unsigned long *) TEAR_REG(hwc);
+ done = event_overflow = sampl_overflow = num_sdb = 0;
+ while (!done) {
+ /* Get the trailer entry of the sample-data-block */
+ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+
+ /* Leave loop if no more work to do (block full indicator) */
+ if (!te->f) {
+ done = 1;
+ if (!flush_all)
+ break;
+ }
+
+ /* Check the sample overflow count */
+ if (te->overflow)
+ /* Account sample overflows and, if a particular limit
+ * is reached, extend the sampling buffer.
+ * For details, see sfb_account_overflows().
+ */
+ sampl_overflow += te->overflow;
+
+ /* Timestamps are valid for full sample-data-blocks only */
+ debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
+ "overflow=%llu timestamp=0x%llx\n",
+ sdbt, te->overflow,
+ (te->f) ? trailer_timestamp(te) : 0ULL);
+
+ /* Collect all samples from a single sample-data-block and
+ * flag if an (perf) event overflow happened. If so, the PMU
+ * is stopped and remaining samples will be discarded.
+ */
+ hw_collect_samples(event, sdbt, &event_overflow);
+ num_sdb++;
+
+ /* Reset trailer (using compare-double-and-swap) */
+ do {
+ te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
+ te_flags |= SDB_TE_ALERT_REQ_MASK;
+ } while (!cmpxchg_double(&te->flags, &te->overflow,
+ te->flags, te->overflow,
+ te_flags, 0ULL));
+
+ /* Advance to next sample-data-block */
+ sdbt++;
+ if (is_link_entry(sdbt))
+ sdbt = get_next_sdbt(sdbt);
+
+ /* Update event hardware registers */
+ TEAR_REG(hwc) = (unsigned long) sdbt;
+
+ /* Stop processing sample-data if all samples of the current
+ * sample-data-block were flushed even if it was not full.
+ */
+ if (flush_all && done)
+ break;
+
+ /* If an event overflow happened, discard samples by
+ * processing any remaining sample-data-blocks.
+ */
+ if (event_overflow)
+ flush_all = 1;
+ }
+
+ /* Account sample overflows in the event hardware structure */
+ if (sampl_overflow)
+ OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
+ sampl_overflow, 1 + num_sdb);
+ if (sampl_overflow || event_overflow)
+ debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
+ "overflow stats: sample=%llu event=%llu\n",
+ sampl_overflow, event_overflow);
+}
+
+static void cpumsf_pmu_read(struct perf_event *event)
+{
+ /* Nothing to do ... updates are interrupt-driven */
+}
+
+/* Activate sampling control.
+ * Next call of pmu_enable() starts sampling.
+ */
+static void cpumsf_pmu_start(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+ perf_pmu_disable(event->pmu);
+ event->hw.state = 0;
+ cpuhw->lsctl.cs = 1;
+ if (SAMPL_DIAG_MODE(&event->hw))
+ cpuhw->lsctl.cd = 1;
+ perf_pmu_enable(event->pmu);
+}
+
+/* Deactivate sampling control.
+ * Next call of pmu_enable() stops sampling.
+ */
+static void cpumsf_pmu_stop(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ perf_pmu_disable(event->pmu);
+ cpuhw->lsctl.cs = 0;
+ cpuhw->lsctl.cd = 0;
+ event->hw.state |= PERF_HES_STOPPED;
+
+ if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+ hw_perf_event_update(event, 1);
+ event->hw.state |= PERF_HES_UPTODATE;
+ }
+ perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_add(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ int err;
+
+ if (cpuhw->flags & PMU_F_IN_USE)
+ return -EAGAIN;
+
+ if (!cpuhw->sfb.sdbt)
+ return -EINVAL;
+
+ err = 0;
+ perf_pmu_disable(event->pmu);
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ /* Set up sampling controls. Always program the sampling register
+ * using the SDB-table start. Reset TEAR_REG event hardware register
+ * that is used by hw_perf_event_update() to store the sampling buffer
+ * position after samples have been flushed.
+ */
+ cpuhw->lsctl.s = 0;
+ cpuhw->lsctl.h = 1;
+ cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
+ cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
+ cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
+ hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+
+ /* Ensure sampling functions are in the disabled state. If disabled,
+ * switch on sampling enable control. */
+ if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
+ err = -EAGAIN;
+ goto out;
+ }
+ cpuhw->lsctl.es = 1;
+ if (SAMPL_DIAG_MODE(&event->hw))
+ cpuhw->lsctl.ed = 1;
+
+ /* Set in_use flag and store event */
+ event->hw.idx = 0; /* only one sampling event per CPU supported */
+ cpuhw->event = event;
+ cpuhw->flags |= PMU_F_IN_USE;
+
+ if (flags & PERF_EF_START)
+ cpumsf_pmu_start(event, PERF_EF_RELOAD);
+out:
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+ return err;
+}
+
+static void cpumsf_pmu_del(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+ perf_pmu_disable(event->pmu);
+ cpumsf_pmu_stop(event, PERF_EF_UPDATE);
+
+ cpuhw->lsctl.es = 0;
+ cpuhw->lsctl.ed = 0;
+ cpuhw->flags &= ~PMU_F_IN_USE;
+ cpuhw->event = NULL;
+
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_event_idx(struct perf_event *event)
+{
+ return event->hw.idx;
+}
+
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
+
+static struct attribute *cpumsf_pmu_events_attr[] = {
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG),
+ NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cpumsf_pmu_events_group = {
+ .name = "events",
+ .attrs = cpumsf_pmu_events_attr,
+};
+static struct attribute_group cpumsf_pmu_format_group = {
+ .name = "format",
+ .attrs = cpumsf_pmu_format_attr,
+};
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+ &cpumsf_pmu_events_group,
+ &cpumsf_pmu_format_group,
+ NULL,
+};
+
+static struct pmu cpumf_sampling = {
+ .pmu_enable = cpumsf_pmu_enable,
+ .pmu_disable = cpumsf_pmu_disable,
+
+ .event_init = cpumsf_pmu_event_init,
+ .add = cpumsf_pmu_add,
+ .del = cpumsf_pmu_del,
+
+ .start = cpumsf_pmu_start,
+ .stop = cpumsf_pmu_stop,
+ .read = cpumsf_pmu_read,
+
+ .event_idx = cpumsf_pmu_event_idx,
+ .attr_groups = cpumsf_pmu_attr_groups,
+};
+
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_hw_sf *cpuhw;
+
+ if (!(alert & CPU_MF_INT_SF_MASK))
+ return;
+ inc_irq_stat(IRQEXT_CMS);
+ cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+ /* Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case. */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* The processing below must take care of multiple alert events that
+ * might be indicated concurrently. */
+
+ /* Program alert request */
+ if (alert & CPU_MF_INT_SF_PRA) {
+ if (cpuhw->flags & PMU_F_IN_USE)
+ hw_perf_event_update(cpuhw->event, 0);
+ else
+ WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+ }
+
+ /* Report measurement alerts only for non-PRA codes */
+ if (alert != CPU_MF_INT_SF_PRA)
+ debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);
+
+ /* Sampling authorization change request */
+ if (alert & CPU_MF_INT_SF_SACA)
+ qsi(&cpuhw->qsi);
+
+ /* Loss of sample data due to high-priority machine activities */
+ if (alert & CPU_MF_INT_SF_LSDA) {
+ pr_err("Sample data was lost\n");
+ cpuhw->flags |= PMU_F_ERR_LSDA;
+ sf_disable();
+ }
+
+ /* Invalid sampling buffer entry */
+ if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
+ pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+ alert);
+ cpuhw->flags |= PMU_F_ERR_IBE;
+ sf_disable();
+ }
+}
+
+static int cpumf_pmu_notifier(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long) hcpu;
+ int flags;
+
+ /* Ignore the notification if no events are scheduled on the PMU.
+ * This might be racy...
+ */
+ if (!atomic_read(&num_events))
+ return NOTIFY_OK;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ flags = PMC_INIT;
+ smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ break;
+ case CPU_DOWN_PREPARE:
+ flags = PMC_RELEASE;
+ smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
+{
+ if (!cpum_sf_avail())
+ return -ENODEV;
+ return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+}
+
+static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
+{
+ int rc;
+ unsigned long min, max;
+
+ if (!cpum_sf_avail())
+ return -ENODEV;
+ if (!val || !strlen(val))
+ return -EINVAL;
+
+ /* Valid parameter values: "min,max" or "max" */
+ min = CPUM_SF_MIN_SDB;
+ max = CPUM_SF_MAX_SDB;
+ if (strchr(val, ','))
+ rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
+ else
+ rc = kstrtoul(val, 10, &max);
+
+ if (min < 2 || min >= max || max > get_num_physpages())
+ rc = -EINVAL;
+ if (rc)
+ return rc;
+
+ sfb_set_limits(min, max);
+ pr_info("The sampling buffer limits have changed to: "
+ "min=%lu max=%lu (diag=x%lu)\n",
+ CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
+ return 0;
+}
+
+#define param_check_sfb_size(name, p) __param_check(name, p, void)
+static struct kernel_param_ops param_ops_sfb_size = {
+ .set = param_set_sfb_size,
+ .get = param_get_sfb_size,
+};
+
+#define RS_INIT_FAILURE_QSI 0x0001
+#define RS_INIT_FAILURE_BSDES 0x0002
+#define RS_INIT_FAILURE_ALRT 0x0003
+#define RS_INIT_FAILURE_PERF 0x0004
+static void __init pr_cpumsf_err(unsigned int reason)
+{
+ pr_err("Sampling facility support for perf is not available: "
+ "reason=%04x\n", reason);
+}
+
+static int __init init_cpum_sampling_pmu(void)
+{
+ struct hws_qsi_info_block si;
+ int err;
+
+ if (!cpum_sf_avail())
+ return -ENODEV;
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si)) {
+ pr_cpumsf_err(RS_INIT_FAILURE_QSI);
+ return -ENODEV;
+ }
+
+ if (si.bsdes != sizeof(struct hws_basic_entry)) {
+ pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
+ return -EINVAL;
+ }
+
+ if (si.ad)
+ sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+
+ sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
+ if (!sfdbg)
+ pr_err("Registering for s390dbf failed\n");
+ debug_register_view(sfdbg, &debug_sprintf_view);
+
+ err = register_external_interrupt(0x1407, cpumf_measurement_alert);
+ if (err) {
+ pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
+ goto out;
+ }
+
+ err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
+ if (err) {
+ pr_cpumsf_err(RS_INIT_FAILURE_PERF);
+ unregister_external_interrupt(0x1407, cpumf_measurement_alert);
+ goto out;
+ }
+ perf_cpu_notifier(cpumf_pmu_notifier);
+out:
+ return err;
+}
+arch_initcall(init_cpum_sampling_pmu);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 2343c218b8f9..5d2dfa31c4ef 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -1,7 +1,7 @@
/*
* Performance event support for s390x
*
- * Copyright IBM Corp. 2012
+ * Copyright IBM Corp. 2012, 2013
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -16,15 +16,19 @@
#include <linux/kvm_host.h>
#include <linux/percpu.h>
#include <linux/export.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
#include <asm/irq.h>
#include <asm/cpu_mf.h>
#include <asm/lowcore.h>
#include <asm/processor.h>
+#include <asm/sysinfo.h>
const char *perf_pmu_name(void)
{
if (cpum_cf_avail() || cpum_sf_avail())
- return "CPU-measurement facilities (CPUMF)";
+ return "CPU-Measurement Facilities (CPU-MF)";
return "pmu";
}
EXPORT_SYMBOL(perf_pmu_name);
@@ -35,6 +39,8 @@ int perf_num_counters(void)
if (cpum_cf_avail())
num += PERF_CPUM_CF_MAX_CTR;
+ if (cpum_sf_avail())
+ num += PERF_CPUM_SF_MAX_CTR;
return num;
}
@@ -54,7 +60,7 @@ static bool is_in_guest(struct pt_regs *regs)
{
if (user_mode(regs))
return false;
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
return instruction_pointer(regs) == (unsigned long) &sie_exit;
#else
return false;
@@ -83,8 +89,31 @@ static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
: PERF_RECORD_MISC_GUEST_KERNEL;
}
+static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
+{
+ struct perf_sf_sde_regs *sde_regs;
+ unsigned long flags;
+
+ sde_regs = (struct perf_sf_sde_regs *) &regs->int_parm_long;
+ if (sde_regs->in_guest)
+ flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+ else
+ flags = user_mode(regs) ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+ return flags;
+}
+
unsigned long perf_misc_flags(struct pt_regs *regs)
{
+ /* Check if the cpum_sf PMU has created the pt_regs structure.
+ * In this case, perf misc flags can be easily extracted. Otherwise,
+ * do regular checks on the pt_regs content.
+ */
+ if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA)
+ if (!regs->gprs[15])
+ return perf_misc_flags_sf(regs);
+
if (is_in_guest(regs))
return perf_misc_guest_flags(regs);
@@ -92,27 +121,107 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
: PERF_RECORD_MISC_KERNEL;
}
-void perf_event_print_debug(void)
+void print_debug_cf(void)
{
struct cpumf_ctr_info cf_info;
- unsigned long flags;
- int cpu;
-
- if (!cpum_cf_avail())
- return;
-
- local_irq_save(flags);
+ int cpu = smp_processor_id();
- cpu = smp_processor_id();
memset(&cf_info, 0, sizeof(cf_info));
if (!qctri(&cf_info))
pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
cpu, cf_info.cfvn, cf_info.csvn,
cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
+}
+
+static void print_debug_sf(void)
+{
+ struct hws_qsi_info_block si;
+ int cpu = smp_processor_id();
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si))
+ return;
+
+ pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n",
+ cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
+ si.cpu_speed);
+
+ if (si.as)
+ pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
+ " bsdes=%i tear=%016lx dear=%016lx\n", cpu,
+ si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
+ if (si.ad)
+ pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
+ " dsdes=%i tear=%016lx dear=%016lx\n", cpu,
+ si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
+}
+
+void perf_event_print_debug(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ if (cpum_cf_avail())
+ print_debug_cf();
+ if (cpum_sf_avail())
+ print_debug_sf();
local_irq_restore(flags);
}
+/* Service level infrastructure */
+static void sl_print_counter(struct seq_file *m)
+{
+ struct cpumf_ctr_info ci;
+
+ memset(&ci, 0, sizeof(ci));
+ if (qctri(&ci))
+ return;
+
+ seq_printf(m, "CPU-MF: Counter facility: version=%u.%u "
+ "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl);
+}
+
+static void sl_print_sampling(struct seq_file *m)
+{
+ struct hws_qsi_info_block si;
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si))
+ return;
+
+ if (!si.as && !si.ad)
+ return;
+
+ seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu"
+ " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate,
+ si.cpu_speed);
+ if (si.as)
+ seq_printf(m, "CPU-MF: Sampling facility: mode=basic"
+ " sample_size=%u\n", si.bsdes);
+ if (si.ad)
+ seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic"
+ " sample_size=%u\n", si.dsdes);
+}
+
+static void service_level_perf_print(struct seq_file *m,
+ struct service_level *sl)
+{
+ if (cpum_cf_avail())
+ sl_print_counter(m);
+ if (cpum_sf_avail())
+ sl_print_sampling(m);
+}
+
+static struct service_level service_level_perf = {
+ .seq_print = service_level_perf_print,
+};
+
+static int __init service_level_perf_register(void)
+{
+ return register_service_level(&service_level_perf);
+}
+arch_initcall(service_level_perf_register);
+
/* See also arch/s390/kernel/traps.c */
static unsigned long __store_trace(struct perf_callchain_entry *entry,
unsigned long sp,
@@ -172,3 +281,44 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
__store_trace(entry, head, S390_lowcore.thread_info,
S390_lowcore.thread_info + THREAD_SIZE);
}
+
+/* Perf defintions for PMU event attributes in sysfs */
+ssize_t cpumf_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sprintf(page, "event=0x%04llx,name=%s\n",
+ pmu_attr->id, attr->attr.name);
+}
+
+/* Reserve/release functions for sharing perf hardware */
+static DEFINE_SPINLOCK(perf_hw_owner_lock);
+static void *perf_sampling_owner;
+
+int perf_reserve_sampling(void)
+{
+ int err;
+
+ err = 0;
+ spin_lock(&perf_hw_owner_lock);
+ if (perf_sampling_owner) {
+ pr_warn("The sampling facility is already reserved by %p\n",
+ perf_sampling_owner);
+ err = -EBUSY;
+ } else
+ perf_sampling_owner = __builtin_return_address(0);
+ spin_unlock(&perf_hw_owner_lock);
+ return err;
+}
+EXPORT_SYMBOL(perf_reserve_sampling);
+
+void perf_release_sampling(void)
+{
+ spin_lock(&perf_hw_owner_lock);
+ WARN_ON(!perf_sampling_owner);
+ perf_sampling_owner = NULL;
+ spin_unlock(&perf_hw_owner_lock);
+}
+EXPORT_SYMBOL(perf_release_sampling);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 7ed0d4e2a435..dd145321d215 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -261,20 +261,18 @@ static inline unsigned long brk_rnd(void)
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
- unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd());
+ unsigned long ret;
- if (ret < mm->brk)
- return mm->brk;
- return ret;
+ ret = PAGE_ALIGN(mm->brk + brk_rnd());
+ return (ret > mm->brk) ? ret : mm->brk;
}
unsigned long randomize_et_dyn(unsigned long base)
{
- unsigned long ret = PAGE_ALIGN(base + brk_rnd());
+ unsigned long ret;
if (!(current->flags & PF_RANDOMIZE))
return base;
- if (ret < base)
- return base;
- return ret;
+ ret = PAGE_ALIGN(base + brk_rnd());
+ return (ret > base) ? ret : base;
}
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index e65c91c591e8..f6be6087a0e9 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -56,25 +56,26 @@ void update_cr_regs(struct task_struct *task)
#ifdef CONFIG_64BIT
/* Take care of the enable/disable of transactional execution. */
if (MACHINE_HAS_TE) {
- unsigned long cr[3], cr_new[3];
+ unsigned long cr, cr_new;
- __ctl_store(cr, 0, 2);
- cr_new[1] = cr[1];
+ __ctl_store(cr, 0, 0);
/* Set or clear transaction execution TXC bit 8. */
+ cr_new = cr | (1UL << 55);
if (task->thread.per_flags & PER_FLAG_NO_TE)
- cr_new[0] = cr[0] & ~(1UL << 55);
- else
- cr_new[0] = cr[0] | (1UL << 55);
+ cr_new &= ~(1UL << 55);
+ if (cr_new != cr)
+ __ctl_load(cr, 0, 0);
/* Set or clear transaction execution TDC bits 62 and 63. */
- cr_new[2] = cr[2] & ~3UL;
+ __ctl_store(cr, 2, 2);
+ cr_new = cr & ~3UL;
if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
- cr_new[2] |= 1UL;
+ cr_new |= 1UL;
else
- cr_new[2] |= 2UL;
+ cr_new |= 2UL;
}
- if (memcmp(&cr_new, &cr, sizeof(cr)))
- __ctl_load(cr_new, 0, 2);
+ if (cr_new != cr)
+ __ctl_load(cr_new, 2, 2);
}
#endif
/* Copy user specified PER registers */
@@ -107,15 +108,11 @@ void update_cr_regs(struct task_struct *task)
void user_enable_single_step(struct task_struct *task)
{
set_tsk_thread_flag(task, TIF_SINGLE_STEP);
- if (task == current)
- update_cr_regs(task);
}
void user_disable_single_step(struct task_struct *task)
{
clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
- if (task == current)
- update_cr_regs(task);
}
/*
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 3bac589844a7..9f60467938d1 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -5,7 +5,7 @@
#ifdef CONFIG_FUNCTION_TRACER
EXPORT_SYMBOL(_mcount);
#endif
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
EXPORT_SYMBOL(sie64a);
EXPORT_SYMBOL(sie_exit);
#endif
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 4444875266ee..09e2f468f48b 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -373,7 +373,7 @@ static void __init setup_lowcore(void)
/*
* Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
- * restart data to the absolute zero lowcore. This is necesary if
+ * restart data to the absolute zero lowcore. This is necessary if
* PSW restart is done on an offline CPU that has lowcore zero.
*/
lc->restart_stack = (unsigned long) restart_stack;
@@ -1023,6 +1023,7 @@ void __init setup_arch(char **cmdline_p)
setup_vmcoreinfo();
setup_lowcore();
+ smp_fill_possible_mask();
cpu_init();
s390_init_cpu_topology();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index dc4a53465060..a7125b62a9a6 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -59,7 +59,7 @@ enum {
};
struct pcpu {
- struct cpu cpu;
+ struct cpu *cpu;
struct _lowcore *lowcore; /* lowcore page(s) for the cpu */
unsigned long async_stack; /* async stack for the cpu */
unsigned long panic_stack; /* panic stack for the cpu */
@@ -159,9 +159,9 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
{
int order;
- set_bit(ec_bit, &pcpu->ec_mask);
- order = pcpu_running(pcpu) ?
- SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
+ if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
+ return;
+ order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
pcpu_sigp_retry(pcpu, order, 0);
}
@@ -721,18 +721,14 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
return 0;
}
-static int __init setup_possible_cpus(char *s)
-{
- int max, cpu;
+static unsigned int setup_possible_cpus __initdata;
- if (kstrtoint(s, 0, &max) < 0)
- return 0;
- init_cpu_possible(cpumask_of(0));
- for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++)
- set_cpu_possible(cpu, true);
+static int __init _setup_possible_cpus(char *s)
+{
+ get_option(&s, &setup_possible_cpus);
return 0;
}
-early_param("possible_cpus", setup_possible_cpus);
+early_param("possible_cpus", _setup_possible_cpus);
#ifdef CONFIG_HOTPLUG_CPU
@@ -775,6 +771,17 @@ void __noreturn cpu_die(void)
#endif /* CONFIG_HOTPLUG_CPU */
+void __init smp_fill_possible_mask(void)
+{
+ unsigned int possible, cpu;
+
+ possible = setup_possible_cpus;
+ if (!possible)
+ possible = MACHINE_IS_VM ? 64 : nr_cpu_ids;
+ for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
+ set_cpu_possible(cpu, true);
+}
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
/* request the 0x1201 emergency signal external interrupt */
@@ -958,7 +965,7 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
void *hcpu)
{
unsigned int cpu = (unsigned int)(long)hcpu;
- struct cpu *c = &pcpu_devices[cpu].cpu;
+ struct cpu *c = pcpu_devices[cpu].cpu;
struct device *s = &c->dev;
int err = 0;
@@ -975,10 +982,15 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
static int smp_add_present_cpu(int cpu)
{
- struct cpu *c = &pcpu_devices[cpu].cpu;
- struct device *s = &c->dev;
+ struct device *s;
+ struct cpu *c;
int rc;
+ c = kzalloc(sizeof(*c), GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+ pcpu_devices[cpu].cpu = c;
+ s = &c->dev;
c->hotpluggable = 1;
rc = register_cpu(c, cpu);
if (rc)
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index a84476f2a9bb..613649096783 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -125,7 +125,7 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore)
psal[i] = 0x80000000;
lowcore->paste[4] = (u32)(addr_t) psal;
- psal[0] = 0x20000000;
+ psal[0] = 0x02000000;
psal[2] = (u32)(addr_t) aste;
*(unsigned long *) (aste + 2) = segment_table +
_ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
index 5be8e472f57d..65fc3979c2f1 100644
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ b/arch/s390/kernel/vdso32/clock_gettime.S
@@ -46,18 +46,13 @@ __kernel_clock_gettime:
jnm 3f
a %r0,__VDSO_TK_MULT(%r5)
3: alr %r0,%r2
- al %r0,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
- al %r1,__VDSO_XTIME_NSEC+4(%r5)
- brc 12,4f
- ahi %r0,1
-4: al %r0,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic.nsec */
+ al %r0,__VDSO_WTOM_NSEC(%r5)
al %r1,__VDSO_WTOM_NSEC+4(%r5)
brc 12,5f
ahi %r0,1
5: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
srdl %r0,0(%r2) /* >> tk->shift */
- l %r2,__VDSO_XTIME_SEC+4(%r5)
- al %r2,__VDSO_WTOM_SEC+4(%r5)
+ l %r2,__VDSO_WTOM_SEC+4(%r5)
cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
jne 1b
basr %r5,0
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index 176e1f75f9aa..34deba7c7ed1 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -23,7 +23,9 @@ __kernel_clock_getres:
je 0f
cghi %r2,__CLOCK_MONOTONIC
je 0f
- cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
+ cghi %r2,__CLOCK_THREAD_CPUTIME_ID
+ je 0f
+ cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
jne 2f
larl %r5,_vdso_data
icm %r0,15,__LC_ECTG_OK(%r5)
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 0add1072ba30..91940ed33a4a 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -22,7 +22,9 @@ __kernel_clock_gettime:
larl %r5,_vdso_data
cghi %r2,__CLOCK_REALTIME
je 4f
- cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
+ cghi %r2,__CLOCK_THREAD_CPUTIME_ID
+ je 9f
+ cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
je 9f
cghi %r2,__CLOCK_MONOTONIC
jne 12f
@@ -35,13 +37,11 @@ __kernel_clock_gettime:
jnz 0b
stck 48(%r15) /* Store TOD clock */
lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
- alg %r0,__VDSO_WTOM_SEC(%r5) /* + wall_to_monotonic.sec */
+ lg %r0,__VDSO_WTOM_SEC(%r5)
lg %r1,48(%r15)
sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
- alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
- alg %r1,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic.nsec */
+ alg %r1,__VDSO_WTOM_NSEC(%r5)
srlg %r1,%r1,0(%r2) /* >> tk->shift */
clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
jne 0b
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 05537ab22382..75beea632a10 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -275,7 +275,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
} else {
/*
- * Set condition code 3 to stop the guest from issueing channel
+ * Set condition code 3 to stop the guest from issuing channel
* I/O instructions.
*/
kvm_s390_set_psw_cc(vcpu, 3);
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index dbdab3e7a1a6..0632dc50da78 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -74,8 +74,8 @@ static size_t copy_in_kernel(size_t count, void __user *to,
/*
* Returns kernel address for user virtual address. If the returned address is
- * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address
- * contains the (negative) exception code.
+ * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occurred and the
+ * address contains the (negative) exception code.
*/
#ifdef CONFIG_64BIT
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index e794c88f699a..3584ed9b20a1 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -293,7 +293,7 @@ static int gmap_alloc_table(struct gmap *gmap,
* @addr: address in the guest address space
* @len: length of the memory area to unmap
*
- * Returns 0 if the unmap succeded, -EINVAL if not.
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
*/
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
@@ -344,7 +344,7 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment);
* @from: source address in the parent address space
* @to: target address in the guest address space
*
- * Returns 0 if the mmap succeded, -EINVAL or -ENOMEM if not.
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
*/
int gmap_map_segment(struct gmap *gmap, unsigned long from,
unsigned long to, unsigned long len)
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 16871da37371..708d60e40066 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -368,14 +368,16 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
/* lhi %r4,0 */
EMIT4(0xa7480000);
- /* dr %r4,%r12 */
- EMIT2(0x1d4c);
+ /* dlr %r4,%r12 */
+ EMIT4(0xb997004c);
break;
- case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K) */
- /* m %r4,<d(K)>(%r13) */
- EMIT4_DISP(0x5c40d000, EMIT_CONST(K));
- /* lr %r5,%r4 */
- EMIT2(0x1854);
+ case BPF_S_ALU_DIV_K: /* A /= K */
+ if (K == 1)
+ break;
+ /* lhi %r4,0 */
+ EMIT4(0xa7480000);
+ /* dl %r4,<d(K)>(%r13) */
+ EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
break;
case BPF_S_ALU_MOD_X: /* A %= X */
jit->seen |= SEEN_XREG | SEEN_RET0;
@@ -385,16 +387,21 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
/* lhi %r4,0 */
EMIT4(0xa7480000);
- /* dr %r4,%r12 */
- EMIT2(0x1d4c);
+ /* dlr %r4,%r12 */
+ EMIT4(0xb997004c);
/* lr %r5,%r4 */
EMIT2(0x1854);
break;
case BPF_S_ALU_MOD_K: /* A %= K */
+ if (K == 1) {
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ break;
+ }
/* lhi %r4,0 */
EMIT4(0xa7480000);
- /* d %r4,<d(K)>(%r13) */
- EMIT4_DISP(0x5d40d000, EMIT_CONST(K));
+ /* dl %r4,<d(K)>(%r13) */
+ EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
/* lr %r5,%r4 */
EMIT2(0x1854);
break;
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index 231cecafc2f1..a32c96761eab 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -26,9 +26,6 @@
#define MAX_NUM_SDB 511
#define MIN_NUM_SDB 1
-#define ALERT_REQ_MASK 0x4000000000000000ul
-#define BUFFER_FULL_MASK 0x8000000000000000ul
-
DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
struct hws_execute_parms {
@@ -44,6 +41,7 @@ static DEFINE_MUTEX(hws_sem_oom);
static unsigned char hws_flush_all;
static unsigned int hws_oom;
+static unsigned int hws_alert;
static struct workqueue_struct *hws_wq;
static unsigned int hws_state;
@@ -65,43 +63,6 @@ static unsigned long interval;
static unsigned long min_sampler_rate;
static unsigned long max_sampler_rate;
-static int ssctl(void *buffer)
-{
- int cc;
-
- /* set in order to detect a program check */
- cc = 1;
-
- asm volatile(
- "0: .insn s,0xB2870000,0(%1)\n"
- "1: ipm %0\n"
- " srl %0,28\n"
- "2:\n"
- EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "+d" (cc), "+a" (buffer)
- : "m" (*((struct hws_ssctl_request_block *)buffer))
- : "cc", "memory");
-
- return cc ? -EINVAL : 0 ;
-}
-
-static int qsi(void *buffer)
-{
- int cc;
- cc = 1;
-
- asm volatile(
- "0: .insn s,0xB2860000,0(%1)\n"
- "1: lhi %0,0\n"
- "2:\n"
- EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "=d" (cc), "+a" (buffer)
- : "m" (*((struct hws_qsi_info_block *)buffer))
- : "cc", "memory");
-
- return cc ? -EINVAL : 0;
-}
-
static void execute_qsi(void *parms)
{
struct hws_execute_parms *ep = parms;
@@ -113,7 +74,7 @@ static void execute_ssctl(void *parms)
{
struct hws_execute_parms *ep = parms;
- ep->rc = ssctl(ep->buffer);
+ ep->rc = lsctl(ep->buffer);
}
static int smp_ctl_ssctl_stop(int cpu)
@@ -214,17 +175,6 @@ static int smp_ctl_qsi(int cpu)
return ep.rc;
}
-static inline unsigned long *trailer_entry_ptr(unsigned long v)
-{
- void *ret;
-
- ret = (void *)v;
- ret += PAGE_SIZE;
- ret -= sizeof(struct hws_trailer_entry);
-
- return (unsigned long *) ret;
-}
-
static void hws_ext_handler(struct ext_code ext_code,
unsigned int param32, unsigned long param64)
{
@@ -233,6 +183,9 @@ static void hws_ext_handler(struct ext_code ext_code,
if (!(param32 & CPU_MF_INT_SF_MASK))
return;
+ if (!hws_alert)
+ return;
+
inc_irq_stat(IRQEXT_CMS);
atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32);
@@ -256,16 +209,6 @@ static void init_all_cpu_buffers(void)
}
}
-static int is_link_entry(unsigned long *s)
-{
- return *s & 0x1ul ? 1 : 0;
-}
-
-static unsigned long *get_next_sdbt(unsigned long *s)
-{
- return (unsigned long *) (*s & ~0x1ul);
-}
-
static int prepare_cpu_buffers(void)
{
int cpu;
@@ -353,7 +296,7 @@ static int allocate_sdbt(int cpu)
}
*sdbt = sdb;
trailer = trailer_entry_ptr(*sdbt);
- *trailer = ALERT_REQ_MASK;
+ *trailer = SDB_TE_ALERT_REQ_MASK;
sdbt++;
mutex_unlock(&hws_sem_oom);
}
@@ -829,7 +772,7 @@ static void worker_on_interrupt(unsigned int cpu)
trailer = trailer_entry_ptr(*sdbt);
/* leave loop if no more work to do */
- if (!(*trailer & BUFFER_FULL_MASK)) {
+ if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) {
done = 1;
if (!hws_flush_all)
continue;
@@ -856,7 +799,7 @@ static void worker_on_interrupt(unsigned int cpu)
static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
unsigned long *dear)
{
- struct hws_data_entry *sample_data_ptr;
+ struct hws_basic_entry *sample_data_ptr;
unsigned long *trailer;
trailer = trailer_entry_ptr(*sdbt);
@@ -866,7 +809,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
trailer = dear;
}
- sample_data_ptr = (struct hws_data_entry *)(*sdbt);
+ sample_data_ptr = (struct hws_basic_entry *)(*sdbt);
while ((unsigned long *)sample_data_ptr < trailer) {
struct pt_regs *regs = NULL;
@@ -1002,6 +945,7 @@ int hwsampler_deallocate(void)
goto deallocate_exit;
irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ hws_alert = 0;
deallocate_sdbt();
hws_state = HWS_DEALLOCATED;
@@ -1116,6 +1060,7 @@ int hwsampler_shutdown(void)
if (hws_state == HWS_STOPPED) {
irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ hws_alert = 0;
deallocate_sdbt();
}
if (hws_wq) {
@@ -1190,6 +1135,7 @@ start_all_exit:
hws_oom = 1;
hws_flush_all = 0;
/* now let them in, 1407 CPUMF external interrupts */
+ hws_alert = 1;
irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
return 0;
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
index 0022e1ebfbde..a483d06f2fa7 100644
--- a/arch/s390/oprofile/hwsampler.h
+++ b/arch/s390/oprofile/hwsampler.h
@@ -9,27 +9,7 @@
#define HWSAMPLER_H_
#include <linux/workqueue.h>
-
-struct hws_qsi_info_block /* QUERY SAMPLING information block */
-{ /* Bit(s) */
- unsigned int b0_13:14; /* 0-13: zeros */
- unsigned int as:1; /* 14: sampling authorisation control*/
- unsigned int b15_21:7; /* 15-21: zeros */
- unsigned int es:1; /* 22: sampling enable control */
- unsigned int b23_29:7; /* 23-29: zeros */
- unsigned int cs:1; /* 30: sampling activation control */
- unsigned int:1; /* 31: reserved */
- unsigned int bsdes:16; /* 4-5: size of sampling entry */
- unsigned int:16; /* 6-7: reserved */
- unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
- unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
- unsigned long tear; /* 24-31: TEAR contents */
- unsigned long dear; /* 32-39: DEAR contents */
- unsigned int rsvrd0; /* 40-43: reserved */
- unsigned int cpu_speed; /* 44-47: CPU speed */
- unsigned long long rsvrd1; /* 48-55: reserved */
- unsigned long long rsvrd2; /* 56-63: reserved */
-};
+#include <asm/cpu_mf.h>
struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */
{ /* bytes 0 - 7 Bit(s) */
@@ -68,36 +48,6 @@ struct hws_cpu_buffer {
unsigned int stop_mode:1;
};
-struct hws_data_entry {
- unsigned int def:16; /* 0-15 Data Entry Format */
- unsigned int R:4; /* 16-19 reserved */
- unsigned int U:4; /* 20-23 Number of unique instruct. */
- unsigned int z:2; /* zeros */
- unsigned int T:1; /* 26 PSW DAT mode */
- unsigned int W:1; /* 27 PSW wait state */
- unsigned int P:1; /* 28 PSW Problem state */
- unsigned int AS:2; /* 29-30 PSW address-space control */
- unsigned int I:1; /* 31 entry valid or invalid */
- unsigned int:16;
- unsigned int prim_asn:16; /* primary ASN */
- unsigned long long ia; /* Instruction Address */
- unsigned long long gpp; /* Guest Program Parameter */
- unsigned long long hpp; /* Host Program Parameter */
-};
-
-struct hws_trailer_entry {
- unsigned int f:1; /* 0 - Block Full Indicator */
- unsigned int a:1; /* 1 - Alert request control */
- unsigned long:62; /* 2 - 63: Reserved */
- unsigned long overflow; /* 64 - sample Overflow count */
- unsigned long timestamp; /* 16 - time-stamp */
- unsigned long timestamp1; /* */
- unsigned long reserved1; /* 32 -Reserved */
- unsigned long reserved2; /* */
- unsigned long progusage1; /* 48 - reserved for programming use */
- unsigned long progusage2; /* */
-};
-
int hwsampler_setup(void);
int hwsampler_shutdown(void);
int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 04e1b6a85362..9ffe645d5989 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -10,6 +10,7 @@
*/
#include <linux/oprofile.h>
+#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/fs.h>
@@ -67,6 +68,21 @@ module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
"(report cpu_type \"timer\"");
+static int __oprofile_hwsampler_start(void)
+{
+ int retval;
+
+ retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+ if (retval)
+ return retval;
+
+ retval = hwsampler_start_all(oprofile_hw_interval);
+ if (retval)
+ hwsampler_deallocate();
+
+ return retval;
+}
+
static int oprofile_hwsampler_start(void)
{
int retval;
@@ -76,13 +92,13 @@ static int oprofile_hwsampler_start(void)
if (!hwsampler_running)
return timer_ops.start();
- retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+ retval = perf_reserve_sampling();
if (retval)
return retval;
- retval = hwsampler_start_all(oprofile_hw_interval);
+ retval = __oprofile_hwsampler_start();
if (retval)
- hwsampler_deallocate();
+ perf_release_sampling();
return retval;
}
@@ -96,6 +112,7 @@ static void oprofile_hwsampler_stop(void)
hwsampler_stop_all();
hwsampler_deallocate();
+ perf_release_sampling();
return;
}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index bf7c73d71eef..66670ff262a0 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -407,8 +407,8 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
struct msi_msg msg;
int rc;
- if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
- return -EINVAL;
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);
@@ -919,17 +919,23 @@ static void zpci_mem_exit(void)
kmem_cache_destroy(zdev_fmb_cache);
}
-static unsigned int s390_pci_probe;
+static unsigned int s390_pci_probe = 1;
+static unsigned int s390_pci_initialized;
char * __init pcibios_setup(char *str)
{
- if (!strcmp(str, "on")) {
- s390_pci_probe = 1;
+ if (!strcmp(str, "off")) {
+ s390_pci_probe = 0;
return NULL;
}
return str;
}
+bool zpci_is_enabled(void)
+{
+ return s390_pci_initialized;
+}
+
static int __init pci_base_init(void)
{
int rc;
@@ -961,6 +967,7 @@ static int __init pci_base_init(void)
if (rc)
goto out_find;
+ s390_pci_initialized = 1;
return 0;
out_find:
@@ -978,5 +985,6 @@ subsys_initcall_sync(pci_base_init);
void zpci_rescan(void)
{
- clp_rescan_pci_devices_simple();
+ if (zpci_is_enabled())
+ clp_rescan_pci_devices_simple();
}
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 9b83d080902d..60c11a629d96 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -285,7 +285,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
flags |= ZPCI_TABLE_PROTECTED;
if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
- atomic64_add(nr_pages, (atomic64_t *) &zdev->fmb->mapped_pages);
+ atomic64_add(nr_pages, &zdev->fmb->mapped_pages);
return dma_addr + (offset & ~PAGE_MASK);
}
@@ -313,7 +313,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
zpci_err_hex(&dma_addr, sizeof(dma_addr));
}
- atomic64_add(npages, (atomic64_t *) &zdev->fmb->unmapped_pages);
+ atomic64_add(npages, &zdev->fmb->unmapped_pages);
iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
dma_free_iommu(zdev, iommu_page_index, npages);
}
@@ -332,7 +332,6 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
if (!page)
return NULL;
- atomic64_add(size / PAGE_SIZE, (atomic64_t *) &zdev->fmb->allocated_pages);
pa = page_to_phys(page);
memset((void *) pa, 0, size);
@@ -343,6 +342,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
return NULL;
}
+ atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
if (dma_handle)
*dma_handle = map;
return (void *) pa;
@@ -352,8 +352,11 @@ static void s390_dma_free(struct device *dev, size_t size,
void *pa, dma_addr_t dma_handle,
struct dma_attrs *attrs)
{
- s390_dma_unmap_pages(dev, dma_handle, PAGE_ALIGN(size),
- DMA_BIDIRECTIONAL, NULL);
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+
+ size = PAGE_ALIGN(size);
+ atomic64_sub(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
+ s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
free_pages((unsigned long) pa, get_order(size));
}
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 800f064b0da7..01e251b1da0c 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -43,9 +43,8 @@ struct zpci_ccdf_avail {
u16 pec; /* PCI event code */
} __packed;
-void zpci_event_error(void *data)
+static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
{
- struct zpci_ccdf_err *ccdf = data;
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
zpci_err("error CCDF:\n");
@@ -58,9 +57,14 @@ void zpci_event_error(void *data)
pci_name(zdev->pdev), ccdf->pec, ccdf->fid);
}
-void zpci_event_availability(void *data)
+void zpci_event_error(void *data)
+{
+ if (zpci_is_enabled())
+ __zpci_event_error(data);
+}
+
+static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
- struct zpci_ccdf_avail *ccdf = data;
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
int ret;
@@ -75,6 +79,7 @@ void zpci_event_availability(void *data)
if (!zdev || zdev->state == ZPCI_FN_STATE_CONFIGURED)
break;
zdev->state = ZPCI_FN_STATE_CONFIGURED;
+ zdev->fh = ccdf->fh;
ret = zpci_enable_device(zdev);
if (ret)
break;
@@ -98,9 +103,14 @@ void zpci_event_availability(void *data)
break;
case 0x0304: /* Configured -> Standby */
- if (pdev)
+ if (pdev) {
+ /* Give the driver a hint that the function is
+ * already unusable. */
+ pdev->error_state = pci_channel_io_perm_failure;
pci_stop_and_remove_bus_device(pdev);
+ }
+ zdev->fh = ccdf->fh;
zpci_disable_device(zdev);
zdev->state = ZPCI_FN_STATE_STANDBY;
break;
@@ -108,6 +118,8 @@ void zpci_event_availability(void *data)
clp_rescan_pci_devices();
break;
case 0x0308: /* Standby -> Reserved */
+ if (!zdev)
+ break;
pci_stop_root_bus(zdev->bus);
pci_remove_root_bus(zdev->bus);
break;
@@ -115,3 +127,9 @@ void zpci_event_availability(void *data)
break;
}
}
+
+void zpci_event_availability(void *data)
+{
+ if (zpci_is_enabled())
+ __zpci_event_availability(data);
+}