From a6e2c226c3d51fd93636320e47cabc8a8f0824c5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 24 May 2020 15:08:19 +0530 Subject: powerpc: Fix kernel crash in show_instructions() w/DEBUG_VIRTUAL With CONFIG_DEBUG_VIRTUAL=y, we can hit a BUG() if we take a hard lockup watchdog interrupt when in OPAL mode. This happens in show_instructions() if the kernel takes the watchdog NMI IPI, or any other interrupt, with MSR_IR == 0. show_instructions() updates the variable pc in the loop and the second iteration will result in BUG(). We hit the BUG_ON due the below check in __va() #define __va(x) ({ VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); }) Fix it by moving the check out of the loop. Also update nip so that the nip == pc check still matches. Fixes: 4dd7554a6456 ("powerpc/64: Add VIRTUAL_BUG_ON checks for __va and __pa addresses") Signed-off-by: Aneesh Kumar K.V [mpe: Use IS_ENABLED(), massage change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200524093822.423487-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/kernel/process.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7bb7faf84490..a2f1f0e70a4b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1252,29 +1252,31 @@ struct task_struct *__switch_to(struct task_struct *prev, static void show_instructions(struct pt_regs *regs) { int i; + unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); printk("Instruction dump:"); + /* + * If we were executing with the MMU off for instructions, adjust pc + * rather than printing XXXXXXXX. + */ + if (!IS_ENABLED(CONFIG_BOOKE) && !(regs->msr & MSR_IR)) { + pc = (unsigned long)phys_to_virt(pc); + nip = (unsigned long)phys_to_virt(regs->nip); + } + for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; if (!(i % 8)) pr_cont("\n"); -#if !defined(CONFIG_BOOKE) - /* If executing with the IMMU off, adjust pc rather - * than print XXXXXXXX. - */ - if (!(regs->msr & MSR_IR)) - pc = (unsigned long)phys_to_virt(pc); -#endif - if (!__kernel_text_address(pc) || probe_kernel_address((const void *)pc, instr)) { pr_cont("XXXXXXXX "); } else { - if (regs->nip == pc) + if (nip == pc) pr_cont("<%08x> ", instr); else pr_cont("%08x ", instr); -- cgit v1.2.3 From b791abf3201d724ac372c2ba1fa6e90d192e1dbf Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:04 +0530 Subject: powerpc/papr_scm: Fetch nvdimm health information from PHYP Implement support for fetching nvdimm health information via H_SCM_HEALTH hcall as documented in Ref[1]. The hcall returns a pair of 64-bit bitmap, bitwise-and of which is then stored in 'struct papr_scm_priv' and subsequently partially exposed to user-space via newly introduced dimm specific attribute 'papr/flags'. Since the hcall is costly, the health information is cached and only re-queried, 60s after the previous successful hcall. The patch also adds a documentation text describing flags reported by the the new sysfs attribute 'papr/flags' is also introduced at Documentation/ABI/testing/sysfs-bus-papr-pmem. [1] commit 58b278f568f0 ("powerpc: Provide initial documentation for PAPR hcalls") Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-4-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- Documentation/ABI/testing/sysfs-bus-papr-pmem | 27 +++++ arch/powerpc/platforms/pseries/papr_scm.c | 168 +++++++++++++++++++++++++- 2 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-bus-papr-pmem (limited to 'arch/powerpc') diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem new file mode 100644 index 000000000000..5b10d036a8d4 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -0,0 +1,27 @@ +What: /sys/bus/nd/devices/nmemX/papr/flags +Date: Apr, 2020 +KernelVersion: v5.8 +Contact: linuxppc-dev , linux-nvdimm@lists.01.org, +Description: + (RO) Report flags indicating various states of a + papr-pmem NVDIMM device. Each flag maps to a one or + more bits set in the dimm-health-bitmap retrieved in + response to H_SCM_HEALTH hcall. The details of the bit + flags returned in response to this hcall is available + at 'Documentation/powerpc/papr_hcalls.rst' . Below are + the flags reported in this sysfs file: + + * "not_armed" : Indicates that NVDIMM contents will not + survive a power cycle. + * "flush_fail" : Indicates that NVDIMM contents + couldn't be flushed during last + shut-down event. + * "restore_fail": Indicates that NVDIMM contents + couldn't be restored during NVDIMM + initialization. + * "encrypted" : NVDIMM contents are encrypted. + * "smart_notify": There is health event for the NVDIMM. + * "scrubbed" : Indicating that contents of the + NVDIMM have been scrubbed. + * "locked" : Indicating that NVDIMM contents cant + be modified until next power cycle. diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f35592423380..0c091622b15e 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -22,6 +23,44 @@ (1ul << ND_CMD_GET_CONFIG_DATA) | \ (1ul << ND_CMD_SET_CONFIG_DATA)) +/* DIMM health bitmap bitmap indicators */ +/* SCM device is unable to persist memory contents */ +#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) +/* SCM device failed to persist memory contents */ +#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) +/* SCM device contents are persisted from previous IPL */ +#define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2)) +/* SCM device contents are not persisted from previous IPL */ +#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) +/* SCM device memory life remaining is critically low */ +#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) +/* SCM device will be garded off next IPL due to failure */ +#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) +/* SCM contents cannot persist due to current platform health status */ +#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) +/* SCM device is unable to persist memory contents in certain conditions */ +#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7)) +/* SCM device is encrypted */ +#define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8)) +/* SCM device has been scrubbed and locked */ +#define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9)) + +/* Bits status indicators for health bitmap indicating unarmed dimm */ +#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +/* Bits status indicators for health bitmap indicating unflushed dimm */ +#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) + +/* Bits status indicators for health bitmap indicating unrestored dimm */ +#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) + +/* Bit status indicators for smart event notification */ +#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ + PAPR_PMEM_HEALTH_FATAL | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +/* private struct associated with each region */ struct papr_scm_priv { struct platform_device *pdev; struct device_node *dn; @@ -39,6 +78,15 @@ struct papr_scm_priv { struct resource res; struct nd_region *region; struct nd_interleave_set nd_set; + + /* Protect dimm health data from concurrent read/writes */ + struct mutex health_mutex; + + /* Last time the health information of the dimm was updated */ + unsigned long lasthealth_jiffies; + + /* Health information for the dimm */ + u64 health_bitmap; }; static int drc_pmem_bind(struct papr_scm_priv *p) @@ -144,6 +192,61 @@ err_out: return drc_pmem_bind(p); } +/* + * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the + * health information. + */ +static int __drc_pmem_query_health(struct papr_scm_priv *p) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + long rc; + + /* issue the hcall */ + rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); + if (rc != H_SUCCESS) { + dev_err(&p->pdev->dev, + "Failed to query health information, Err:%ld\n", rc); + return -ENXIO; + } + + p->lasthealth_jiffies = jiffies; + p->health_bitmap = ret[0] & ret[1]; + + dev_dbg(&p->pdev->dev, + "Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n", + ret[0], ret[1]); + + return 0; +} + +/* Min interval in seconds for assuming stable dimm health */ +#define MIN_HEALTH_QUERY_INTERVAL 60 + +/* Query cached health info and if needed call drc_pmem_query_health */ +static int drc_pmem_query_health(struct papr_scm_priv *p) +{ + unsigned long cache_timeout; + int rc; + + /* Protect concurrent modifications to papr_scm_priv */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + return rc; + + /* Jiffies offset for which the health data is assumed to be same */ + cache_timeout = p->lasthealth_jiffies + + msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000); + + /* Fetch new health info is its older than MIN_HEALTH_QUERY_INTERVAL */ + if (time_after(jiffies, cache_timeout)) + rc = __drc_pmem_query_health(p); + else + /* Assume cached health data is valid */ + rc = 0; + + mutex_unlock(&p->health_mutex); + return rc; +} static int papr_scm_meta_get(struct papr_scm_priv *p, struct nd_cmd_get_config_data_hdr *hdr) @@ -286,6 +389,64 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + struct seq_buf s; + u64 health; + int rc; + + rc = drc_pmem_query_health(p); + if (rc) + return rc; + + /* Copy health_bitmap locally, check masks & update out buffer */ + health = READ_ONCE(p->health_bitmap); + + seq_buf_init(&s, buf, PAGE_SIZE); + if (health & PAPR_PMEM_UNARMED_MASK) + seq_buf_printf(&s, "not_armed "); + + if (health & PAPR_PMEM_BAD_SHUTDOWN_MASK) + seq_buf_printf(&s, "flush_fail "); + + if (health & PAPR_PMEM_BAD_RESTORE_MASK) + seq_buf_printf(&s, "restore_fail "); + + if (health & PAPR_PMEM_ENCRYPTED) + seq_buf_printf(&s, "encrypted "); + + if (health & PAPR_PMEM_SMART_EVENT_MASK) + seq_buf_printf(&s, "smart_notify "); + + if (health & PAPR_PMEM_SCRUBBED_AND_LOCKED) + seq_buf_printf(&s, "scrubbed locked "); + + if (seq_buf_used(&s)) + seq_buf_printf(&s, "\n"); + + return seq_buf_used(&s); +} +DEVICE_ATTR_RO(flags); + +/* papr_scm specific dimm attributes */ +static struct attribute *papr_nd_attributes[] = { + &dev_attr_flags.attr, + NULL, +}; + +static struct attribute_group papr_nd_attribute_group = { + .name = "papr", + .attrs = papr_nd_attributes, +}; + +static const struct attribute_group *papr_nd_attr_groups[] = { + &papr_nd_attribute_group, + NULL, +}; + static int papr_scm_nvdimm_init(struct papr_scm_priv *p) { struct device *dev = &p->pdev->dev; @@ -312,8 +473,8 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dimm_flags = 0; set_bit(NDD_LABELING, &dimm_flags); - p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags, - PAPR_SCM_DIMM_CMD_MASK, 0, NULL); + p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, + dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); if (!p->nvdimm) { dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); goto err; @@ -399,6 +560,9 @@ static int papr_scm_probe(struct platform_device *pdev) if (!p) return -ENOMEM; + /* Initialize the dimm mutex */ + mutex_init(&p->health_mutex); + /* optional DT properties */ of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); -- cgit v1.2.3 From b5f38f09e1558c20265a2976b0337bab143a66c7 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:05 +0530 Subject: powerpc/papr_scm: Improve error logging and handling papr_scm_ndctl() Since papr_scm_ndctl() can be called from outside papr_scm, its exposed to the possibility of receiving NULL as value of 'cmd_rc' argument. This patch updates papr_scm_ndctl() to protect against such possibility by assigning it pointer to a local variable in case cmd_rc == NULL. Finally the patch also updates the 'default' add a debug log unknown 'cmd' values. Signed-off-by: Vaibhav Jain Reviewed-by: Ira Weiny Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-5-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/platforms/pseries/papr_scm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 0c091622b15e..692ad3d79826 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -355,11 +355,16 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, { struct nd_cmd_get_config_size *get_size_hdr; struct papr_scm_priv *p; + int rc; /* Only dimm-specific calls are supported atm */ if (!nvdimm) return -EINVAL; + /* Use a local variable in case cmd_rc pointer is NULL */ + if (!cmd_rc) + cmd_rc = &rc; + p = nvdimm_provider_data(nvdimm); switch (cmd) { @@ -381,6 +386,7 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, break; default: + dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); return -EINVAL; } -- cgit v1.2.3 From f517f7925b7b453cb83be06c268ba057b78e4792 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:06 +0530 Subject: ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm module and add the command family NVDIMM_FAMILY_PAPR to the white list of NVDIMM command sets. Also advertise support for ND_CMD_CALL for the nvdimm command mask and implement necessary scaffolding in the module to handle ND_CMD_CALL ioctl and PDSM requests that we receive. The layout of the PDSM request as we expect from libnvdimm/libndctl is described in newly introduced uapi header 'papr_pdsm.h' which defines a 'struct nd_pkg_pdsm' and a maximal union named 'nd_pdsm_payload'. These new structs together with 'struct nd_cmd_pkg' for a pdsm envelop thats sent by libndctl to libnvdimm and serviced by papr_scm in 'papr_scm_service_pdsm()'. The PDSM request is communicated by member 'struct nd_cmd_pkg.nd_command' together with other information on the pdsm payload (size-in, size-out). The patch also introduces 'struct pdsm_cmd_desc' instances of which are stored in an array __pdsm_cmd_descriptors[] indexed with PDSM cmd and corresponding access function pdsm_cmd_desc() is introduced. 'struct pdsm_cdm_desc' holds the service function for a given PDSM and corresponding payload in/out sizes. A new function papr_scm_service_pdsm() is introduced and is called from papr_scm_ndctl() in case of a PDSM request is received via ND_CMD_CALL command from libnvdimm. The function performs validation on the PDSM payload based on info present in corresponding PDSM descriptor and if valid calls the 'struct pdcm_cmd_desc.service' function to service the PDSM. Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-6-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/include/uapi/asm/papr_pdsm.h | 95 +++++++++++++++ arch/powerpc/platforms/pseries/papr_scm.c | 193 +++++++++++++++++++++++++++++- include/uapi/linux/ndctl.h | 1 + 3 files changed, 285 insertions(+), 4 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h new file mode 100644 index 000000000000..28115152aa4e --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl + * + * (C) Copyright IBM 2020 + * + * Author: Vaibhav Jain + */ + +#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_ +#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_ + +#include +#include + +/* + * PDSM Envelope: + * + * The ioctl ND_CMD_CALL exchange data between user-space and kernel via + * envelope which consists of 2 headers sections and payload sections as + * illustrated below: + * +-----------------+---------------+---------------------------+ + * | 64-Bytes | 8-Bytes | Max 184-Bytes | + * +-----------------+---------------+---------------------------+ + * | ND-HEADER | PDSM-HEADER | PDSM-PAYLOAD | + * +-----------------+---------------+---------------------------+ + * | nd_family | | | + * | nd_size_out | cmd_status | | + * | nd_size_in | reserved | nd_pdsm_payload | + * | nd_command | payload --> | | + * | nd_fw_size | | | + * | nd_payload ---> | | | + * +---------------+-----------------+---------------------------+ + * + * ND Header: + * This is the generic libnvdimm header described as 'struct nd_cmd_pkg' + * which is interpreted by libnvdimm before passed on to papr_scm. Important + * member fields used are: + * 'nd_family' : (In) NVDIMM_FAMILY_PAPR_SCM + * 'nd_size_in' : (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0) + * 'nd_size_out' : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD + * 'nd_command' : (In) One of PAPR_PDSM_XXX + * 'nd_fw_size' : (Out) PDSM-HEADER + size of actual payload returned + * + * PDSM Header: + * This is papr-scm specific header that precedes the payload. This is defined + * as nd_cmd_pdsm_pkg. Following fields aare available in this header: + * + * 'cmd_status' : (Out) Errors if any encountered while servicing PDSM. + * 'reserved' : Not used, reserved for future and should be set to 0. + * 'payload' : A union of all the possible payload structs + * + * PDSM Payload: + * + * The layout of the PDSM Payload is defined by various structs shared between + * papr_scm and libndctl so that contents of payload can be interpreted. As such + * its defined as a union of all possible payload structs as + * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command' + * appropriate member of the union is accessed. + */ + +/* Max payload size that we can handle */ +#define ND_PDSM_PAYLOAD_MAX_SIZE 184 + +/* Max payload size that we can handle */ +#define ND_PDSM_HDR_SIZE \ + (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE) + +/* + * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel + * via 'nd_cmd_pkg.nd_command' member of the ioctl struct + */ +enum papr_pdsm { + PAPR_PDSM_MIN = 0x0, + PAPR_PDSM_MAX, +}; + +/* Maximal union that can hold all possible payload types */ +union nd_pdsm_payload { + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; +} __packed; + +/* + * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm + * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command' + * that should always precede this struct when sent to papr_scm via CMD_CALL + * interface. + */ +struct nd_pkg_pdsm { + __s32 cmd_status; /* Out: Sub-cmd status returned back */ + __u16 reserved[2]; /* Ignored and to be set as '0' */ + union nd_pdsm_payload payload; +} __packed; + +#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */ diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 692ad3d79826..d3bbf9940ba4 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -15,13 +15,15 @@ #include #include +#include #define BIND_ANY_ADDR (~0ul) #define PAPR_SCM_DIMM_CMD_MASK \ ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ (1ul << ND_CMD_GET_CONFIG_DATA) | \ - (1ul << ND_CMD_SET_CONFIG_DATA)) + (1ul << ND_CMD_SET_CONFIG_DATA) | \ + (1ul << ND_CMD_CALL)) /* DIMM health bitmap bitmap indicators */ /* SCM device is unable to persist memory contents */ @@ -349,17 +351,195 @@ static int papr_scm_meta_set(struct papr_scm_priv *p, return 0; } +/* + * Do a sanity checks on the inputs args to dimm-control function and return + * '0' if valid. Validation of PDSM payloads happens later in + * papr_scm_service_pdsm. + */ +static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK; + struct nd_cmd_pkg *nd_cmd; + struct papr_scm_priv *p; + enum papr_pdsm pdsm; + + /* Only dimm-specific calls are supported atm */ + if (!nvdimm) + return -EINVAL; + + /* get the provider data from struct nvdimm */ + p = nvdimm_provider_data(nvdimm); + + if (!test_bit(cmd, &cmd_mask)) { + dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd); + return -EINVAL; + } + + /* For CMD_CALL verify pdsm request */ + if (cmd == ND_CMD_CALL) { + /* Verify the envelope and envelop size */ + if (!buf || + buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n", + buf_len); + return -EINVAL; + } + + /* Verify that the nd_cmd_pkg.nd_family is correct */ + nd_cmd = (struct nd_cmd_pkg *)buf; + + if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) { + dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n", + nd_cmd->nd_family); + return -EINVAL; + } + + pdsm = (enum papr_pdsm)nd_cmd->nd_command; + + /* Verify if the pdsm command is valid */ + if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", + pdsm); + return -EINVAL; + } + + /* Have enough space to hold returned 'nd_pkg_pdsm' header */ + if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n", + pdsm); + return -EINVAL; + } + } + + /* Let the command be further processed */ + return 0; +} + +/* + * 'struct pdsm_cmd_desc' + * Identifies supported PDSMs' expected length of in/out payloads + * and pdsm service function. + * + * size_in : Size of input payload if any in the PDSM request. + * size_out : Size of output payload if any in the PDSM request. + * service : Service function for the PDSM request. Return semantics: + * rc < 0 : Error servicing PDSM and rc indicates the error. + * rc >=0 : Serviced successfully and 'rc' indicate number of + * bytes written to payload. + */ +struct pdsm_cmd_desc { + u32 size_in; + u32 size_out; + int (*service)(struct papr_scm_priv *dimm, + union nd_pdsm_payload *payload); +}; + +/* Holds all supported PDSMs' command descriptors */ +static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { + [PAPR_PDSM_MIN] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, + /* New PDSM command descriptors to be added below */ + + /* Empty */ + [PAPR_PDSM_MAX] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, +}; + +/* Given a valid pdsm cmd return its command descriptor else return NULL */ +static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd) +{ + if (cmd >= 0 || cmd < ARRAY_SIZE(__pdsm_cmd_descriptors)) + return &__pdsm_cmd_descriptors[cmd]; + + return NULL; +} + +/* + * For a given pdsm request call an appropriate service function. + * Returns errors if any while handling the pdsm command package. + */ +static int papr_scm_service_pdsm(struct papr_scm_priv *p, + struct nd_cmd_pkg *pkg) +{ + /* Get the PDSM header and PDSM command */ + struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload; + enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command; + const struct pdsm_cmd_desc *pdsc; + int rc; + + /* Fetch corresponding pdsm descriptor for validation and servicing */ + pdsc = pdsm_cmd_desc(pdsm); + + /* Validate pdsm descriptor */ + /* Ensure that reserved fields are 0 */ + if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n", + pdsm); + return -EINVAL; + } + + /* If pdsm expects some input, then ensure that the size_in matches */ + if (pdsc->size_in && + pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n", + pdsm, pkg->nd_size_in); + return -EINVAL; + } + + /* If pdsm wants to return data, then ensure that size_out matches */ + if (pdsc->size_out && + pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n", + pdsm, pkg->nd_size_out); + return -EINVAL; + } + + /* Service the pdsm */ + if (pdsc->service) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm); + + rc = pdsc->service(p, &pdsm_pkg->payload); + + if (rc < 0) { + /* error encountered while servicing pdsm */ + pdsm_pkg->cmd_status = rc; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } else { + /* pdsm serviced and 'rc' bytes written to payload */ + pdsm_pkg->cmd_status = 0; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc; + } + } else { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n", + pdsm); + pdsm_pkg->cmd_status = -ENOENT; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } + + return pdsm_pkg->cmd_status; +} + static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { struct nd_cmd_get_config_size *get_size_hdr; + struct nd_cmd_pkg *call_pkg = NULL; struct papr_scm_priv *p; int rc; - /* Only dimm-specific calls are supported atm */ - if (!nvdimm) - return -EINVAL; + rc = is_cmd_valid(nvdimm, cmd, buf, buf_len); + if (rc) { + pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc); + return rc; + } /* Use a local variable in case cmd_rc pointer is NULL */ if (!cmd_rc) @@ -385,6 +565,11 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, *cmd_rc = papr_scm_meta_set(p, buf); break; + case ND_CMD_CALL: + call_pkg = (struct nd_cmd_pkg *)buf; + *cmd_rc = papr_scm_service_pdsm(p, call_pkg); + break; + default: dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); return -EINVAL; diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index de5d90212409..0e09dc5cec19 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -244,6 +244,7 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_HPE2 2 #define NVDIMM_FAMILY_MSFT 3 #define NVDIMM_FAMILY_HYPERV 4 +#define NVDIMM_FAMILY_PAPR 5 #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg) -- cgit v1.2.3 From d35f18b554be015b6fa89fad6447c6fce8e6ad66 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:07 +0530 Subject: powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH This patch implements support for PDSM request 'PAPR_PDSM_HEALTH' that returns a newly introduced 'struct nd_papr_pdsm_health' instance containing dimm health information back to user space in response to ND_CMD_CALL. This functionality is implemented in newly introduced papr_pdsm_health() that queries the nvdimm health information and then copies this information to the package payload whose layout is defined by 'struct nd_papr_pdsm_health'. Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-7-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/include/uapi/asm/papr_pdsm.h | 37 ++++++++++++++++++++++ arch/powerpc/platforms/pseries/papr_scm.c | 51 +++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 28115152aa4e..9ccecc1d6840 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -66,17 +66,54 @@ #define ND_PDSM_HDR_SIZE \ (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE) +/* Various nvdimm health indicators */ +#define PAPR_PDSM_DIMM_HEALTHY 0 +#define PAPR_PDSM_DIMM_UNHEALTHY 1 +#define PAPR_PDSM_DIMM_CRITICAL 2 +#define PAPR_PDSM_DIMM_FATAL 3 + +/* + * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH + * Various flags indicate the health status of the dimm. + * + * extension_flags : Any extension fields present in the struct. + * dimm_unarmed : Dimm not armed. So contents wont persist. + * dimm_bad_shutdown : Previous shutdown did not persist contents. + * dimm_bad_restore : Contents from previous shutdown werent restored. + * dimm_scrubbed : Contents of the dimm have been scrubbed. + * dimm_locked : Contents of the dimm cant be modified until CEC reboot + * dimm_encrypted : Contents of dimm are encrypted. + * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX + */ +struct nd_papr_pdsm_health { + union { + struct { + __u32 extension_flags; + __u8 dimm_unarmed; + __u8 dimm_bad_shutdown; + __u8 dimm_bad_restore; + __u8 dimm_scrubbed; + __u8 dimm_locked; + __u8 dimm_encrypted; + __u16 dimm_health; + }; + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; + }; +}; + /* * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel * via 'nd_cmd_pkg.nd_command' member of the ioctl struct */ enum papr_pdsm { PAPR_PDSM_MIN = 0x0, + PAPR_PDSM_HEALTH, PAPR_PDSM_MAX, }; /* Maximal union that can hold all possible payload types */ union nd_pdsm_payload { + struct nd_papr_pdsm_health health; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; } __packed; diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index d3bbf9940ba4..9c569078a09f 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -416,6 +416,52 @@ static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, return 0; } +/* Fetch the DIMM health info and populate it in provided package. */ +static int papr_pdsm_health(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc; + + /* Ensure dimm health mutex is taken preventing concurrent access */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + goto out; + + /* Always fetch upto date dimm health data ignoring cached values */ + rc = __drc_pmem_query_health(p); + if (rc) { + mutex_unlock(&p->health_mutex); + goto out; + } + + /* update health struct with various flags derived from health bitmap */ + payload->health = (struct nd_papr_pdsm_health) { + .extension_flags = 0, + .dimm_unarmed = !!(p->health_bitmap & PAPR_PMEM_UNARMED_MASK), + .dimm_bad_shutdown = !!(p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK), + .dimm_bad_restore = !!(p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK), + .dimm_scrubbed = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), + .dimm_locked = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), + .dimm_encrypted = !!(p->health_bitmap & PAPR_PMEM_ENCRYPTED), + .dimm_health = PAPR_PDSM_DIMM_HEALTHY, + }; + + /* Update field dimm_health based on health_bitmap flags */ + if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL) + payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL; + else if (p->health_bitmap & PAPR_PMEM_HEALTH_CRITICAL) + payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL; + else if (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY) + payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY; + + /* struct populated hence can release the mutex now */ + mutex_unlock(&p->health_mutex); + rc = sizeof(struct nd_papr_pdsm_health); + +out: + return rc; +} + /* * 'struct pdsm_cmd_desc' * Identifies supported PDSMs' expected length of in/out payloads @@ -444,6 +490,11 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { }, /* New PDSM command descriptors to be added below */ + [PAPR_PDSM_HEALTH] = { + .size_in = 0, + .size_out = sizeof(struct nd_papr_pdsm_health), + .service = papr_pdsm_health, + }, /* Empty */ [PAPR_PDSM_MAX] = { .size_in = 0, -- cgit v1.2.3 From 0bdcfa182506526fbe4e088ff9ca86a31b81828d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 15 Jun 2020 16:12:47 +1000 Subject: powerpc/64s: Fix KVM interrupt using wrong save area The CTR register reload in the KVM interrupt path used the wrong save area for SLB (and NMI) interrupts. Fixes: 9600f261acaa ("powerpc/64s/exception: Move KVM test to common code") Cc: stable@vger.kernel.org # v5.7+ Reported-by: Christian Zigotzky Signed-off-by: Nicholas Piggin Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200615061247.1310763-1-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e70ebb5c318c..fa080694e581 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -270,7 +270,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .endif - ld r10,PACA_EXGEN+EX_CTR(r13) + ld r10,IAREA+EX_CTR(r13) mtctr r10 BEGIN_FTR_SECTION ld r10,IAREA+EX_PPR(r13) @@ -298,7 +298,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .if IKVM_SKIP 89: mtocrf 0x80,r9 - ld r10,PACA_EXGEN+EX_CTR(r13) + ld r10,IAREA+EX_CTR(r13) mtctr r10 ld r9,IAREA+EX_R9(r13) ld r10,IAREA+EX_R10(r13) -- cgit v1.2.3 From 687993ccf3b05070598b89fad97410b26d7bc9d2 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 15 Jun 2020 12:22:29 +0300 Subject: powerpc/8xx: use pmd_off() to access a PMD entry in pte_update() The pte_update() implementation for PPC_8xx unfolds page table from the PGD level to access a PMD entry. Since 8xx has only 2-level page table this can be simplified with pmd_off() shortcut. Replace explicit unfolding with pmd_off() and drop defines of pgd_index() and pgd_offset() that are no longer needed. Signed-off-by: Mike Rapoport Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200615092229.23142-1-rppt@kernel.org --- arch/powerpc/include/asm/nohash/32/pgtable.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b56f14160ae5..5a590ceaec14 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -205,10 +205,6 @@ static inline void pmd_clear(pmd_t *pmdp) *pmdp = __pmd(0); } -/* to find an entry in a page-table-directory */ -#define pgd_index(address) ((address) >> PGDIR_SHIFT) -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) - /* * PTE updates. This function is called whenever an existing * valid PTE is updated. This does -not- include set_pte_at() @@ -230,6 +226,8 @@ static inline void pmd_clear(pmd_t *pmdp) * For other page sizes, we have a single entry in the table. */ #ifdef CONFIG_PPC_8xx +static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); + static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, unsigned long clr, unsigned long set, int huge) { @@ -237,7 +235,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p pte_basic_t old = pte_val(*p); pte_basic_t new = (old & ~(pte_basic_t)clr) | set; int num, i; - pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, addr), addr), addr), addr); + pmd_t *pmd = pmd_off(mm, addr); if (!huge) num = PAGE_SIZE / SZ_4K; -- cgit v1.2.3 From 1497eea68624f6076bf3eaf66baec3771ea04045 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jun 2020 23:56:16 +1000 Subject: powerpc/syscalls: Use the number when building SPU syscall table Currently the macro that inserts entries into the SPU syscall table doesn't actually use the "nr" (syscall number) parameter. This does work, but it relies on the exact right number of syscall entries being emitted in order for the syscal numbers to line up with the array entries. If for example we had two entries with the same syscall number we wouldn't get an error, it would just cause all subsequent syscalls to be off by one in the spu_syscall_table. So instead change the macro to assign to the specific entry of the array, meaning any numbering overlap will be caught by the compiler. Signed-off-by: Michael Ellerman Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200616135617.2937252-1-mpe@ellerman.id.au --- arch/powerpc/platforms/cell/spu_callbacks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index cbee3666da07..abdef9bcf432 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -35,7 +35,7 @@ */ static void *spu_syscall_table[] = { -#define __SYSCALL(nr, entry) entry, +#define __SYSCALL(nr, entry) [nr] = entry, #include #undef __SYSCALL }; -- cgit v1.2.3 From fe557319aa06c23cffc9346000f119547e0f289a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:53 +0200 Subject: maccess: rename probe_kernel_{read,write} to copy_{from,to}_kernel_nofault Better describe what these functions do. Suggested-by: Linus Torvalds Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/ftrace.c | 3 ++- arch/arm/kernel/kgdb.c | 2 +- arch/arm64/kernel/insn.c | 4 ++-- arch/csky/kernel/ftrace.c | 5 ++-- arch/ia64/kernel/ftrace.c | 6 ++--- arch/mips/kernel/kprobes.c | 6 ++--- arch/nds32/kernel/ftrace.c | 5 ++-- arch/parisc/kernel/ftrace.c | 2 +- arch/parisc/kernel/kgdb.c | 4 ++-- arch/parisc/lib/memcpy.c | 2 +- arch/powerpc/kernel/module_64.c | 6 +++-- arch/powerpc/kernel/trace/ftrace.c | 4 ++-- arch/powerpc/lib/inst.c | 6 ++--- arch/powerpc/perf/core-book3s.c | 3 ++- arch/riscv/kernel/ftrace.c | 3 ++- arch/riscv/kernel/kgdb.c | 4 ++-- arch/riscv/kernel/patch.c | 4 ++-- arch/s390/kernel/ftrace.c | 4 ++-- arch/sh/kernel/ftrace.c | 6 ++--- arch/um/kernel/maccess.c | 2 +- arch/x86/include/asm/ptrace.h | 4 ++-- arch/x86/kernel/dumpstack.c | 2 +- arch/x86/kernel/ftrace.c | 10 ++++---- arch/x86/kernel/kgdb.c | 6 ++--- arch/x86/kernel/kprobes/core.c | 5 ++-- arch/x86/kernel/kprobes/opt.c | 2 +- arch/x86/kernel/traps.c | 3 ++- arch/x86/mm/fault.c | 2 +- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/maccess.c | 4 ++-- arch/x86/xen/enlighten_pv.c | 2 +- drivers/char/mem.c | 2 +- drivers/dio/dio.c | 6 +++-- drivers/input/serio/hp_sdc.c | 2 +- drivers/misc/kgdbts.c | 6 ++--- drivers/video/fbdev/hpfb.c | 2 +- fs/proc/kcore.c | 3 ++- include/linux/uaccess.h | 13 +++++----- kernel/debug/debug_core.c | 6 ++--- kernel/debug/gdbstub.c | 6 ++--- kernel/debug/kdb/kdb_main.c | 3 ++- kernel/debug/kdb/kdb_support.c | 7 +++--- kernel/kthread.c | 2 +- kernel/trace/bpf_trace.c | 4 ++-- kernel/trace/trace_kprobe.c | 4 ++-- kernel/workqueue.c | 10 ++++---- mm/debug.c | 8 +++---- mm/maccess.c | 49 +++++++++++++++++++------------------- mm/rodata_test.c | 2 +- mm/slub.c | 2 +- 50 files changed, 138 insertions(+), 122 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 10499d44964a..9a79ef6b1876 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -84,7 +84,8 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old, old = __opcode_to_mem_arm(old); if (validate) { - if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(&replaced, (void *)pc, + MCOUNT_INSN_SIZE)) return -EFAULT; if (replaced != old) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 6a95b9296640..7bd30c0a4280 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -236,7 +236,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) /* patch_text() only supports int-sized breakpoints */ BUILD_BUG_ON(sizeof(int) != BREAK_INSTR_SIZE); - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 684d871ae38d..a107375005bc 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -135,7 +135,7 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp) int ret; __le32 val; - ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); + ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE); if (!ret) *insnp = le32_to_cpu(val); @@ -151,7 +151,7 @@ static int __kprobes __aarch64_insn_write(void *addr, __le32 insn) raw_spin_lock_irqsave(&patch_lock, flags); waddr = patch_map(addr, FIX_TEXT_POKE0); - ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE); + ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE); patch_unmap(FIX_TEXT_POKE0); raw_spin_unlock_irqrestore(&patch_lock, flags); diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c index 3c425b84e3be..b4a7ec1517ff 100644 --- a/arch/csky/kernel/ftrace.c +++ b/arch/csky/kernel/ftrace.c @@ -72,7 +72,8 @@ static int ftrace_check_current_nop(unsigned long hook) uint16_t olds[7]; unsigned long hook_pos = hook - 2; - if (probe_kernel_read((void *)olds, (void *)hook_pos, sizeof(nops))) + if (copy_from_kernel_nofault((void *)olds, (void *)hook_pos, + sizeof(nops))) return -EFAULT; if (memcmp((void *)nops, (void *)olds, sizeof(nops))) { @@ -97,7 +98,7 @@ static int ftrace_modify_code(unsigned long hook, unsigned long target, make_jbsr(target, hook, call, nolr); - ret = probe_kernel_write((void *)hook_pos, enable ? call : nops, + ret = copy_to_kernel_nofault((void *)hook_pos, enable ? call : nops, sizeof(nops)); if (ret) return -EPERM; diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c index cee411e647ca..b2ab2d58fb30 100644 --- a/arch/ia64/kernel/ftrace.c +++ b/arch/ia64/kernel/ftrace.c @@ -108,7 +108,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, goto skip_check; /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -117,7 +117,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, skip_check: /* replace the text with the new text */ - if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE)) + if (copy_to_kernel_nofault(((void *)ip), new_code, MCOUNT_INSN_SIZE)) return -EPERM; flush_icache_range(ip, ip + MCOUNT_INSN_SIZE); @@ -129,7 +129,7 @@ static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr) unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE]; unsigned long ip = rec->ip; - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; if (rec->flags & FTRACE_FL_CONVERTED) { struct ftrace_call_insn *call_insn, *tmp_call; diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 6cfae2411c04..d043c2f897fc 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -86,9 +86,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) goto out; } - if ((probe_kernel_read(&prev_insn, p->addr - 1, - sizeof(mips_instruction)) == 0) && - insn_has_delayslot(prev_insn)) { + if (copy_from_kernel_nofault(&prev_insn, p->addr - 1, + sizeof(mips_instruction)) == 0 && + insn_has_delayslot(prev_insn)) { pr_notice("Kprobes for branch delayslot are not supported\n"); ret = -EINVAL; goto out; diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 22ab77ea27ad..3763b3f8c3db 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -131,13 +131,14 @@ static int __ftrace_modify_code(unsigned long pc, unsigned long *old_insn, unsigned long orig_insn[3]; if (validate) { - if (probe_kernel_read(orig_insn, (void *)pc, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(orig_insn, (void *)pc, + MCOUNT_INSN_SIZE)) return -EFAULT; if (memcmp(orig_insn, old_insn, MCOUNT_INSN_SIZE)) return -EINVAL; } - if (probe_kernel_write((void *)pc, new_insn, MCOUNT_INSN_SIZE)) + if (copy_to_kernel_nofault((void *)pc, new_insn, MCOUNT_INSN_SIZE)) return -EPERM; return 0; diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index b836fc61a24f..1df0f67ed667 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -172,7 +172,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) ip = (void *)(rec->ip + 4 - size); - ret = probe_kernel_read(insn, ip, size); + ret = copy_from_kernel_nofault(insn, ip, size); if (ret) return ret; diff --git a/arch/parisc/kernel/kgdb.c b/arch/parisc/kernel/kgdb.c index 664278db9b97..c4554ac13eac 100644 --- a/arch/parisc/kernel/kgdb.c +++ b/arch/parisc/kernel/kgdb.c @@ -154,8 +154,8 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { - int ret = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, - BREAK_INSTR_SIZE); + int ret = copy_from_kernel_nofault(bpt->saved_instr, + (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (ret) return ret; diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index 94a9fe2702c2..4b75388190b4 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -57,7 +57,7 @@ void * memcpy(void * dst,const void *src, size_t count) EXPORT_SYMBOL(raw_copy_in_user); EXPORT_SYMBOL(memcpy); -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { if ((unsigned long)unsafe_src < PAGE_SIZE) return false; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index f4c2fa190192..ae2b188365b1 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -756,7 +756,8 @@ int module_trampoline_target(struct module *mod, unsigned long addr, stub = (struct ppc64_stub_entry *)addr; - if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { + if (copy_from_kernel_nofault(&magic, &stub->magic, + sizeof(magic))) { pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; } @@ -766,7 +767,8 @@ int module_trampoline_target(struct module *mod, unsigned long addr, return -EFAULT; } - if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { + if (copy_from_kernel_nofault(&funcdata, &stub->funcdata, + sizeof(funcdata))) { pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; } diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 5e399628f51a..c1fede6ec934 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -226,7 +226,7 @@ __ftrace_make_nop(struct module *mod, unsigned long ip = rec->ip; unsigned long tramp; - if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure that that this is still a 24bit jump */ @@ -249,7 +249,7 @@ __ftrace_make_nop(struct module *mod, pr_devel("ip:%lx jumps to %lx", ip, tramp); /* Find where the trampoline jumps to */ - if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { + if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) { pr_err("Failed to read %lx\n", tramp); return -EFAULT; } diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index aedfd6e31e53..6c7a20af9fd6 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -33,11 +33,11 @@ int probe_kernel_read_inst(struct ppc_inst *inst, unsigned int val, suffix; int err; - err = probe_kernel_read(&val, src, sizeof(val)); + err = copy_from_kernel_nofault(&val, src, sizeof(val)); if (err) return err; if (get_op(val) == OP_PREFIX) { - err = probe_kernel_read(&suffix, (void *)src + 4, 4); + err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); @@ -64,7 +64,7 @@ int probe_kernel_read_inst(struct ppc_inst *inst, unsigned int val; int err; - err = probe_kernel_read(&val, src, sizeof(val)); + err = copy_from_kernel_nofault(&val, src, sizeof(val)); if (!err) *inst = ppc_inst(val); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 13b9dd5e4a76..efe97ff82557 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -418,7 +418,8 @@ static __u64 power_pmu_bhrb_to(u64 addr) __u64 target; if (is_kernel_addr(addr)) { - if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + if (copy_from_kernel_nofault(&instr, (void *)addr, + sizeof(instr))) return 0; return branch_target((struct ppc_inst *)&instr); diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 08396614d6f4..2ff63d0cbb50 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -38,7 +38,8 @@ static int ftrace_check_current_call(unsigned long hook_pos, * Read the text we want to modify; * return must be -EFAULT on read error */ - if (probe_kernel_read(replaced, (void *)hook_pos, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)hook_pos, + MCOUNT_INSN_SIZE)) return -EFAULT; /* diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index f16ade84a11f..a21fb21883e7 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -153,7 +153,7 @@ int do_single_step(struct pt_regs *regs) stepped_address = addr; /* Replace the op code with the break instruction */ - error = probe_kernel_write((void *)stepped_address, + error = copy_to_kernel_nofault((void *)stepped_address, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); /* Flush and return */ @@ -173,7 +173,7 @@ int do_single_step(struct pt_regs *regs) static void undo_single_step(struct pt_regs *regs) { if (stepped_opcode != 0) { - probe_kernel_write((void *)stepped_address, + copy_to_kernel_nofault((void *)stepped_address, (void *)&stepped_opcode, BREAK_INSTR_SIZE); flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE); diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index d4a64dfed342..3fe7a5296aa5 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -63,7 +63,7 @@ static int patch_insn_write(void *addr, const void *insn, size_t len) waddr = patch_map(addr, FIX_TEXT_POKE0); - ret = probe_kernel_write(waddr, insn, len); + ret = copy_to_kernel_nofault(waddr, insn, len); patch_unmap(FIX_TEXT_POKE0); @@ -76,7 +76,7 @@ NOKPROBE_SYMBOL(patch_insn_write); #else static int patch_insn_write(void *addr, const void *insn, size_t len) { - return probe_kernel_write(addr, insn, len); + return copy_to_kernel_nofault(addr, insn, len); } NOKPROBE_SYMBOL(patch_insn_write); #endif /* CONFIG_MMU */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 44e01dd1e624..b388e87a08bf 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -83,7 +83,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, { struct ftrace_insn orig, new, old; - if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; if (addr == MCOUNT_ADDR) { /* Initial code replacement */ @@ -105,7 +105,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { struct ftrace_insn orig, new, old; - if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; /* Replace nop with an ftrace call. */ ftrace_generate_nop_insn(&orig); diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 1b04270e5460..0646c5961846 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -119,7 +119,7 @@ static void ftrace_mod_code(void) * But if one were to fail, then they all should, and if one were * to succeed, then they all should. */ - mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, + mod_code_status = copy_to_kernel_nofault(mod_code_ip, mod_code_newcode, MCOUNT_INSN_SIZE); /* if we fail, then kill any new writers */ @@ -203,7 +203,7 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, */ /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -268,7 +268,7 @@ static int ftrace_mod(unsigned long ip, unsigned long old_addr, { unsigned char code[MCOUNT_INSN_SIZE]; - if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(code, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; if (old_addr != __raw_readl((unsigned long *)code)) diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c index e929c0966696..8ccd56813f68 100644 --- a/arch/um/kernel/maccess.c +++ b/arch/um/kernel/maccess.c @@ -7,7 +7,7 @@ #include #include -bool probe_kernel_read_allowed(const void *src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *src, size_t size) { void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index ebedeab48704..255b2dde2c1b 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -278,7 +278,7 @@ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs } /* To avoid include hell, we can't include uaccess.h */ -extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long copy_from_kernel_nofault(void *dst, const void *src, size_t size); /** * regs_get_kernel_stack_nth() - get Nth entry of the stack @@ -298,7 +298,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, addr = regs_get_kernel_stack_nth_addr(regs, n); if (addr) { - ret = probe_kernel_read(&val, addr, sizeof(val)); + ret = copy_from_kernel_nofault(&val, addr, sizeof(val)); if (!ret) return val; } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 456511b2284e..b037cfa7c0c5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -106,7 +106,7 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl) bad_ip = user_mode(regs) && __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX); - if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue, + if (bad_ip || copy_from_kernel_nofault(opcodes, (u8 *)prologue, OPCODE_BUFSIZE)) { printk("%sCode: Bad RIP value.\n", loglvl); } else { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c84d28e90a58..51504566b3a6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -86,7 +86,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code) * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ - if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { + if (copy_from_kernel_nofault(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { WARN_ON(1); return -EFAULT; } @@ -355,7 +355,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE); /* Copy ftrace_caller onto the trampoline memory */ - ret = probe_kernel_read(trampoline, (void *)start_offset, size); + ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size); if (WARN_ON(ret < 0)) goto fail; @@ -363,13 +363,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* The trampoline ends with ret(q) */ retq = (unsigned long)ftrace_stub; - ret = probe_kernel_read(ip, (void *)retq, RET_SIZE); + ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); if (WARN_ON(ret < 0)) goto fail; if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller); - ret = probe_kernel_read(ip, (void *)retq, RET_SIZE); + ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); if (WARN_ON(ret < 0)) goto fail; } @@ -506,7 +506,7 @@ static void *addr_from_call(void *ptr) union text_poke_insn call; int ret; - ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE); + ret = copy_from_kernel_nofault(&call, ptr, CALL_INSN_SIZE); if (WARN_ON_ONCE(ret < 0)) return NULL; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index c44fe7d8d9a4..68acd30c6b87 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -732,11 +732,11 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) int err; bpt->type = BP_BREAKPOINT; - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; - err = probe_kernel_write((char *)bpt->bpt_addr, + err = copy_to_kernel_nofault((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); if (!err) return err; @@ -768,7 +768,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) return 0; knl_write: - return probe_kernel_write((char *)bpt->bpt_addr, + return copy_to_kernel_nofault((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3bafe1bd4dc7..f09985c87d73 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -243,7 +243,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) * Fortunately, we know that the original code is the ideal 5-byte * long NOP. */ - if (probe_kernel_read(buf, (void *)addr, + if (copy_from_kernel_nofault(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) return 0UL; @@ -346,7 +346,8 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) return 0; /* This can access kernel text if given address is not recovered */ - if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE)) + if (copy_from_kernel_nofault(dest, (void *)recovered_insn, + MAX_INSN_SIZE)) return 0; kernel_insn_init(insn, dest, MAX_INSN_SIZE); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 321c19950285..7af4c61dde52 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -56,7 +56,7 @@ found: * overwritten by jump destination address. In this case, original * bytes must be recovered from op->optinsn.copied_insn buffer. */ - if (probe_kernel_read(buf, (void *)addr, + if (copy_from_kernel_nofault(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) return 0UL; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index af75109485c2..7003f2e7b163 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -488,7 +488,8 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, u8 insn_buf[MAX_INSN_SIZE]; struct insn insn; - if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE)) + if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, + MAX_INSN_SIZE)) return GP_NO_HINT; kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 66be9bd60307..e996aa3833b8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -442,7 +442,7 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index) return; } - if (probe_kernel_read(&desc, (void *)(gdt->address + offset), + if (copy_from_kernel_nofault(&desc, (void *)(gdt->address + offset), sizeof(struct ldttss_desc))) { pr_alert("%s: 0x%hx -- GDT entry is not readable\n", name, index); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bda909e3e37e..8b4afad84f4a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -737,7 +737,7 @@ static void __init test_wp_bit(void) __set_fixmap(FIX_WP_TEST, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO); - if (probe_kernel_write((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) { + if (copy_to_kernel_nofault((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) { clear_fixmap(FIX_WP_TEST); printk(KERN_CONT "Ok.\n"); return; diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index e1d7d7477c22..92ec176a7293 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -9,7 +9,7 @@ static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits) return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); } -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { unsigned long vaddr = (unsigned long)unsafe_src; @@ -22,7 +22,7 @@ bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) canonical_address(vaddr, boot_cpu_data.x86_virt_bits) == vaddr; } #else -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return (unsigned long)unsafe_src >= TASK_SIZE_MAX; } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 33b309d65955..acc49fa6a097 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -386,7 +386,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) preempt_disable(); - probe_kernel_read(&dummy, v, 1); + copy_from_kernel_nofault(&dummy, v, 1); if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 31cae88a730b..934c92dcb9ab 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -171,7 +171,7 @@ static ssize_t read_mem(struct file *file, char __user *buf, if (!ptr) goto failed; - probe = probe_kernel_read(bounce, ptr, sz); + probe = copy_from_kernel_nofault(bounce, ptr, sz); unxlate_dev_mem_ptr(p, ptr); if (probe) goto failed; diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c index c9aa15fb86a9..193b40e7aec0 100644 --- a/drivers/dio/dio.c +++ b/drivers/dio/dio.c @@ -135,7 +135,8 @@ int __init dio_find(int deviceid) else va = ioremap(pa, PAGE_SIZE); - if (probe_kernel_read(&i, (unsigned char *)va + DIO_IDOFF, 1)) { + if (copy_from_kernel_nofault(&i, + (unsigned char *)va + DIO_IDOFF, 1)) { if (scode >= DIOII_SCBASE) iounmap(va); continue; /* no board present at that select code */ @@ -208,7 +209,8 @@ static int __init dio_init(void) else va = ioremap(pa, PAGE_SIZE); - if (probe_kernel_read(&i, (unsigned char *)va + DIO_IDOFF, 1)) { + if (copy_from_kernel_nofault(&i, + (unsigned char *)va + DIO_IDOFF, 1)) { if (scode >= DIOII_SCBASE) iounmap(va); continue; /* no board present at that select code */ diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 654252361653..13eacf6ab431 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -1021,7 +1021,7 @@ static int __init hp_sdc_register(void) hp_sdc.base_io = (unsigned long) 0xf0428000; hp_sdc.data_io = (unsigned long) hp_sdc.base_io + 1; hp_sdc.status_io = (unsigned long) hp_sdc.base_io + 3; - if (!probe_kernel_read(&i, (unsigned char *)hp_sdc.data_io, 1)) + if (!copy_from_kernel_nofault(&i, (unsigned char *)hp_sdc.data_io, 1)) hp_sdc.dev = (void *)1; hp_sdc.dev_err = hp_sdc_init(); #endif diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index bccd341e9ae1..d5d2af4d10e6 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -828,7 +828,7 @@ static void run_plant_and_detach_test(int is_early) char before[BREAK_INSTR_SIZE]; char after[BREAK_INSTR_SIZE]; - probe_kernel_read(before, (char *)kgdbts_break_test, + copy_from_kernel_nofault(before, (char *)kgdbts_break_test, BREAK_INSTR_SIZE); init_simple_test(); ts.tst = plant_and_detach_test; @@ -836,8 +836,8 @@ static void run_plant_and_detach_test(int is_early) /* Activate test with initial breakpoint */ if (!is_early) kgdb_breakpoint(); - probe_kernel_read(after, (char *)kgdbts_break_test, - BREAK_INSTR_SIZE); + copy_from_kernel_nofault(after, (char *)kgdbts_break_test, + BREAK_INSTR_SIZE); if (memcmp(before, after, BREAK_INSTR_SIZE)) { printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n"); panic("kgdb memory corruption"); diff --git a/drivers/video/fbdev/hpfb.c b/drivers/video/fbdev/hpfb.c index f02be0db335e..8d418abdd767 100644 --- a/drivers/video/fbdev/hpfb.c +++ b/drivers/video/fbdev/hpfb.c @@ -402,7 +402,7 @@ int __init hpfb_init(void) if (err) return err; - err = probe_kernel_read(&i, (unsigned char *)INTFBVADDR + DIO_IDOFF, 1); + err = copy_from_kernel_nofault(&i, (unsigned char *)INTFBVADDR + DIO_IDOFF, 1); if (!err && (i == DIO_ID_FBUFFER) && topcat_sid_ok(sid = DIO_SECID(INTFBVADDR))) { if (!request_mem_region(INTFBPADDR, DIO_DEVSIZE, "Internal Topcat")) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 8ba492d44e68..e502414b3556 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -512,7 +512,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) * Using bounce buffer to bypass the * hardened user copy kernel text checks. */ - if (probe_kernel_read(buf, (void *) start, tsz)) { + if (copy_from_kernel_nofault(buf, (void *)start, + tsz)) { if (clear_user(buffer, tsz)) { ret = -EFAULT; goto out; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 7bcadca22100..70a3d9cd9113 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -301,13 +301,14 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, return 0; } -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size); +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); -extern long probe_kernel_read(void *dst, const void *src, size_t size); -extern long probe_user_read(void *dst, const void __user *src, size_t size); +long copy_from_kernel_nofault(void *dst, const void *src, size_t size); +long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); -extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); -extern long notrace probe_user_write(void __user *dst, const void *src, size_t size); +extern long probe_user_read(void *dst, const void __user *src, size_t size); +extern long notrace probe_user_write(void __user *dst, const void *src, + size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count); @@ -324,7 +325,7 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count); * Returns 0 on success, or -EFAULT. */ #define probe_kernel_address(addr, retval) \ - probe_kernel_read(&retval, addr, sizeof(retval)) + copy_from_kernel_nofault(&retval, addr, sizeof(retval)) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index ccc0f98abdd4..bc8d25f2ac8a 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -169,18 +169,18 @@ int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; - err = probe_kernel_write((char *)bpt->bpt_addr, + err = copy_to_kernel_nofault((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); return err; } int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { - return probe_kernel_write((char *)bpt->bpt_addr, + return copy_to_kernel_nofault((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 4b280fc7dd67..61774aec46b4 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -247,7 +247,7 @@ char *kgdb_mem2hex(char *mem, char *buf, int count) */ tmp = buf + count; - err = probe_kernel_read(tmp, mem, count); + err = copy_from_kernel_nofault(tmp, mem, count); if (err) return NULL; while (count > 0) { @@ -283,7 +283,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count) *tmp_raw |= hex_to_bin(*tmp_hex--) << 4; } - return probe_kernel_write(mem, tmp_raw, count); + return copy_to_kernel_nofault(mem, tmp_raw, count); } /* @@ -335,7 +335,7 @@ static int kgdb_ebin2mem(char *buf, char *mem, int count) size++; } - return probe_kernel_write(mem, c, size); + return copy_to_kernel_nofault(mem, c, size); } #if DBG_MAX_REG_NUM > 0 diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index ec190569f690..5c7949061671 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2326,7 +2326,8 @@ void kdb_ps1(const struct task_struct *p) int cpu; unsigned long tmp; - if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long))) + if (!p || + copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long))) return; cpu = kdb_process_cpu(p); diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index b8e6306e7e13..004c5b6c87f8 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -325,7 +325,7 @@ char *kdb_strdup(const char *str, gfp_t type) */ int kdb_getarea_size(void *res, unsigned long addr, size_t size) { - int ret = probe_kernel_read((char *)res, (char *)addr, size); + int ret = copy_from_kernel_nofault((char *)res, (char *)addr, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr); @@ -350,7 +350,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size) */ int kdb_putarea_size(unsigned long addr, void *res, size_t size) { - int ret = probe_kernel_read((char *)addr, (char *)res, size); + int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr); @@ -624,7 +624,8 @@ char kdb_task_state_char (const struct task_struct *p) char state; unsigned long tmp; - if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long))) + if (!p || + copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long))) return 'E'; cpu = kdb_process_cpu(p); diff --git a/kernel/kthread.c b/kernel/kthread.c index 8e3d2d7fdf5e..132f84a5fde3 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -201,7 +201,7 @@ void *kthread_probe_data(struct task_struct *task) struct kthread *kthread = to_kthread(task); void *data = NULL; - probe_kernel_read(&data, &kthread->data, sizeof(data)); + copy_from_kernel_nofault(&data, &kthread->data, sizeof(data)); return data; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e729c9e587a0..204afc12425f 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -196,7 +196,7 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) if (unlikely(ret < 0)) goto fail; - ret = probe_kernel_read(dst, unsafe_ptr, size); + ret = copy_from_kernel_nofault(dst, unsafe_ptr, size); if (unlikely(ret < 0)) goto fail; return ret; @@ -661,7 +661,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, copy_size = (fmt[i + 2] == '4') ? 4 : 16; - err = probe_kernel_read(bufs->buf[memcpy_cnt], + err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt], (void *) (long) args[fmt_cnt], copy_size); if (err < 0) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6048f1be26d2..841c74863ff8 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1222,7 +1222,7 @@ fetch_store_strlen(unsigned long addr) #endif do { - ret = probe_kernel_read(&c, (u8 *)addr + len, 1); + ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1); len++; } while (c && ret == 0 && len < MAX_STRING_SIZE); @@ -1300,7 +1300,7 @@ probe_mem_read(void *dest, void *src, size_t size) if ((unsigned long)src < TASK_SIZE) return probe_mem_read_user(dest, src, size); #endif - return probe_kernel_read(dest, src, size); + return copy_from_kernel_nofault(dest, src, size); } /* Note that we don't verify it, since the code does not come from user space */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9fbe1e237563..c41c3c17b86a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4638,11 +4638,11 @@ void print_worker_info(const char *log_lvl, struct task_struct *task) * Carefully copy the associated workqueue's workfn, name and desc. * Keep the original last '\0' in case the original is garbage. */ - probe_kernel_read(&fn, &worker->current_func, sizeof(fn)); - probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq)); - probe_kernel_read(&wq, &pwq->wq, sizeof(wq)); - probe_kernel_read(name, wq->name, sizeof(name) - 1); - probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); + copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn)); + copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq)); + copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq)); + copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1); + copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1); if (fn || name[0] || desc[0]) { printk("%sWorkqueue: %s %ps", log_lvl, name, fn); diff --git a/mm/debug.c b/mm/debug.c index b5b1de8c71ac..4f376514744d 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -120,9 +120,9 @@ void __dump_page(struct page *page, const char *reason) * mapping can be invalid pointer and we don't want to crash * accessing it, so probe everything depending on it carefully */ - if (probe_kernel_read(&host, &mapping->host, + if (copy_from_kernel_nofault(&host, &mapping->host, sizeof(struct inode *)) || - probe_kernel_read(&a_ops, &mapping->a_ops, + copy_from_kernel_nofault(&a_ops, &mapping->a_ops, sizeof(struct address_space_operations *))) { pr_warn("failed to read mapping->host or a_ops, mapping not a valid kernel address?\n"); goto out_mapping; @@ -133,7 +133,7 @@ void __dump_page(struct page *page, const char *reason) goto out_mapping; } - if (probe_kernel_read(&dentry_first, + if (copy_from_kernel_nofault(&dentry_first, &host->i_dentry.first, sizeof(struct hlist_node *))) { pr_warn("mapping->a_ops:%ps with invalid mapping->host inode address %px\n", a_ops, host); @@ -146,7 +146,7 @@ void __dump_page(struct page *page, const char *reason) } dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); - if (probe_kernel_read(&dentry, dentry_ptr, + if (copy_from_kernel_nofault(&dentry, dentry_ptr, sizeof(struct dentry))) { pr_warn("mapping->aops:%ps with invalid mapping->host->i_dentry.first %px\n", a_ops, dentry_ptr); diff --git a/mm/maccess.c b/mm/maccess.c index 88845eda5047..cc5d8c6233c0 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -6,14 +6,15 @@ #include #include -bool __weak probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src, + size_t size) { return true; } #ifdef HAVE_GET_KERNEL_NOFAULT -#define probe_kernel_read_loop(dst, src, len, type, err_label) \ +#define copy_from_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __get_kernel_nofault(dst, src, type, err_label); \ dst += sizeof(type); \ @@ -21,25 +22,25 @@ bool __weak probe_kernel_read_allowed(const void *unsafe_src, size_t size) len -= sizeof(type); \ } -long probe_kernel_read(void *dst, const void *src, size_t size) +long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { - if (!probe_kernel_read_allowed(src, size)) + if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; pagefault_disable(); - probe_kernel_read_loop(dst, src, size, u64, Efault); - probe_kernel_read_loop(dst, src, size, u32, Efault); - probe_kernel_read_loop(dst, src, size, u16, Efault); - probe_kernel_read_loop(dst, src, size, u8, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u64, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u32, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u16, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } -EXPORT_SYMBOL_GPL(probe_kernel_read); +EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); -#define probe_kernel_write_loop(dst, src, len, type, err_label) \ +#define copy_to_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __put_kernel_nofault(dst, src, type, err_label); \ dst += sizeof(type); \ @@ -47,13 +48,13 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); len -= sizeof(type); \ } -long probe_kernel_write(void *dst, const void *src, size_t size) +long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { pagefault_disable(); - probe_kernel_write_loop(dst, src, size, u64, Efault); - probe_kernel_write_loop(dst, src, size, u32, Efault); - probe_kernel_write_loop(dst, src, size, u16, Efault); - probe_kernel_write_loop(dst, src, size, u8, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u64, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u32, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u16, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: @@ -67,7 +68,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) if (unlikely(count <= 0)) return 0; - if (!probe_kernel_read_allowed(unsafe_addr, count)) + if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; pagefault_disable(); @@ -87,7 +88,7 @@ Efault: } #else /* HAVE_GET_KERNEL_NOFAULT */ /** - * probe_kernel_read(): safely attempt to read from kernel-space + * copy_from_kernel_nofault(): safely attempt to read from kernel-space * @dst: pointer to the buffer that shall take the data * @src: address to read from * @size: size of the data chunk @@ -98,15 +99,15 @@ Efault: * * We ensure that the copy_from_user is executed in atomic context so that * do_page_fault() doesn't attempt to take mmap_lock. This makes - * probe_kernel_read() suitable for use within regions where the caller + * copy_from_kernel_nofault() suitable for use within regions where the caller * already holds mmap_lock, or other locks which nest inside mmap_lock. */ -long probe_kernel_read(void *dst, const void *src, size_t size) +long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); - if (!probe_kernel_read_allowed(src, size)) + if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; set_fs(KERNEL_DS); @@ -120,10 +121,10 @@ long probe_kernel_read(void *dst, const void *src, size_t size) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(probe_kernel_read); +EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); /** - * probe_kernel_write(): safely attempt to write to a location + * copy_to_kernel_nofault(): safely attempt to write to a location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk @@ -131,7 +132,7 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_kernel_write(void *dst, const void *src, size_t size) +long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); @@ -174,7 +175,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) if (unlikely(count <= 0)) return 0; - if (!probe_kernel_read_allowed(unsafe_addr, count)) + if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; set_fs(KERNEL_DS); diff --git a/mm/rodata_test.c b/mm/rodata_test.c index 5e313fa93276..2a99df7beeb3 100644 --- a/mm/rodata_test.c +++ b/mm/rodata_test.c @@ -25,7 +25,7 @@ void rodata_test(void) } /* test 2: write to the variable; this should fault */ - if (!probe_kernel_write((void *)&rodata_test_data, + if (!copy_to_kernel_nofault((void *)&rodata_test_data, (void *)&zero, sizeof(zero))) { pr_err("test data was not read only\n"); return; diff --git a/mm/slub.c b/mm/slub.c index b8f798b50d44..fe81773fd97e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -292,7 +292,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) return get_freepointer(s, object); freepointer_addr = (unsigned long)object + s->offset; - probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p)); + copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p)); return freelist_ptr(s, p, freepointer_addr); } -- cgit v1.2.3 From c0ee37e85e0e47402b8bbe35b6cec8e06937ca58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:54 +0200 Subject: maccess: rename probe_user_{read,write} to copy_{from,to}_user_nofault Better describe what these functions do. Suggested-by: Linus Torvalds Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/process.c | 3 ++- arch/powerpc/kvm/book3s_64_mmu_radix.c | 4 ++-- arch/powerpc/lib/inst.c | 6 +++--- arch/powerpc/oprofile/backtrace.c | 6 ++++-- arch/powerpc/perf/callchain_32.c | 2 +- arch/powerpc/perf/callchain_64.c | 2 +- arch/powerpc/perf/core-book3s.c | 3 ++- arch/powerpc/sysdev/fsl_pci.c | 4 ++-- include/linux/uaccess.h | 4 ++-- kernel/trace/bpf_trace.c | 4 ++-- kernel/trace/trace_kprobe.c | 2 +- mm/maccess.c | 12 ++++++------ 12 files changed, 28 insertions(+), 24 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7bb7faf84490..d4d0d1048500 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1305,7 +1305,8 @@ void show_user_instructions(struct pt_regs *regs) for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) { int instr; - if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) { + if (copy_from_user_nofault(&instr, (void __user *)pc, + sizeof(instr))) { seq_buf_printf(&s, "XXXXXXXX "); continue; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 3cb0c9843d01..e738ea652192 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -64,9 +64,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, isync(); if (is_load) - ret = probe_user_read(to, (const void __user *)from, n); + ret = copy_from_user_nofault(to, (const void __user *)from, n); else - ret = probe_user_write((void __user *)to, from, n); + ret = copy_to_user_nofault((void __user *)to, from, n); /* switch the pid first to avoid running host with unallocated pid */ if (quadrant == 1 && pid != old_pid) diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index 6c7a20af9fd6..9cc17eb62462 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -15,11 +15,11 @@ int probe_user_read_inst(struct ppc_inst *inst, unsigned int val, suffix; int err; - err = probe_user_read(&val, nip, sizeof(val)); + err = copy_from_user_nofault(&val, nip, sizeof(val)); if (err) return err; if (get_op(val) == OP_PREFIX) { - err = probe_user_read(&suffix, (void __user *)nip + 4, 4); + err = copy_from_user_nofault(&suffix, (void __user *)nip + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); @@ -51,7 +51,7 @@ int probe_user_read_inst(struct ppc_inst *inst, unsigned int val; int err; - err = probe_user_read(&val, nip, sizeof(val)); + err = copy_from_user_nofault(&val, nip, sizeof(val)); if (!err) *inst = ppc_inst(val); diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index 6f347fa29f41..9db7ada79d10 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -33,7 +33,8 @@ static unsigned int user_getsp32(unsigned int sp, int is_first) * which means that we've done all that we can do from * interrupt context. */ - if (probe_user_read(stack_frame, (void __user *)p, sizeof(stack_frame))) + if (copy_from_user_nofault(stack_frame, (void __user *)p, + sizeof(stack_frame))) return 0; if (!is_first) @@ -51,7 +52,8 @@ static unsigned long user_getsp64(unsigned long sp, int is_first) { unsigned long stack_frame[3]; - if (probe_user_read(stack_frame, (void __user *)sp, sizeof(stack_frame))) + if (copy_from_user_nofault(stack_frame, (void __user *)sp, + sizeof(stack_frame))) return 0; if (!is_first) diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index f7d888d39cd3..542e68b8eae0 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -44,7 +44,7 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) ((unsigned long)ptr & 3)) return -EFAULT; - rc = probe_user_read(ret, ptr, sizeof(*ret)); + rc = copy_from_user_nofault(ret, ptr, sizeof(*ret)); if (IS_ENABLED(CONFIG_PPC64) && rc) return read_user_stack_slow(ptr, ret, 4); diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 814d1c2c2b9c..fa2a1b83b9b0 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -50,7 +50,7 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) ((unsigned long)ptr & 7)) return -EFAULT; - if (!probe_user_read(ret, ptr, sizeof(*ret))) + if (!copy_from_user_nofault(ret, ptr, sizeof(*ret))) return 0; return read_user_stack_slow(ptr, ret, 8); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index efe97ff82557..cd6a742ac6ef 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -426,7 +426,8 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Userspace: need copy instruction here then translate it */ - if (probe_user_read(&instr, (unsigned int __user *)addr, sizeof(instr))) + if (copy_from_user_nofault(&instr, (unsigned int __user *)addr, + sizeof(instr))) return 0; target = branch_target((struct ppc_inst *)&instr); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 4a8874bc1057..73fa37ca40ef 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1066,8 +1066,8 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) if (is_in_pci_mem_space(addr)) { if (user_mode(regs)) - ret = probe_user_read(&inst, (void __user *)regs->nip, - sizeof(inst)); + ret = copy_from_user_nofault(&inst, + (void __user *)regs->nip, sizeof(inst)); else ret = probe_kernel_address((void *)regs->nip, inst); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 70a3d9cd9113..bef48da242cc 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -306,8 +306,8 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); long copy_from_kernel_nofault(void *dst, const void *src, size_t size); long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); -extern long probe_user_read(void *dst, const void __user *src, size_t size); -extern long notrace probe_user_write(void __user *dst, const void *src, +long copy_from_user_nofault(void *dst, const void __user *src, size_t size); +long notrace copy_to_user_nofault(void __user *dst, const void *src, size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 204afc12425f..dc05626979b8 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -141,7 +141,7 @@ bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr) { int ret; - ret = probe_user_read(dst, unsafe_ptr, size); + ret = copy_from_user_nofault(dst, unsafe_ptr, size); if (unlikely(ret < 0)) memset(dst, 0, size); return ret; @@ -326,7 +326,7 @@ BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src, if (unlikely(!nmi_uaccess_okay())) return -EPERM; - return probe_user_write(unsafe_ptr, src, size); + return copy_to_user_nofault(unsafe_ptr, src, size); } static const struct bpf_func_proto bpf_probe_write_user_proto = { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 841c74863ff8..aefb6065b508 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1290,7 +1290,7 @@ probe_mem_read_user(void *dest, void *src, size_t size) { const void __user *uaddr = (__force const void __user *)src; - return probe_user_read(dest, uaddr, size); + return copy_from_user_nofault(dest, uaddr, size); } static nokprobe_inline int diff --git a/mm/maccess.c b/mm/maccess.c index cc5d8c6233c0..f98ff91e32c6 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -194,7 +194,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) #endif /* HAVE_GET_KERNEL_NOFAULT */ /** - * probe_user_read(): safely attempt to read from a user-space location + * copy_from_user_nofault(): safely attempt to read from a user-space location * @dst: pointer to the buffer that shall take the data * @src: address to read from. This must be a user address. * @size: size of the data chunk @@ -202,7 +202,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) * Safely read from user address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_user_read(void *dst, const void __user *src, size_t size) +long copy_from_user_nofault(void *dst, const void __user *src, size_t size) { long ret = -EFAULT; mm_segment_t old_fs = get_fs(); @@ -219,10 +219,10 @@ long probe_user_read(void *dst, const void __user *src, size_t size) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(probe_user_read); +EXPORT_SYMBOL_GPL(copy_from_user_nofault); /** - * probe_user_write(): safely attempt to write to a user-space location + * copy_to_user_nofault(): safely attempt to write to a user-space location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk @@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(probe_user_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_user_write(void __user *dst, const void *src, size_t size) +long copy_to_user_nofault(void __user *dst, const void *src, size_t size) { long ret = -EFAULT; mm_segment_t old_fs = get_fs(); @@ -247,7 +247,7 @@ long probe_user_write(void __user *dst, const void *src, size_t size) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(probe_user_write); +EXPORT_SYMBOL_GPL(copy_to_user_nofault); /** * strncpy_from_user_nofault: - Copy a NUL terminated string from unsafe user -- cgit v1.2.3 From 25f12ae45fc1931a1dce3cc59f9989a9d87834b0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:55 +0200 Subject: maccess: rename probe_kernel_address to get_kernel_nofault Better describe what this helper does, and match the naming of copy_from_kernel_nofault. Also switch the argument order around, so that it acts and looks like get_user(). Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/traps.c | 2 +- arch/arm/mm/alignment.c | 4 ++-- arch/arm64/kernel/traps.c | 2 +- arch/ia64/include/asm/sections.h | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/include/asm/sections.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- arch/riscv/kernel/kgdb.c | 4 ++-- arch/riscv/kernel/traps.c | 4 ++-- arch/s390/mm/fault.c | 2 +- arch/sh/kernel/traps.c | 2 +- arch/x86/kernel/probe_roms.c | 20 ++++++++++---------- arch/x86/kernel/traps.c | 2 +- arch/x86/mm/fault.c | 6 +++--- arch/x86/pci/pcbios.c | 2 +- include/linux/uaccess.h | 10 +++++----- lib/test_lockup.c | 6 +++--- 20 files changed, 40 insertions(+), 40 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 65a3b1e75480..49ce15c3612d 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -396,7 +396,7 @@ int is_valid_bugaddr(unsigned long pc) u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif - if (probe_kernel_address((unsigned *)pc, bkpt)) + if (get_kernel_nofault(bkpt, (unsigned *)pc)) return 0; return bkpt == insn; diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 84718eddae60..81a627e6e1c5 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -774,7 +774,7 @@ static int alignment_get_arm(struct pt_regs *regs, u32 *ip, u32 *inst) if (user_mode(regs)) fault = get_user(instr, ip); else - fault = probe_kernel_address(ip, instr); + fault = get_kernel_nofault(instr, ip); *inst = __mem_to_opcode_arm(instr); @@ -789,7 +789,7 @@ static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst) if (user_mode(regs)) fault = get_user(instr, ip); else - fault = probe_kernel_address(ip, instr); + fault = get_kernel_nofault(instr, ip); *inst = __mem_to_opcode_thumb16(instr); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 50cc30acf106..227b2d9bae3d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -376,7 +376,7 @@ static int call_undef_hook(struct pt_regs *regs) if (!user_mode(regs)) { __le32 instr_le; - if (probe_kernel_address((__force __le32 *)pc, instr_le)) + if (get_kernel_nofault(instr_le, (__force __le32 *)pc)) goto exit; instr = le32_to_cpu(instr_le); } else if (compat_thumb_mode(regs)) { diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index cea15f2dd38d..ad4fc06e5f4b 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct fdesc *desc = ptr; void *p; - if (!probe_kernel_address(&desc->ip, p)) + if (!get_kernel_nofault(p, &desc->ip)) ptr = p; return ptr; } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 230a6422b99f..6c435dbccca0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -293,7 +293,7 @@ void *dereference_function_descriptor(void *ptr) Elf64_Fdesc *desc = ptr; void *p; - if (!probe_kernel_address(&desc->addr, p)) + if (!get_kernel_nofault(p, &desc->addr)) ptr = p; return ptr; } diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index d19871763ed4..bd311616fca8 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -85,7 +85,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct ppc64_opd_entry *desc = ptr; void *p; - if (!probe_kernel_address(&desc->funcaddr, p)) + if (!get_kernel_nofault(p, &desc->funcaddr)) ptr = p; return ptr; } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 652b2852bea3..e14a1862a3ca 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -421,7 +421,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) unsigned int instr; struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = probe_kernel_address(addr, instr); + err = get_kernel_nofault(instr, addr); if (err) return err; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 6f96f65ebfe8..9cc792a3a6a9 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -289,7 +289,7 @@ int kprobe_handler(struct pt_regs *regs) if (!p) { unsigned int instr; - if (probe_kernel_address(addr, instr)) + if (get_kernel_nofault(instr, addr)) goto no_kprobe; if (instr != BREAKPOINT_INSTRUCTION) { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d4d0d1048500..30955a0c32d0 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1271,7 +1271,7 @@ static void show_instructions(struct pt_regs *regs) #endif if (!__kernel_text_address(pc) || - probe_kernel_address((const void *)pc, instr)) { + get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); } else { if (regs->nip == pc) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 73fa37ca40ef..040b9d01c079 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1069,7 +1069,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) ret = copy_from_user_nofault(&inst, (void __user *)regs->nip, sizeof(inst)); else - ret = probe_kernel_address((void *)regs->nip, inst); + ret = get_kernel_nofault(inst, (void *)regs->nip); if (!ret && mcheck_handle_load(regs, inst)) { regs->nip += 4; diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index a21fb21883e7..c3275f42d1ac 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -62,7 +62,7 @@ int get_step_address(struct pt_regs *regs, unsigned long *next_addr) unsigned int rs1_num, rs2_num; int op_code; - if (probe_kernel_address((void *)pc, op_code)) + if (get_kernel_nofault(op_code, (void *)pc)) return -EINVAL; if ((op_code & __INSN_LENGTH_MASK) != __INSN_LENGTH_GE_32) { if (is_c_jalr_insn(op_code) || is_c_jr_insn(op_code)) { @@ -146,7 +146,7 @@ int do_single_step(struct pt_regs *regs) return error; /* Store the op code in the stepped address */ - error = probe_kernel_address((void *)addr, stepped_opcode); + error = get_kernel_nofault(stepped_opcode, (void *)addr); if (error) return error; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index ecec1778e3a4..7d95cce5e47c 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -137,7 +137,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc) { bug_insn_t insn; - if (probe_kernel_address((bug_insn_t *)pc, insn)) + if (get_kernel_nofault(insn, (bug_insn_t *)pc)) return 0; return GET_INSN_LENGTH(insn); @@ -165,7 +165,7 @@ int is_valid_bugaddr(unsigned long pc) if (pc < VMALLOC_START) return 0; - if (probe_kernel_address((bug_insn_t *)pc, insn)) + if (get_kernel_nofault(insn, (bug_insn_t *)pc)) return 0; if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) return (insn == __BUG_INSN_32); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 6a24751557f0..d53c2e2ea1fd 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -105,7 +105,7 @@ static int bad_address(void *p) { unsigned long dummy; - return probe_kernel_address((unsigned long *)p, dummy); + return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long asce, unsigned long address) diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index a33025451fcd..9c3d32b80038 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -118,7 +118,7 @@ int is_valid_bugaddr(unsigned long addr) if (addr < PAGE_OFFSET) return 0; - if (probe_kernel_address((insn_size_t *)addr, opcode)) + if (get_kernel_nofault(opcode, (insn_size_t *)addr)) return 0; if (opcode == TRAPA_BUG_OPCODE) return 1; diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index ee0286390a4c..65b0dd2bf25c 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -99,7 +99,7 @@ static bool probe_list(struct pci_dev *pdev, unsigned short vendor, unsigned short device; do { - if (probe_kernel_address(rom_list, device) != 0) + if (get_kernel_nofault(device, rom_list) != 0) device = 0; if (device && match_id(pdev, vendor, device)) @@ -125,13 +125,13 @@ static struct resource *find_oprom(struct pci_dev *pdev) break; rom = isa_bus_to_virt(res->start); - if (probe_kernel_address(rom + 0x18, offset) != 0) + if (get_kernel_nofault(offset, rom + 0x18) != 0) continue; - if (probe_kernel_address(rom + offset + 0x4, vendor) != 0) + if (get_kernel_nofault(vendor, rom + offset + 0x4) != 0) continue; - if (probe_kernel_address(rom + offset + 0x6, device) != 0) + if (get_kernel_nofault(device, rom + offset + 0x6) != 0) continue; if (match_id(pdev, vendor, device)) { @@ -139,8 +139,8 @@ static struct resource *find_oprom(struct pci_dev *pdev) break; } - if (probe_kernel_address(rom + offset + 0x8, list) == 0 && - probe_kernel_address(rom + offset + 0xc, rev) == 0 && + if (get_kernel_nofault(list, rom + offset + 0x8) == 0 && + get_kernel_nofault(rev, rom + offset + 0xc) == 0 && rev >= 3 && list && probe_list(pdev, vendor, rom + offset + list)) { oprom = res; @@ -183,14 +183,14 @@ static int __init romsignature(const unsigned char *rom) const unsigned short * const ptr = (const unsigned short *)rom; unsigned short sig; - return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; + return get_kernel_nofault(sig, ptr) == 0 && sig == ROMSIGNATURE; } static int __init romchecksum(const unsigned char *rom, unsigned long length) { unsigned char sum, c; - for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) + for (sum = 0; length && get_kernel_nofault(c, rom++) == 0; length--) sum += c; return !length && !sum; } @@ -211,7 +211,7 @@ void __init probe_roms(void) video_rom_resource.start = start; - if (probe_kernel_address(rom + 2, c) != 0) + if (get_kernel_nofault(c, rom + 2) != 0) continue; /* 0 < length <= 0x7f * 512, historically */ @@ -249,7 +249,7 @@ void __init probe_roms(void) if (!romsignature(rom)) continue; - if (probe_kernel_address(rom + 2, c) != 0) + if (get_kernel_nofault(c, rom + 2) != 0) continue; /* 0 < length <= 0x7f * 512, historically */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7003f2e7b163..f9727b96961f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -91,7 +91,7 @@ int is_valid_bugaddr(unsigned long addr) if (addr < TASK_SIZE_MAX) return 0; - if (probe_kernel_address((unsigned short *)addr, ud)) + if (get_kernel_nofault(ud, (unsigned short *)addr)) return 0; return ud == INSN_UD0 || ud == INSN_UD2; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e996aa3833b8..1ead568c0101 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -99,7 +99,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, return !instr_lo || (instr_lo>>1) == 1; case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ - if (probe_kernel_address(instr, opcode)) + if (get_kernel_nofault(opcode, instr)) return 0; *prefetch = (instr_lo == 0xF) && @@ -133,7 +133,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) while (instr < max_instr) { unsigned char opcode; - if (probe_kernel_address(instr, opcode)) + if (get_kernel_nofault(opcode, instr)) break; instr++; @@ -301,7 +301,7 @@ static int bad_address(void *p) { unsigned long dummy; - return probe_kernel_address((unsigned long *)p, dummy); + return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long address) diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 9c97d814125e..4f15280732ed 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -302,7 +302,7 @@ static const struct pci_raw_ops *__init pci_find_bios(void) check <= (union bios32 *) __va(0xffff0); ++check) { long sig; - if (probe_kernel_address(&check->fields.signature, sig)) + if (get_kernel_nofault(sig, &check->fields.signature)) continue; if (check->fields.signature != BIOS32_SIGNATURE) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index bef48da242cc..a508a3c08879 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -318,14 +318,14 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long strnlen_user_nofault(const void __user *unsafe_addr, long count); /** - * probe_kernel_address(): safely attempt to read from a location - * @addr: address to read from - * @retval: read into this variable + * get_kernel_nofault(): safely attempt to read from a location + * @val: read into this variable + * @ptr: address to read from * * Returns 0 on success, or -EFAULT. */ -#define probe_kernel_address(addr, retval) \ - copy_from_kernel_nofault(&retval, addr, sizeof(retval)) +#define get_kernel_nofault(val, ptr) \ + copy_from_kernel_nofault(&(val), (ptr), sizeof(val)) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) diff --git a/lib/test_lockup.c b/lib/test_lockup.c index f258743a0d83..bd7c7ff39f6b 100644 --- a/lib/test_lockup.c +++ b/lib/test_lockup.c @@ -419,8 +419,8 @@ static bool test_kernel_ptr(unsigned long addr, int size) /* should be at least readable kernel address */ if (access_ok(ptr, 1) || access_ok(ptr + size - 1, 1) || - probe_kernel_address(ptr, buf) || - probe_kernel_address(ptr + size - 1, buf)) { + get_kernel_nofault(buf, ptr) || + get_kernel_nofault(buf, ptr + size - 1)) { pr_err("invalid kernel ptr: %#lx\n", addr); return true; } @@ -437,7 +437,7 @@ static bool __maybe_unused test_magic(unsigned long addr, int offset, if (!addr) return false; - if (probe_kernel_address(ptr, magic) || magic != expected) { + if (get_kernel_nofault(magic, ptr) || magic != expected) { pr_err("invalid magic at %#lx + %#x = %#x, expected %#x\n", addr, offset, magic, expected); return true; -- cgit v1.2.3 From 0c389d89abc28edf70ae847ee2fa55acb267b826 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 18 Jun 2020 12:10:37 -0700 Subject: maccess: make get_kernel_nofault() check for minimal type compatibility Now that we've renamed probe_kernel_address() to get_kernel_nofault() and made it look and behave more in line with get_user(), some of the subtle type behavior differences end up being more obvious and possibly dangerous. When you do get_user(val, user_ptr); the type of the access comes from the "user_ptr" part, and the above basically acts as val = *user_ptr; by design (except, of course, for the fact that the actual dereference is done with a user access). Note how in the above case, the type of the end result comes from the pointer argument, and then the value is cast to the type of 'val' as part of the assignment. So the type of the pointer is ultimately the more important type both for the access itself. But 'get_kernel_nofault()' may now _look_ similar, but it behaves very differently. When you do get_kernel_nofault(val, kernel_ptr); it behaves like val = *(typeof(val) *)kernel_ptr; except, of course, for the fact that the actual dereference is done with exception handling so that a faulting access is suppressed and returned as the error code. But note how different the casting behavior of the two superficially similar accesses are: one does the actual access in the size of the type the pointer points to, while the other does the access in the size of the target, and ignores the pointer type entirely. Actually changing get_kernel_nofault() to act like get_user() is almost certainly the right thing to do eventually, but in the meantime this patch adds logit to at least verify that the pointer type is compatible with the type of the result. In many cases, this involves just casting the pointer to 'void *' to make it obvious that the type of the pointer is not the important part. It's not how 'get_user()' acts, but at least the behavioral difference is now obvious and explicit. Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/traps.c | 2 +- arch/ia64/include/asm/sections.h | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/include/asm/sections.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/x86/kernel/probe_roms.c | 4 ++-- include/linux/uaccess.h | 6 ++++-- 7 files changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 49ce15c3612d..17d5a785df28 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -396,7 +396,7 @@ int is_valid_bugaddr(unsigned long pc) u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif - if (get_kernel_nofault(bkpt, (unsigned *)pc)) + if (get_kernel_nofault(bkpt, (void *)pc)) return 0; return bkpt == insn; diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index ad4fc06e5f4b..3a033d2008b3 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct fdesc *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->ip)) + if (!get_kernel_nofault(p, (void *)&desc->ip)) ptr = p; return ptr; } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 6c435dbccca0..b7abb12edd3a 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -293,7 +293,7 @@ void *dereference_function_descriptor(void *ptr) Elf64_Fdesc *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->addr)) + if (!get_kernel_nofault(p, (void *)&desc->addr)) ptr = p; return ptr; } diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index bd311616fca8..324d7b298ec3 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -85,7 +85,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct ppc64_opd_entry *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->funcaddr)) + if (!get_kernel_nofault(p, (void *)&desc->funcaddr)) ptr = p; return ptr; } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index e14a1862a3ca..409080208a6c 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -421,7 +421,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) unsigned int instr; struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = get_kernel_nofault(instr, addr); + err = get_kernel_nofault(instr, (unsigned *) addr); if (err) return err; diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 65b0dd2bf25c..9e1def3744f2 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -94,7 +94,7 @@ static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short } static bool probe_list(struct pci_dev *pdev, unsigned short vendor, - const unsigned char *rom_list) + const void *rom_list) { unsigned short device; @@ -119,7 +119,7 @@ static struct resource *find_oprom(struct pci_dev *pdev) for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) { struct resource *res = &adapter_rom_resources[i]; unsigned short offset, vendor, device, list, rev; - const unsigned char *rom; + const void *rom; if (res->end == 0) break; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index a508a3c08879..0a76ddc07d59 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -324,8 +324,10 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count); * * Returns 0 on success, or -EFAULT. */ -#define get_kernel_nofault(val, ptr) \ - copy_from_kernel_nofault(&(val), (ptr), sizeof(val)) +#define get_kernel_nofault(val, ptr) ({ \ + const typeof(val) *__gk_ptr = (ptr); \ + copy_from_kernel_nofault(&(val), __gk_ptr, sizeof(val));\ +}) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) -- cgit v1.2.3 From c0e1c8c22bebecef40097c80c1c74492ff96d081 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 15 Jun 2020 12:57:59 +0000 Subject: powerpc/8xx: Provide ptep_get() with 16k pages READ_ONCE() now enforces atomic read, which leads to: CC mm/gup.o In file included from ./include/linux/kernel.h:11:0, from mm/gup.c:2: In function 'gup_hugepte.constprop', inlined from 'gup_huge_pd.isra.79' at mm/gup.c:2465:8: ./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_222' declared with attribute error: Unsupported access size for {READ,WRITE}_ONCE(). _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:373:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^ ./include/linux/compiler.h:392:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:405:2: note: in expansion of macro 'compiletime_assert' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ ./include/linux/compiler.h:291:2: note: in expansion of macro 'compiletime_assert_rwonce_type' compiletime_assert_rwonce_type(x); \ ^ mm/gup.c:2428:8: note: in expansion of macro 'READ_ONCE' pte = READ_ONCE(*ptep); ^ In function 'gup_get_pte', inlined from 'gup_pte_range' at mm/gup.c:2228:9, inlined from 'gup_pmd_range' at mm/gup.c:2613:15, inlined from 'gup_pud_range' at mm/gup.c:2641:15, inlined from 'gup_p4d_range' at mm/gup.c:2666:15, inlined from 'gup_pgd_range' at mm/gup.c:2694:15, inlined from 'internal_get_user_pages_fast' at mm/gup.c:2795:3: ./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_219' declared with attribute error: Unsupported access size for {READ,WRITE}_ONCE(). _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:373:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^ ./include/linux/compiler.h:392:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:405:2: note: in expansion of macro 'compiletime_assert' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ ./include/linux/compiler.h:291:2: note: in expansion of macro 'compiletime_assert_rwonce_type' compiletime_assert_rwonce_type(x); \ ^ mm/gup.c:2199:9: note: in expansion of macro 'READ_ONCE' return READ_ONCE(*ptep); ^ make[2]: *** [mm/gup.o] Error 1 Define ptep_get() on 8xx when using 16k pages. Fixes: 9e343b467c70 ("READ_ONCE: Enforce atomicity for {READ,WRITE}_ONCE() memory accesses") Signed-off-by: Christophe Leroy Acked-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/341688399c1b102756046d19ea6ce39db1ae4742.1592225558.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 5a590ceaec14..b0afbdd07740 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -284,6 +284,16 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return __pte(pte_update(mm, addr, ptep, ~0, 0, 0)); } +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) +#define __HAVE_ARCH_PTEP_GET +static inline pte_t ptep_get(pte_t *ptep) +{ + pte_t pte = {READ_ONCE(ptep->pte), 0, 0, 0}; + + return pte; +} +#endif + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -- cgit v1.2.3 From 7e4773f73dcfb92e7e33532162f722ec291e75a4 Mon Sep 17 00:00:00 2001 From: Arseny Solokha Date: Sat, 13 Jun 2020 23:28:01 +0700 Subject: powerpc/fsl_booke/32: Fix build with CONFIG_RANDOMIZE_BASE Building the current 5.8 kernel for an e500 machine with CONFIG_RANDOMIZE_BASE=y and CONFIG_BLOCK=n yields the following failure: arch/powerpc/mm/nohash/kaslr_booke.c: In function 'kaslr_early_init': arch/powerpc/mm/nohash/kaslr_booke.c:387:2: error: implicit declaration of function 'flush_icache_range'; did you mean 'flush_tlb_range'? Indeed, including asm/cacheflush.h into kaslr_booke.c fixes the build. Fixes: 2b0e86cc5de6 ("powerpc/fsl_booke/32: implement KASLR infrastructure") Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Arseny Solokha Reviewed-by: Jason Yan Acked-by: Scott Wood [mpe: Tweak change log to mention CONFIG_BLOCK=n] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200613162801.1946619-1-asolokha@kb.kras.ru --- arch/powerpc/mm/nohash/kaslr_booke.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 4a75f2d9bf0e..bce0e5349978 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From c1ed1754f271f6b7acb1bfdc8cfb62220fbed423 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 11 Jun 2020 17:31:59 +0530 Subject: powerpc/kvm/book3s64: Fix kernel crash with nested kvm & DEBUG_VIRTUAL With CONFIG_DEBUG_VIRTUAL=y, __pa() checks for addr value and if it's less than PAGE_OFFSET it leads to a BUG(). #define __pa(x) ({ VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET); (unsigned long)(x) & 0x0fffffffffffffffUL; }) kernel BUG at arch/powerpc/kvm/book3s_64_mmu_radix.c:43! cpu 0x70: Vector: 700 (Program Check) at [c0000018a2187360] pc: c000000000161b30: __kvmhv_copy_tofrom_guest_radix+0x130/0x1f0 lr: c000000000161d5c: kvmhv_copy_from_guest_radix+0x3c/0x80 ... kvmhv_copy_from_guest_radix+0x3c/0x80 kvmhv_load_from_eaddr+0x48/0xc0 kvmppc_ld+0x98/0x1e0 kvmppc_load_last_inst+0x50/0x90 kvmppc_hv_emulate_mmio+0x288/0x2b0 kvmppc_book3s_radix_page_fault+0xd8/0x2b0 kvmppc_book3s_hv_page_fault+0x37c/0x1050 kvmppc_vcpu_run_hv+0xbb8/0x1080 kvmppc_vcpu_run+0x34/0x50 kvm_arch_vcpu_ioctl_run+0x2fc/0x410 kvm_vcpu_ioctl+0x2b4/0x8f0 ksys_ioctl+0xf4/0x150 sys_ioctl+0x28/0x80 system_call_exception+0x104/0x1d0 system_call_common+0xe8/0x214 kvmhv_copy_tofrom_guest_radix() uses a NULL value for to/from to indicate direction of copy. Avoid calling __pa() if the value is NULL to avoid the BUG(). Signed-off-by: Aneesh Kumar K.V [mpe: Massage change log a bit to mention CONFIG_DEBUG_VIRTUAL] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200611120159.680284-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index e738ea652192..6a73714759ba 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -40,7 +40,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */ if (kvmhv_on_pseries()) return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr, - __pa(to), __pa(from), n); + (to != NULL) ? __pa(to): 0, + (from != NULL) ? __pa(from): 0, n); quadrant = 1; if (!pid) -- cgit v1.2.3 From 19ab500edb5d6020010caba48ce3b4ce4182ab63 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 27 Jun 2020 12:31:46 +0530 Subject: powerpc/mm/pkeys: Make pkey access check work on execute_only_key Jan reported that LTP mmap03 was getting stuck in a page fault loop after commit c46241a370a6 ("powerpc/pkeys: Check vma before returning key fault error to the user"), as well as a minimised reproducer: #include #include #include #include #include int main(int ac, char **av) { int page_sz = getpagesize(); int fildes; char *addr; fildes = open("tempfile", O_WRONLY | O_CREAT, 0666); write(fildes, &fildes, sizeof(fildes)); close(fildes); fildes = open("tempfile", O_RDONLY); unlink("tempfile"); addr = mmap(0, page_sz, PROT_EXEC, MAP_FILE | MAP_PRIVATE, fildes, 0); printf("%d\n", *addr); return 0; } And noticed that access_pkey_error() in page fault handler now always seem to return false: __do_page_fault access_pkey_error(is_pkey: 1, is_exec: 0, is_write: 0) arch_vma_access_permitted pkey_access_permitted if (!is_pkey_enabled(pkey)) return true return false pkey_access_permitted() should not check if the pkey is available in UAMOR (using is_pkey_enabled()). The kernel needs to do that check only when allocating keys. This also makes sure the execute_only_key which is marked as non-manageable via UAMOR is handled correctly in pkey_access_permitted(), and fixes the bug. Fixes: c46241a370a6 ("powerpc/pkeys: Check vma before returning key fault error to the user") Reported-by: Jan Stancek Signed-off-by: Aneesh Kumar K.V [mpe: Include bug report details etc. in the change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200627070147.297535-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/pkeys.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 1199fc2bfaec..ca5fcb4bff32 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -353,9 +353,6 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) int pkey_shift; u64 amr; - if (!is_pkey_enabled(pkey)) - return true; - pkey_shift = pkeyshift(pkey); if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift))) return true; -- cgit v1.2.3 From 4557ac6b344b8cdf948ff8b007e8e1de34832f2e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 8 Jul 2020 17:49:42 +1000 Subject: powerpc/64s/exception: Fix 0x1500 interrupt handler crash A typo caused the interrupt handler to branch immediately to the common "unknown interrupt" handler and skip the special case test for denormal cause. This does not affect KVM softpatch handling (e.g., for POWER9 TM assist) because the KVM test was moved to common code by commit 9600f261acaa ("powerpc/64s/exception: Move KVM test to common code") just before this bug was introduced. Fixes: 3f7fbd97d07d ("powerpc/64s/exception: Clean up SRR specifiers") Reported-by: Paul Menzel Signed-off-by: Nicholas Piggin Tested-by: Paul Menzel [mpe: Split selftest into a separate patch] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200708074942.1713396-1-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fa080694e581..0fc8bad878b2 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2551,7 +2551,7 @@ EXC_VIRT_NONE(0x5400, 0x100) INT_DEFINE_BEGIN(denorm_exception) IVEC=0x1500 IHSRR=1 - IBRANCH_COMMON=0 + IBRANCH_TO_COMMON=0 IKVM_REAL=1 INT_DEFINE_END(denorm_exception) -- cgit v1.2.3 From 192b6a780598976feb7321ff007754f8511a4129 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 12 Jul 2020 18:50:47 +0530 Subject: powerpc/book3s64/pkeys: Fix pkey_access_permitted() for execute disable pkey Even if the IAMR value denies execute access, the current code returns true from pkey_access_permitted() for an execute permission check, if the AMR read pkey bit is cleared. This results in repeated page fault loop with a test like below: #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #ifdef SYS_pkey_mprotect #undef SYS_pkey_mprotect #endif #ifdef SYS_pkey_alloc #undef SYS_pkey_alloc #endif #ifdef SYS_pkey_free #undef SYS_pkey_free #endif #undef PKEY_DISABLE_EXECUTE #define PKEY_DISABLE_EXECUTE 0x4 #define SYS_pkey_mprotect 386 #define SYS_pkey_alloc 384 #define SYS_pkey_free 385 #define PPC_INST_NOP 0x60000000 #define PPC_INST_BLR 0x4e800020 #define PROT_RWX (PROT_READ | PROT_WRITE | PROT_EXEC) static int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey) { return syscall(SYS_pkey_mprotect, addr, len, prot, pkey); } static int sys_pkey_alloc(unsigned long flags, unsigned long access_rights) { return syscall(SYS_pkey_alloc, flags, access_rights); } static int sys_pkey_free(int pkey) { return syscall(SYS_pkey_free, pkey); } static void do_execute(void *region) { /* jump to region */ asm volatile( "mtctr %0;" "bctrl" : : "r"(region) : "ctr", "lr"); } static void do_protect(void *region) { size_t pgsize; int i, pkey; pgsize = getpagesize(); pkey = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE); assert (pkey > 0); /* perform mprotect */ assert(!sys_pkey_mprotect(region, pgsize, PROT_RWX, pkey)); do_execute(region); /* free pkey */ assert(!sys_pkey_free(pkey)); } int main(int argc, char **argv) { size_t pgsize, numinsns; unsigned int *region; int i; /* allocate memory region to protect */ pgsize = getpagesize(); region = memalign(pgsize, pgsize); assert(region != NULL); assert(!mprotect(region, pgsize, PROT_RWX)); /* fill page with NOPs with a BLR at the end */ numinsns = pgsize / sizeof(region[0]); for (i = 0; i < numinsns - 1; i++) region[i] = PPC_INST_NOP; region[i] = PPC_INST_BLR; do_protect(region); return EXIT_SUCCESS; } The fix is to only check the IAMR for an execute check, the AMR value is not relevant. Fixes: f2407ef3ba22 ("powerpc: helper to validate key-access permissions of a pte") Cc: stable@vger.kernel.org # v4.16+ Reported-by: Sandipan Das Signed-off-by: Aneesh Kumar K.V [mpe: Add detail to change log, tweak wording & formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200712132047.1038594-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/pkeys.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index ca5fcb4bff32..d174106bab67 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -354,12 +354,14 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) u64 amr; pkey_shift = pkeyshift(pkey); - if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift))) - return true; + if (execute) + return !(read_iamr() & (IAMR_EX_BIT << pkey_shift)); + + amr = read_amr(); + if (write) + return !(amr & (AMR_WR_BIT << pkey_shift)); - amr = read_amr(); /* Delay reading amr until absolutely needed */ - return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) || - (write && !(amr & (AMR_WR_BIT << pkey_shift)))); + return !(amr & (AMR_RD_BIT << pkey_shift)); } bool arch_pte_access_permitted(u64 pte, bool write, bool execute) -- cgit v1.2.3 From b710d27bf72068b15b2f0305d825988183e2ff28 Mon Sep 17 00:00:00 2001 From: Satheesh Rajendran Date: Fri, 19 Jun 2020 12:31:13 +0530 Subject: powerpc/pseries/svm: Fix incorrect check for shared_lppaca_size Early secure guest boot hits the below crash while booting with vcpus numbers aligned with page boundary for PAGE size of 64k and LPPACA size of 1k i.e 64, 128 etc. Partition configured for 64 cpus. CPU maps initialized for 1 thread per core ------------[ cut here ]------------ kernel BUG at arch/powerpc/kernel/paca.c:89! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries This is due to the BUG_ON() for shared_lppaca_total_size equal to shared_lppaca_size. Instead the code should only BUG_ON() if we have exceeded the total_size, which indicates we've overflowed the array. Fixes: bd104e6db6f0 ("powerpc/pseries/svm: Use shared memory for LPPACA structures") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Satheesh Rajendran Reviewed-by: Laurent Dufour Reviewed-by: Thiago Jung Bauermann [mpe: Reword change log to clarify we're fixing not removing the check] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200619070113.16696-1-sathnaga@linux.vnet.ibm.com --- arch/powerpc/kernel/paca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 2168372b792d..74da65aacbc9 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -87,7 +87,7 @@ static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align, * This is very early in boot, so no harm done if the kernel crashes at * this point. */ - BUG_ON(shared_lppaca_size >= shared_lppaca_total_size); + BUG_ON(shared_lppaca_size > shared_lppaca_total_size); return ptr; } -- cgit v1.2.3 From 6068e1a4427e88f5cc62f238d1baf94a8b824ef4 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Fri, 10 Jul 2020 16:47:19 -0700 Subject: powerpc/vas: Report proper error code for address translation failure P9 DD2 NX workbook (Table 4-36) says DMA controller uses CC=5 internally for translation fault handling. NX reserves CC=250 for OS to notify user space when NX encounters address translation failure on the request buffer. Not an issue in earlier releases as NX does not get faults on kernel addresses. This patch defines CSB_CC_FAULT_ADDRESS(250) and updates CSB.CC with this proper error code for user space. Fixes: c96c4436aba4 ("powerpc/vas: Update CSB and notify process for fault CRBs") Signed-off-by: Haren Myneni [mpe: Added Fixes tag and fix typo in comment] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/019fd53e7538c6f8f332d175df74b1815ef5aa8c.camel@linux.ibm.com --- Documentation/powerpc/vas-api.rst | 2 +- arch/powerpc/include/asm/icswx.h | 2 ++ arch/powerpc/platforms/powernv/vas-fault.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/powerpc/vas-api.rst b/Documentation/powerpc/vas-api.rst index 1217c2f1595e..788dc8375a0e 100644 --- a/Documentation/powerpc/vas-api.rst +++ b/Documentation/powerpc/vas-api.rst @@ -213,7 +213,7 @@ request buffers are not in memory. The operating system handles the fault by updating CSB with the following data: csb.flags = CSB_V; - csb.cc = CSB_CC_TRANSLATION; + csb.cc = CSB_CC_FAULT_ADDRESS; csb.ce = CSB_CE_TERMINATION; csb.address = fault_address; diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h index 965b1f39b2a5..b0c70a35fd0e 100644 --- a/arch/powerpc/include/asm/icswx.h +++ b/arch/powerpc/include/asm/icswx.h @@ -77,6 +77,8 @@ struct coprocessor_completion_block { #define CSB_CC_CHAIN (37) #define CSB_CC_SEQUENCE (38) #define CSB_CC_HW (39) +/* P9 DD2 NX Workbook 3.2 (Table 4-36): Address translation fault */ +#define CSB_CC_FAULT_ADDRESS (250) #define CSB_SIZE (0x10) #define CSB_ALIGN CSB_SIZE diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 266a6ca5e15e..3d21fce254b7 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -79,7 +79,7 @@ static void update_csb(struct vas_window *window, csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); memset(&csb, 0, sizeof(csb)); - csb.cc = CSB_CC_TRANSLATION; + csb.cc = CSB_CC_FAULT_ADDRESS; csb.ce = CSB_CE_TERMINATION; csb.cs = 0; csb.count = 0; -- cgit v1.2.3