From faae0ca3628b99119c3ad9780259d25b02ddff93 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 26 Mar 2026 12:31:57 +0000 Subject: drm/managed: use special gfp_t format specifier Patch series "treewide: fixup gfp_t printks", v2. Use vprintf()'s special gfp_t conversion in a few places. This patch (of 3): %pGg produces nice readable output and decouples the format string from the size of gfp_t. Link: https://lore.kernel.org/20260326-gfp64-v2-0-d916021cecdf@google.com Link: https://lore.kernel.org/20260326-gfp64-v2-1-d916021cecdf@google.com Signed-off-by: Brendan Jackman Cc: Alexander Potapenko Cc: Allison Collins Cc: Allison Henderson Cc: Dave Airlie Cc: David S. Miller Cc: Dmitry Vyukov Cc: Eric Dumazet Cc: Jakub Kacinski Cc: Maarten Lankhorst Cc: Marco Elver Cc: Maxime Ripard Cc: Paolo Abeni Cc: Simon Horman Cc: Stanislaw Gruszka Cc: Thomas Zimemrmann Signed-off-by: Andrew Morton --- drivers/gpu/drm/drm_managed.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c index 247f468731de..a9da94319b05 100644 --- a/drivers/gpu/drm/drm_managed.c +++ b/drivers/gpu/drm/drm_managed.c @@ -232,8 +232,8 @@ void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp) dr = alloc_dr(NULL, size, gfp, dev_to_node(dev->dev)); if (!dr) { - drm_dbg_drmres(dev, "failed to allocate %zu bytes, %u flags\n", - size, gfp); + drm_dbg_drmres(dev, "failed to allocate %zu bytes, %pGg\n", + size, &gfp); return NULL; } dr->node.name = kstrdup_const("kmalloc", gfp); -- cgit v1.2.3 From 8aa442cfce79e2d69e72fc8e0c0864ac2971149d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 23 Feb 2026 12:15:16 -0800 Subject: dax/kmem: account for partial discontiguous resource upon removal When dev_dax_kmem_probe() partially succeeds (at least one range is mapped) but a subsequent range fails request_mem_region() or add_memory_driver_managed(), the probe silently continues, ultimately returning success, but with the corresponding range resource NULL'ed out. dev_dax_kmem_remove() iterates over all dax_device ranges regardless of if the underlying resource exists. When remove_memory() is called later, it returns 0 because the memory was never added which causes dev_dax_kmem_remove() to incorrectly assume the (nonexistent) resource can be removed and attempts cleanup on a NULL pointer. Fix this by skipping these ranges altogether, noting that these cases are considered success, such that the cleanup is still reached when all actually-added ranges are successfully removed. Link: https://lore.kernel.org/20260223201516.1517657-1-dave@stgolabs.net Fixes: 60e93dc097f7 ("device-dax: add dis-contiguous resource support") Signed-off-by: Davidlohr Bueso Reviewed-by: Ben Cheatham Reviewed-by: Alison Schofield Reviewed-by: Jonathan Cameron Cc: Dan Williams Cc: Dave Jiang Cc: Vishal Verma Signed-off-by: Andrew Morton --- drivers/dax/kmem.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index 2cc8749bc871..a18e2b968e4d 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -227,6 +227,12 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax) if (rc) continue; + /* range was never added during probe */ + if (!data->res[i]) { + success++; + continue; + } + rc = remove_memory(range.start, range_len(&range)); if (rc == 0) { remove_resource(data->res[i]); -- cgit v1.2.3 From a1e6b0968833c2dd6193d05daf5700f9e0492126 Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Sat, 9 May 2026 08:56:31 +0800 Subject: proc/meminfo: expose per-node balloon pages in node meminfo Commit 835de37603ef ("meminfo: add a per node counter for balloon drivers") added NR_BALLOON_PAGES and exposed it in /proc/meminfo. However, the per-node view at /sys/devices/system/node/nodeX/meminfo was not updated, even though the counter is already tracked per-node. Add it to node_read_meminfo() so users can see balloon usage per NUMA node without having to parse the raw vmstat file. Link: https://lore.kernel.org/20260509005631.17183-1-hao.ge@linux.dev Signed-off-by: Hao Ge Acked-by: David Hildenbrand (Arm) Cc: Danilo Krummrich Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton --- drivers/base/node.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/base/node.c b/drivers/base/node.c index 126f66aa2c3e..f4d9a21cc24e 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -523,6 +523,7 @@ static ssize_t node_read_meminfo(struct device *dev, #ifdef CONFIG_UNACCEPTED_MEMORY "Node %d Unaccepted: %8lu kB\n" #endif + "Node %d Balloon: %8lu kB\n" "Node %d GPUActive: %8lu kB\n" "Node %d GPUReclaim: %8lu kB\n" , @@ -559,6 +560,7 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(sum_zone_node_page_state(nid, NR_UNACCEPTED)) #endif , + nid, K(node_page_state(pgdat, NR_BALLOON_PAGES)), nid, K(node_page_state(pgdat, NR_GPU_ACTIVE)), nid, K(node_page_state(pgdat, NR_GPU_RECLAIM)) ); -- cgit v1.2.3 From 9b1b295e9fd354b2263aee80a1ef3605d1eee32e Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 12 May 2026 15:26:35 +0800 Subject: drivers/base/memory: make memory block get/put explicit Rename the memory block lookup helper to make the acquired reference explicit, add memory_block_put() to wrap put_device(), remove find_memory_block(), and use memory_block_get() as the single block-id based lookup interface. This makes it clearer to callers that a successful lookup holds a reference that must be dropped, reducing the chance of forgetting the matching put and leaking the memory block device reference. Link: https://lore.kernel.org/linux-mm/7887915D-E598-42B3-9AFE-BFFBACE8DE2D@linux.dev/#t Link: https://lore.kernel.org/20260512072635.3969576-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Oscar Salvador Acked-by: David Hildenbrand (Arm) Acked-by: Michal Hocko Tested-by: Donet Tom Reviewed-by: Lorenzo Stoakes Tested-by: Sumanth Korikkar #s390 Cc: Richard Cheng Acked-by: Mike Rapoport (Microsoft) Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Danilo Krummrich Cc: Doug Anderson Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Kees Cook Cc: Liam R. Howlett Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nicholas Piggin Cc: "Rafael J. Wysocki" Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- arch/powerpc/platforms/pseries/hotplug-memory.c | 14 +++------ drivers/base/memory.c | 38 +++++++++---------------- drivers/base/node.c | 4 +-- drivers/s390/char/sclp_mem.c | 17 ++++++----- include/linux/memory.h | 7 +++-- mm/memory_hotplug.c | 5 ++-- 6 files changed, 35 insertions(+), 50 deletions(-) (limited to 'drivers') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index b2f14db59034..5d3b51081ff3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -164,13 +164,7 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb) static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb) { - unsigned long section_nr; - struct memory_block *mem_block; - - section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); - - mem_block = find_memory_block(section_nr); - return mem_block; + return memory_block_get(phys_to_block_id(lmb->base_addr)); } static int get_lmb_range(u32 drc_index, int n_lmbs, @@ -220,7 +214,7 @@ static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online) else rc = 0; - put_device(&mem_block->dev); + memory_block_put(mem_block); return rc; } @@ -319,12 +313,12 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) rc = dlpar_offline_lmb(lmb); if (rc) { - put_device(&mem_block->dev); + memory_block_put(mem_block); return rc; } __remove_memory(lmb->base_addr, memory_block_size); - put_device(&mem_block->dev); + memory_block_put(mem_block); /* Update memory regions for memory remove */ memblock_remove(lmb->base_addr, memory_block_size); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 6981b55d582a..d31a421f7483 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -649,7 +649,7 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn) * * Called under device_hotplug_lock. */ -struct memory_block *find_memory_block_by_id(unsigned long block_id) +struct memory_block *memory_block_get(unsigned long block_id) { struct memory_block *mem; @@ -659,16 +659,6 @@ struct memory_block *find_memory_block_by_id(unsigned long block_id) return mem; } -/* - * Called under device_hotplug_lock. - */ -struct memory_block *find_memory_block(unsigned long section_nr) -{ - unsigned long block_id = memory_block_id(section_nr); - - return find_memory_block_by_id(block_id); -} - static struct attribute *memory_memblk_attrs[] = { &dev_attr_phys_index.attr, &dev_attr_state.attr, @@ -701,7 +691,7 @@ static int __add_memory_block(struct memory_block *memory) ret = device_register(&memory->dev); if (ret) { - put_device(&memory->dev); + memory_block_put(memory); return ret; } ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory, @@ -795,9 +785,9 @@ static int add_memory_block(unsigned long block_id, int nid, unsigned long state struct memory_block *mem; int ret = 0; - mem = find_memory_block_by_id(block_id); + mem = memory_block_get(block_id); if (mem) { - put_device(&mem->dev); + memory_block_put(mem); return -EEXIST; } mem = kzalloc_obj(*mem); @@ -845,8 +835,8 @@ static void remove_memory_block(struct memory_block *memory) memory->group = NULL; } - /* drop the ref. we got via find_memory_block() */ - put_device(&memory->dev); + /* drop the ref. we got via memory_block_get() */ + memory_block_put(memory); device_unregister(&memory->dev); } @@ -880,7 +870,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size, end_block_id = block_id; for (block_id = start_block_id; block_id != end_block_id; block_id++) { - mem = find_memory_block_by_id(block_id); + mem = memory_block_get(block_id); if (WARN_ON_ONCE(!mem)) continue; remove_memory_block(mem); @@ -908,7 +898,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size) return; for (block_id = start_block_id; block_id != end_block_id; block_id++) { - mem = find_memory_block_by_id(block_id); + mem = memory_block_get(block_id); if (WARN_ON_ONCE(!mem)) continue; num_poisoned_pages_sub(-1UL, memblk_nr_poison(mem)); @@ -1015,12 +1005,12 @@ int walk_memory_blocks(unsigned long start, unsigned long size, return 0; for (block_id = start_block_id; block_id <= end_block_id; block_id++) { - mem = find_memory_block_by_id(block_id); + mem = memory_block_get(block_id); if (!mem) continue; ret = func(mem, arg); - put_device(&mem->dev); + memory_block_put(mem); if (ret) break; } @@ -1228,22 +1218,22 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, void memblk_nr_poison_inc(unsigned long pfn) { const unsigned long block_id = pfn_to_block_id(pfn); - struct memory_block *mem = find_memory_block_by_id(block_id); + struct memory_block *mem = memory_block_get(block_id); if (mem) { atomic_long_inc(&mem->nr_hwpoison); - put_device(&mem->dev); + memory_block_put(mem); } } void memblk_nr_poison_sub(unsigned long pfn, long i) { const unsigned long block_id = pfn_to_block_id(pfn); - struct memory_block *mem = find_memory_block_by_id(block_id); + struct memory_block *mem = memory_block_get(block_id); if (mem) { atomic_long_sub(i, &mem->nr_hwpoison); - put_device(&mem->dev); + memory_block_put(mem); } } diff --git a/drivers/base/node.c b/drivers/base/node.c index f4d9a21cc24e..3da91929ad4e 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -849,13 +849,13 @@ static void register_memory_blocks_under_nodes(void) for (block_id = start_block_id; block_id <= end_block_id; block_id++) { struct memory_block *mem; - mem = find_memory_block_by_id(block_id); + mem = memory_block_get(block_id); if (!mem) continue; memory_block_add_nid_early(mem, nid); do_register_memory_block_under_node(nid, mem); - put_device(&mem->dev); + memory_block_put(mem); } } diff --git a/drivers/s390/char/sclp_mem.c b/drivers/s390/char/sclp_mem.c index 78c054e26d17..6df1926d4c62 100644 --- a/drivers/s390/char/sclp_mem.c +++ b/drivers/s390/char/sclp_mem.c @@ -204,7 +204,7 @@ static ssize_t sclp_config_mem_store(struct kobject *kobj, struct kobj_attribute addr = sclp_mem->id * block_size; /* * Hold device_hotplug_lock when adding/removing memory blocks. - * Additionally, also protect calls to find_memory_block() and + * Additionally, also protect calls to memory_block_get() and * sclp_attach_storage(). */ rc = lock_device_hotplug_sysfs(); @@ -231,20 +231,19 @@ static ssize_t sclp_config_mem_store(struct kobject *kobj, struct kobj_attribute sclp_mem_change_state(addr, block_size, 0); goto out_unlock; } - mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr))); - put_device(&mem->dev); + mem = memory_block_get(phys_to_block_id(addr)); + memory_block_put(mem); WRITE_ONCE(sclp_mem->config, 1); } else { if (!sclp_mem->config) goto out_unlock; - mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr))); + mem = memory_block_get(phys_to_block_id(addr)); if (mem->state != MEM_OFFLINE) { - put_device(&mem->dev); + memory_block_put(mem); rc = -EBUSY; goto out_unlock; } - /* drop the ref just got via find_memory_block() */ - put_device(&mem->dev); + memory_block_put(mem); sclp_mem_change_state(addr, block_size, 0); __remove_memory(addr, block_size); #ifdef CONFIG_KASAN @@ -294,11 +293,11 @@ static ssize_t sclp_memmap_on_memory_store(struct kobject *kobj, struct kobj_att return rc; block_size = memory_block_size_bytes(); sclp_mem = container_of(kobj, struct sclp_mem, kobj); - mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(sclp_mem->id * block_size))); + mem = memory_block_get(phys_to_block_id(sclp_mem->id * block_size)); if (!mem) { WRITE_ONCE(sclp_mem->memmap_on_memory, value); } else { - put_device(&mem->dev); + memory_block_put(mem); rc = -EBUSY; } unlock_device_hotplug(); diff --git a/include/linux/memory.h b/include/linux/memory.h index 5bb5599c6b2b..463dc02f6cff 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -158,7 +158,11 @@ int create_memory_block_devices(unsigned long start, unsigned long size, void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); extern int memory_notify(enum memory_block_state state, void *v); -extern struct memory_block *find_memory_block(unsigned long section_nr); +struct memory_block *memory_block_get(unsigned long block_id); +static inline void memory_block_put(struct memory_block *mem) +{ + put_device(&mem->dev); +} typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); extern int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, walk_memory_blocks_func_t func); @@ -171,7 +175,6 @@ struct memory_group *memory_group_find_by_id(int mgid); typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *); int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, struct memory_group *excluded, void *arg); -struct memory_block *find_memory_block_by_id(unsigned long block_id); #define hotplug_memory_notifier(fn, pri) ({ \ static __meminitdata struct notifier_block fn##_mem_nb =\ { .notifier_call = fn, .priority = pri };\ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index af5489f03771..7ac19fab2263 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1423,14 +1423,13 @@ static void remove_memory_blocks_and_altmaps(u64 start, u64 size) struct vmem_altmap *altmap = NULL; struct memory_block *mem; - mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(cur_start))); + mem = memory_block_get(phys_to_block_id(cur_start)); if (WARN_ON_ONCE(!mem)) continue; altmap = mem->altmap; mem->altmap = NULL; - /* drop the ref. we got via find_memory_block() */ - put_device(&mem->dev); + memory_block_put(mem); remove_memory_block_devices(cur_start, memblock_size); arch_remove_memory(cur_start, memblock_size, altmap, NULL); -- cgit v1.2.3 From a2b8d7827f48ee54a686cb80e4a1d0ff954ec42a Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Thu, 14 May 2026 02:26:57 -0700 Subject: drivers/base/memory: set mem->altmap after successful device registration If __add_memory_block() fails at xa_store() (under memory pressure for example), device_unregister() is called, which eventually triggers memory_block_release() with mem->altmap still set, causing a WARN_ON(mem->altmap). This was triggered by modifying virtio-mem driver. Fix this by delaying the assignment of mem->altmap until after __add_memory_block() has succeeded. Link: https://lore.kernel.org/20260514092657.3057141-1-georgi.djakov@oss.qualcomm.com Fixes: 1a8c64e11043 ("mm/memory_hotplug: embed vmem_altmap details in memory block") Signed-off-by: Georgi Djakov Acked-by: Oscar Salvador (SUSE) Cc: Vishal Verma Cc: Mike Rapoport Cc: Richard Cheng Cc: David Hildenbrand Cc: Georgi Djakov Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Signed-off-by: Andrew Morton --- drivers/base/memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index d31a421f7483..b318344426fa 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -797,7 +797,6 @@ static int add_memory_block(unsigned long block_id, int nid, unsigned long state mem->start_section_nr = block_id * sections_per_block; mem->state = state; mem->nid = nid; - mem->altmap = altmap; INIT_LIST_HEAD(&mem->group_next); #ifndef CONFIG_NUMA @@ -815,6 +814,8 @@ static int add_memory_block(unsigned long block_id, int nid, unsigned long state if (ret) return ret; + mem->altmap = altmap; + if (group) { mem->group = group; list_add(&mem->group_next, &group->memory_blocks); -- cgit v1.2.3 From 17986198a7b99485d7b2bc4eb8d700fbf8c8629e Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 2 Jun 2026 12:06:25 +0100 Subject: drivers/char/mem: eliminate unnecessary use of success_hook Patch series "remove mmap_action success, error hooks", v3. The mmap_action->success_hook was a strange beast added to enable code which appeared to absolutely require access to a VMA pointer to work correctly. Primarily this was for hugetlb, however a different approach will be taken there, as clearly more work is required to figure out a sensible way of converting hugetlb to use mmap_prepare. The other user was the memory char driver, specifically /dev/zero which has the unusual property of explicitly setting file-backed VMAs anonymous. Providing the success hook was always foolish, as it allowed drivers a way to workaround the restriction that they should not access a pointer to a not-yet-correctly-initialised VMA - which defeats the purpose of the mmap_prepare work. We can achieve the same thing in memory char driver without needing the success hook, so this series removes that, then removes the success hook altogether. The error hook is also unnecessary - the motivation for this was for functions which need to override the error code when performing an mmap action in order to avoid breaking userspace. We can achieve this by just providing a field for the error code. Doing this means we don't have to worry about the hook doing anything odd. We also add a check to ensure the error code is in fact valid. Again the memory char driver is the only current user of this, so this series updates it to use that. After this change mmap_action has no custom hooks at all, which seems rather more cromulent than before. This patch (of 3): /dev/zero, uniquely, marks memory mapped there as anonymous. This is currently achieved using the mmap_action->success_hook. However this hook circumvents the abstraction of VMA initialisation so it's preferable to do things a different way. To achieve this, this patch firstly defaults the VMA descriptor's vm_ops field to the dummy VMA operations, which is what file-backed VMAs default this field to. That way, we can detect whether a driver sets this field to NULL in order to mark it anonymous. We then introduce vma_desc_set_anonymous() to do this explicitly, and invoke it in mmap_zero_prepare(). This way, any driver which does not explicitly set desc->vm_ops, retains the dummy vm_ops as they would previously. We also update set_vma_user_defined_fields() to make clear that we are either setting vma->vm_ops to what is provided by the driver (or defaulting to dummy_vm_ops if not set), or setting the VMA anonymous. This lays the groundwork for removing the success hook. Link: https://lore.kernel.org/cover.1780397980.git.ljs@kernel.org Link: https://lore.kernel.org/010579cca6787cf7bb057ab1f7228978b10601c8.1780397980.git.ljs@kernel.org Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand (Arm) Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Jann Horn Cc: Liam R. Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Pedro Falcato Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- drivers/char/mem.c | 17 +++++------------ include/linux/mm.h | 5 +++++ mm/util.c | 1 + mm/vma.c | 3 +++ tools/testing/vma/include/dup.h | 1 + 5 files changed, 15 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 5fd421e48c04..a4297eb39887 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -504,17 +504,6 @@ static ssize_t read_zero(struct file *file, char __user *buf, return cleared; } -static int mmap_zero_private_success(const struct vm_area_struct *vma) -{ - /* - * This is a highly unique situation where we mark a MAP_PRIVATE mapping - * of /dev/zero anonymous, despite it not being. - */ - vma_set_anonymous((struct vm_area_struct *)vma); - - return 0; -} - static int mmap_zero_prepare(struct vm_area_desc *desc) { #ifndef CONFIG_MMU @@ -523,7 +512,11 @@ static int mmap_zero_prepare(struct vm_area_desc *desc) if (vma_desc_test(desc, VMA_SHARED_BIT)) return shmem_zero_setup_desc(desc); - desc->action.success_hook = mmap_zero_private_success; + /* + * This is a highly unique situation where we mark a MAP_PRIVATE mapping + * of /dev/zero anonymous, despite it not being. + */ + vma_desc_set_anonymous(desc); return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 11f440e9d7cd..0f2612a70fb1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1489,6 +1489,11 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma) vma->vm_ops = NULL; } +static inline void vma_desc_set_anonymous(struct vm_area_desc *desc) +{ + desc->vm_ops = NULL; +} + static inline bool vma_is_anonymous(struct vm_area_struct *vma) { return !vma->vm_ops; diff --git a/mm/util.c b/mm/util.c index 3cc949a0b7ed..2b2a9df689d7 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1192,6 +1192,7 @@ void compat_set_desc_from_vma(struct vm_area_desc *desc, desc->vm_file = vma->vm_file; desc->vma_flags = vma->flags; desc->page_prot = vma->vm_page_prot; + desc->vm_ops = vma->vm_ops; /* Default. */ desc->action.type = MMAP_NOTHING; diff --git a/mm/vma.c b/mm/vma.c index d90791b00a7b..9eea2850818a 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -2697,6 +2697,8 @@ static void set_vma_user_defined_fields(struct vm_area_struct *vma, { if (map->vm_ops) vma->vm_ops = map->vm_ops; + else /* Only /dev/zero should do this. */ + vma_set_anonymous(vma); vma->vm_private_data = map->vm_private_data; } @@ -2744,6 +2746,7 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr, .action = { .type = MMAP_NOTHING, /* Default to no further action. */ }, + .vm_ops = &vma_dummy_vm_ops, }; bool allocated_new = false; int error; diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h index 9e0dfd3a85b0..306171d061e7 100644 --- a/tools/testing/vma/include/dup.h +++ b/tools/testing/vma/include/dup.h @@ -1303,6 +1303,7 @@ static inline void compat_set_desc_from_vma(struct vm_area_desc *desc, desc->vm_file = vma->vm_file; desc->vma_flags = vma->flags; desc->page_prot = vma->vm_page_prot; + desc->vm_ops = vma->vm_ops; /* Default. */ desc->action.type = MMAP_NOTHING; -- cgit v1.2.3 From 4f5b8759262e5e65373638346307836de1290b22 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 2 Jun 2026 12:06:27 +0100 Subject: mm/vma: eliminate mmap_action->error_hook, introduce error_override Rather than providing a hook, simplify things by providing the ability to override mmap action errors. This allows us to more carefully validate the value provided and thus ensure only a valid error code is specified, and simplifies the interface. This way, we eliminate all hooks but mmap_prepare and allow only mmap actions to be specified (which core mm controls). This significantly improves robustness and eliminates any unnecessary code duplication in driver mmap hooks. We also update the /dev/mem logic (the only user) to use mmap_action->error_override instead. Link: https://lore.kernel.org/55d13f7d016b827c459946d46a56105635be111c.1780397980.git.ljs@kernel.org Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand (Arm) Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Jann Horn Cc: Liam R. Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Pedro Falcato Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- drivers/char/mem.c | 8 +------- include/linux/mm_types.h | 9 +++------ mm/util.c | 29 +++++++++++++++++++++-------- tools/testing/vma/include/dup.h | 9 +++------ 4 files changed, 28 insertions(+), 27 deletions(-) (limited to 'drivers') diff --git a/drivers/char/mem.c b/drivers/char/mem.c index a4297eb39887..63253d1de5d7 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -322,11 +322,6 @@ static const struct vm_operations_struct mmap_mem_ops = { #endif }; -static int mmap_filter_error(int err) -{ - return -EAGAIN; -} - static int mmap_mem_prepare(struct vm_area_desc *desc) { struct file *file = desc->file; @@ -362,8 +357,7 @@ static int mmap_mem_prepare(struct vm_area_desc *desc) /* Remap-pfn-range will mark the range with the I/O flag. */ mmap_action_remap_full(desc, desc->pgoff); - /* We filter remap errors to -EAGAIN. */ - desc->action.error_hook = mmap_filter_error; + desc->action.error_override = -EAGAIN; return 0; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 945c0a5386d6..5ef78617ce93 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -844,13 +844,10 @@ struct mmap_action { enum mmap_action_type type; /* - * If specified, this hook is invoked when an error occurred when - * attempting the selected action. - * - * The hook can return an error code in order to filter the error, but - * it is not valid to clear the error here. + * If non-zero, replace errors that arise from mmap actions with this + * value instead. Only valid error codes may be specified. */ - int (*error_hook)(int err); + int error_override; /* * This should be set in rare instances where the operation required diff --git a/mm/util.c b/mm/util.c index 4e172990afcd..af2c2103f0d9 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1414,16 +1414,22 @@ static int mmap_action_finish(struct vm_area_struct *vma, */ len = vma_pages(vma) << PAGE_SHIFT; do_munmap(current->mm, vma->vm_start, len, NULL); - if (action->error_hook) { - /* We may want to filter the error. */ - err = action->error_hook(err); - /* The caller should not clear the error. */ - VM_WARN_ON_ONCE(!err); - } - return err; + + return action->error_override ?: err; } #ifdef CONFIG_MMU + +static int check_mmap_action(struct mmap_action *action) +{ + const unsigned long override = action->error_override; + + if (WARN_ON_ONCE(override && !IS_ERR_VALUE(override))) + return -EINVAL; + + return 0; +} + /** * mmap_action_prepare - Perform preparatory setup for an VMA descriptor * action which need to be performed. @@ -1433,7 +1439,14 @@ static int mmap_action_finish(struct vm_area_struct *vma, */ int mmap_action_prepare(struct vm_area_desc *desc) { - switch (desc->action.type) { + struct mmap_action *action = &desc->action; + int err; + + err = check_mmap_action(action); + if (err) + return err; + + switch (action->type) { case MMAP_NOTHING: return 0; case MMAP_REMAP_PFN: diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h index fddfd1b57c09..bf26b3f48d3a 100644 --- a/tools/testing/vma/include/dup.h +++ b/tools/testing/vma/include/dup.h @@ -483,13 +483,10 @@ struct mmap_action { enum mmap_action_type type; /* - * If specified, this hook is invoked when an error occurred when - * attempting the selection action. - * - * The hook can return an error code in order to filter the error, but - * it is not valid to clear the error here. + * If non-zero, replace errors that arise from mmap actions with this + * value instead. Only valid error codes may be specified. */ - int (*error_hook)(int err); + int error_override; /* * This should be set in rare instances where the operation required -- cgit v1.2.3 From 3e8d8eb8d7f5b1ec3993ad4dbb8140a55f789f90 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 26 May 2026 11:27:16 +0900 Subject: zram: do not leak blk idx at the end of writeback zram_writeback_slots() loop can terminate with valid reserved backing device blk_idx. The problem is that cleanup code doesn't release that reserved blk_idx before zram_writeback_slots() returns, which leads to blk_idx leak (it becomes permanently busy and can not be used for actual writeback.) This does not lead to any system instabilities, it only means that we can writeback less pages. The scenario is hard to hit in practice as it requires writeabck to race with modification (slot-free or overwrite) of the final post-processing slot. Release reserved but unused blk_idx before returning from zram_writeback_slots(). Link: https://lore.kernel.org/20260526022754.2377730-2-senozhatsky@chromium.org Fixes: f405066a1f0db ("zram: introduce writeback bio batching") Signed-off-by: Sergey Senozhatsky Suggested-by: Brian Geffon Cc: Jens Axboe Cc: Minchan Kim Cc: Richard Chang Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 07111455eecf..602abfe23797 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1127,6 +1127,9 @@ next: if (req) release_wb_req(req); + if (blk_idx != INVALID_BDEV_BLOCK) + zram_release_bdev_block(zram, blk_idx); + while (atomic_read(&wb_ctl->num_inflight) > 0) { wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs)); err = zram_complete_done_reqs(zram, wb_ctl); -- cgit v1.2.3 From 3bf1c285dc406067eae5b3a7072afad81ad4a4fc Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 26 May 2026 11:27:17 +0900 Subject: zram: clear trailing bytes of compressed writeback pages Patch series "zram: writeback fixes", v2. Brian (privately) reported a "leak" of writeback bitmap in certain cases, so that backing device can store less pages; and a theoretical data leak in the trailing bytes of compressed writeback pages. Both issues are low risk. This patch (of 2): When compressed writeback is available writtenback pages contain "garbage" in PAGE_SIZE - obj_size trailing bytes. That "garbage" is, basically, whatever data that page held before we got it for writeback. To get advantage of it an attacker needs to be able to read from active backing swap device, which is already catastrophic. Still, just in case, zero out those trailing bytes before writeback to a backing device so that we only store swap-ed out data there. Link: https://lore.kernel.org/20260526022754.2377730-1-senozhatsky@chromium.org Link: https://lore.kernel.org/20260526022754.2377730-3-senozhatsky@chromium.org Fixes: d38fab605c66 ("zram: introduce compressed data writeback") Signed-off-by: Sergey Senozhatsky Suggested-by: Brian Geffon Cc: Jens Axboe Cc: Minchan Kim Cc: Richard Chang Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 602abfe23797..7917fc7a2a29 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2134,6 +2134,8 @@ static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index) zs_obj_read_end(zram->mem_pool, handle, size, src); zcomp_stream_put(zstrm); + memzero_page(page, size, PAGE_SIZE - size); + return 0; } #endif -- cgit v1.2.3 From d1aba985984781947ad67c1b44ac64bd498c8f27 Mon Sep 17 00:00:00 2001 From: Cunlong Li Date: Thu, 28 May 2026 10:48:45 +0800 Subject: zram: drop unused bio parameter from write helpers After "zram: fix use-after-free in zram_bvec_write_partial()", zram_bvec_write_partial() always passes NULL to zram_read_page() and no longer needs the parent bio. Mirror the read side (zram_bvec_read_partial() has not taken a bio since commit 4e3c87b9421d ("zram: fix synchronous reads")) and drop the parameter from zram_bvec_write_partial() and zram_bvec_write(). No functional change. Link: https://lore.kernel.org/20260528-zram-v3-2-cab86eef8764@gmail.com Signed-off-by: Cunlong Li Reviewed-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Cc: Jens Axboe Cc: Minchan Kim Cc: Yisheng Xie Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 7917fc7a2a29..fd12604ff8d7 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2334,7 +2334,7 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) * This is a partial IO. Read the full page before writing the changes. */ static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio) + u32 index, int offset) { struct page *page = alloc_page(GFP_NOIO); int ret; @@ -2352,10 +2352,10 @@ static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec, } static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio) + u32 index, int offset) { if (is_partial_io(bvec)) - return zram_bvec_write_partial(zram, bvec, index, offset, bio); + return zram_bvec_write_partial(zram, bvec, index, offset); return zram_write_page(zram, bvec->bv_page, index); } @@ -2752,7 +2752,7 @@ static void zram_bio_write(struct zram *zram, struct bio *bio) bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); - if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) { + if (zram_bvec_write(zram, &bv, index, offset) < 0) { atomic64_inc(&zram->stats.failed_writes); bio->bi_status = BLK_STS_IOERR; break; -- cgit v1.2.3