From fab827dbee8c2e06ca4ba000fa6c48bcf9054aba Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 2 Sep 2021 14:54:57 -0700 Subject: memcg: enable accounting for pids in nested pid namespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 5d097056c9a0 ("kmemcg: account certain kmem allocations to memcg") enabled memcg accounting for pids allocated from init_pid_ns.pid_cachep, but forgot to adjust the setting for nested pid namespaces. As a result, pid memory is not accounted exactly where it is really needed, inside memcg-limited containers with their own pid namespaces. Pid was one the first kernel objects enabled for memcg accounting. init_pid_ns.pid_cachep marked by SLAB_ACCOUNT and we can expect that any new pids in the system are memcg-accounted. Though recently I've noticed that it is wrong. nested pid namespaces creates own slab caches for pid objects, nested pids have increased size because contain id both for all parent and for own pid namespaces. The problem is that these slab caches are _NOT_ marked by SLAB_ACCOUNT, as a result any pids allocated in nested pid namespaces are not memcg-accounted. Pid struct in nested pid namespace consumes up to 500 bytes memory, 100000 such objects gives us up to ~50Mb unaccounted memory, this allow container to exceed assigned memcg limits. Link: https://lkml.kernel.org/r/8b6de616-fd1a-02c6-cbdb-976ecdcfa604@virtuozzo.com Fixes: 5d097056c9a0 ("kmemcg: account certain kmem allocations to memcg") Cc: stable@vger.kernel.org Signed-off-by: Vasily Averin Reviewed-by: Michal Koutný Reviewed-by: Shakeel Butt Acked-by: Christian Brauner Acked-by: Roman Gushchin Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/pid_namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index ca43239a255a..cb5a25a8a0cc 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -51,7 +51,8 @@ static struct kmem_cache *create_pid_cachep(unsigned int level) mutex_lock(&pid_caches_mutex); /* Name collision forces to do allocation under mutex. */ if (!*pkc) - *pkc = kmem_cache_create(name, len, 0, SLAB_HWCACHE_ALIGN, 0); + *pkc = kmem_cache_create(name, len, 0, + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, 0); mutex_unlock(&pid_caches_mutex); /* current can fail, but someone else can succeed. */ return READ_ONCE(*pkc); -- cgit v1.2.3 From 30acd0bdfb86548172168a0cc71d455944de0683 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 2 Sep 2021 14:55:27 -0700 Subject: memcg: enable accounting for new namesapces and struct nsproxy Container admin can create new namespaces and force kernel to allocate up to several pages of memory for the namespaces and its associated structures. Net and uts namespaces have enabled accounting for such allocations. It makes sense to account for rest ones to restrict the host's memory consumption from inside the memcg-limited container. Link: https://lkml.kernel.org/r/5525bcbf-533e-da27-79b7-158686c64e13@virtuozzo.com Signed-off-by: Vasily Averin Acked-by: Serge Hallyn Acked-by: Christian Brauner Acked-by: Kirill Tkhai Reviewed-by: Shakeel Butt Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: Jens Axboe Cc: Jiri Slaby Cc: Johannes Weiner Cc: Michal Hocko Cc: Oleg Nesterov Cc: Roman Gushchin Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: Yutian Yang Cc: Zefan Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 2 +- ipc/namespace.c | 2 +- kernel/cgroup/namespace.c | 2 +- kernel/nsproxy.c | 2 +- kernel/pid_namespace.c | 2 +- kernel/time/namespace.c | 4 ++-- kernel/user_namespace.c | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/fs/namespace.c b/fs/namespace.c index e51b63ae233b..94a9817851cc 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3307,7 +3307,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a if (!ucounts) return ERR_PTR(-ENOSPC); - new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL); + new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL_ACCOUNT); if (!new_ns) { dec_mnt_namespaces(ucounts); return ERR_PTR(-ENOMEM); diff --git a/ipc/namespace.c b/ipc/namespace.c index 7bd0766ddc3b..ae83f0f2651b 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -42,7 +42,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, goto fail; err = -ENOMEM; - ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL); + ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL_ACCOUNT); if (ns == NULL) goto fail_dec; diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c index f5e8828c109c..0d5c29879a50 100644 --- a/kernel/cgroup/namespace.c +++ b/kernel/cgroup/namespace.c @@ -24,7 +24,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void) struct cgroup_namespace *new_ns; int ret; - new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL); + new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL_ACCOUNT); if (!new_ns) return ERR_PTR(-ENOMEM); ret = ns_alloc_inum(&new_ns->ns); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index abc01fcad8c7..eec72ca962e2 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -568,6 +568,6 @@ out: int __init nsproxy_cache_init(void) { - nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); + nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC|SLAB_ACCOUNT); return 0; } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index cb5a25a8a0cc..a46a3723bc66 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -450,7 +450,7 @@ const struct proc_ns_operations pidns_for_children_operations = { static __init int pid_namespaces_init(void) { - pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); + pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC | SLAB_ACCOUNT); #ifdef CONFIG_CHECKPOINT_RESTORE register_sysctl_paths(kern_path, pid_ns_ctl_table); diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 12eab0d2ae28..aec832801c26 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -88,13 +88,13 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, goto fail; err = -ENOMEM; - ns = kmalloc(sizeof(*ns), GFP_KERNEL); + ns = kmalloc(sizeof(*ns), GFP_KERNEL_ACCOUNT); if (!ns) goto fail_dec; refcount_set(&ns->ns.count, 1); - ns->vvar_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!ns->vvar_page) goto fail_free; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index ef82d401dde8..6b2e3ca7ee99 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -1385,7 +1385,7 @@ const struct proc_ns_operations userns_operations = { static __init int user_namespaces_init(void) { - user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); + user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC | SLAB_ACCOUNT); return 0; } subsys_initcall(user_namespaces_init); -- cgit v1.2.3 From 5f58c39819ff78ca5ddbba2b3cd8ff4779b19bb5 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 2 Sep 2021 14:55:35 -0700 Subject: memcg: enable accounting for signals When a user send a signal to any another processes it forces the kernel to allocate memory for 'struct sigqueue' objects. The number of signals is limited by RLIMIT_SIGPENDING resource limit, but even the default settings allow each user to consume up to several megabytes of memory. It makes sense to account for these allocations to restrict the host's memory consumption from inside the memcg-limited container. Link: https://lkml.kernel.org/r/e34e958c-e785-712e-a62a-2c7b66c646c7@virtuozzo.com Signed-off-by: Vasily Averin Reviewed-by: Shakeel Butt Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Borislav Petkov Cc: Borislav Petkov Cc: Christian Brauner Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: Jens Axboe Cc: Jiri Slaby Cc: Johannes Weiner Cc: Kirill Tkhai Cc: Michal Hocko Cc: Oleg Nesterov Cc: Roman Gushchin Cc: Serge Hallyn Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: Yutian Yang Cc: Zefan Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index a3229add4455..8921c4a8419f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4663,7 +4663,7 @@ void __init signals_init(void) { siginfo_buildtime_checks(); - sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC); + sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC | SLAB_ACCOUNT); } #ifdef CONFIG_KGDB_KDB -- cgit v1.2.3 From c509723ec27e925bb91a20682c448e95d4bc8c9f Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 2 Sep 2021 14:55:39 -0700 Subject: memcg: enable accounting for posix_timers_cache slab A program may create multiple interval timers using timer_create(). For each timer the kernel preallocates a "queued real-time signal", Consequently, the number of timers is limited by the RLIMIT_SIGPENDING resource limit. The allocated object is quite small, ~250 bytes, but even the default signal limits allow to consume up to 100 megabytes per user. It makes sense to account for them to limit the host's memory consumption from inside the memcg-limited container. Link: https://lkml.kernel.org/r/57795560-025c-267c-6b1a-dea852d95530@virtuozzo.com Signed-off-by: Vasily Averin Reviewed-by: Thomas Gleixner Reviewed-by: Shakeel Butt Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Borislav Petkov Cc: Borislav Petkov Cc: Christian Brauner Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: Jens Axboe Cc: Jiri Slaby Cc: Johannes Weiner Cc: Kirill Tkhai Cc: Michal Hocko Cc: Oleg Nesterov Cc: Roman Gushchin Cc: Serge Hallyn Cc: Tejun Heo Cc: Vladimir Davydov Cc: Yutian Yang Cc: Zefan Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/posix-timers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index dd5697d7347b..7363f81dc31a 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -273,8 +273,8 @@ static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp) static __init int init_posix_timers(void) { posix_timers_cache = kmem_cache_create("posix_timers_cache", - sizeof (struct k_itimer), 0, SLAB_PANIC, - NULL); + sizeof(struct k_itimer), 0, + SLAB_PANIC | SLAB_ACCOUNT, NULL); return 0; } __initcall(init_posix_timers); -- cgit v1.2.3 From 65d759c8f9f57b96c199f3fe5cfb93ac7da095e9 Mon Sep 17 00:00:00 2001 From: Charan Teja Reddy Date: Thu, 2 Sep 2021 14:59:59 -0700 Subject: mm: compaction: support triggering of proactive compaction by user The proactive compaction[1] gets triggered for every 500msec and run compaction on the node for COMPACTION_HPAGE_ORDER (usually order-9) pages based on the value set to sysctl.compaction_proactiveness. Triggering the compaction for every 500msec in search of COMPACTION_HPAGE_ORDER pages is not needed for all applications, especially on the embedded system usecases which may have few MB's of RAM. Enabling the proactive compaction in its state will endup in running almost always on such systems. Other side, proactive compaction can still be very much useful for getting a set of higher order pages in some controllable manner(controlled by using the sysctl.compaction_proactiveness). So, on systems where enabling the proactive compaction always may proove not required, can trigger the same from user space on write to its sysctl interface. As an example, say app launcher decide to launch the memory heavy application which can be launched fast if it gets more higher order pages thus launcher can prepare the system in advance by triggering the proactive compaction from userspace. This triggering of proactive compaction is done on a write to sysctl.compaction_proactiveness by user. [1]https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit?id=facdaa917c4d5a376d09d25865f5a863f906234a [akpm@linux-foundation.org: tweak vm.rst, per Mike] Link: https://lkml.kernel.org/r/1627653207-12317-1-git-send-email-charante@codeaurora.org Signed-off-by: Charan Teja Reddy Acked-by: Vlastimil Babka Acked-by: Rafael Aquini Cc: Mike Rapoport Cc: Luis Chamberlain Cc: Kees Cook Cc: Iurii Zaikin Cc: Dave Hansen Cc: Mel Gorman Cc: Nitin Gupta Cc: Jonathan Corbet Cc: Khalid Aziz Cc: David Rientjes Cc: Vinayak Menon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/sysctl/vm.rst | 3 ++- include/linux/compaction.h | 2 ++ include/linux/mmzone.h | 1 + kernel/sysctl.c | 2 +- mm/compaction.c | 38 +++++++++++++++++++++++++++++++-- 5 files changed, 42 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 003d5cc3751b..5e795202111f 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -118,7 +118,8 @@ compaction_proactiveness This tunable takes a value in the range [0, 100] with a default value of 20. This tunable determines how aggressively compaction is done in the -background. Setting it to 0 disables proactive compaction. +background. Write of a non zero value to this tunable will immediately +trigger the proactive compaction. Setting it to 0 disables proactive compaction. Note that compaction has a non-trivial system-wide impact as pages belonging to different processes are moved around, which could also lead diff --git a/include/linux/compaction.h b/include/linux/compaction.h index c24098c7acca..34bce35c808d 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -84,6 +84,8 @@ static inline unsigned long compact_gap(unsigned int order) extern unsigned int sysctl_compaction_proactiveness; extern int sysctl_compaction_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); +extern int compaction_proactiveness_sysctl_handler(struct ctl_table *table, + int write, void *buffer, size_t *length, loff_t *ppos); extern int sysctl_extfrag_threshold; extern int sysctl_compact_unevictable_allowed; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 59bad25ce78e..1bd5f5955f9a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -846,6 +846,7 @@ typedef struct pglist_data { enum zone_type kcompactd_highest_zoneidx; wait_queue_head_t kcompactd_wait; struct task_struct *kcompactd; + bool proactive_compact_trigger; #endif /* * This is a per-node reserve of pages that are not available diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 272f4a272f8c..297f0b3966bd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2871,7 +2871,7 @@ static struct ctl_table vm_table[] = { .data = &sysctl_compaction_proactiveness, .maxlen = sizeof(sysctl_compaction_proactiveness), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = compaction_proactiveness_sysctl_handler, .extra1 = SYSCTL_ZERO, .extra2 = &one_hundred, }, diff --git a/mm/compaction.c b/mm/compaction.c index 4ee0d40d93f2..fa9b2b598eab 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2706,6 +2706,30 @@ static void compact_nodes(void) */ unsigned int __read_mostly sysctl_compaction_proactiveness = 20; +int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *length, loff_t *ppos) +{ + int rc, nid; + + rc = proc_dointvec_minmax(table, write, buffer, length, ppos); + if (rc) + return rc; + + if (write && sysctl_compaction_proactiveness) { + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + + if (pgdat->proactive_compact_trigger) + continue; + + pgdat->proactive_compact_trigger = true; + wake_up_interruptible(&pgdat->kcompactd_wait); + } + } + + return 0; +} + /* * This is the entry point for compacting all nodes via * /proc/sys/vm/compact_memory @@ -2750,7 +2774,8 @@ void compaction_unregister_node(struct node *node) static inline bool kcompactd_work_requested(pg_data_t *pgdat) { - return pgdat->kcompactd_max_order > 0 || kthread_should_stop(); + return pgdat->kcompactd_max_order > 0 || kthread_should_stop() || + pgdat->proactive_compact_trigger; } static bool kcompactd_node_suitable(pg_data_t *pgdat) @@ -2901,9 +2926,16 @@ static int kcompactd(void *p) while (!kthread_should_stop()) { unsigned long pflags; + /* + * Avoid the unnecessary wakeup for proactive compaction + * when it is disabled. + */ + if (!sysctl_compaction_proactiveness) + timeout = MAX_SCHEDULE_TIMEOUT; trace_mm_compaction_kcompactd_sleep(pgdat->node_id); if (wait_event_freezable_timeout(pgdat->kcompactd_wait, - kcompactd_work_requested(pgdat), timeout)) { + kcompactd_work_requested(pgdat), timeout) && + !pgdat->proactive_compact_trigger) { psi_memstall_enter(&pflags); kcompactd_do_work(pgdat); @@ -2938,6 +2970,8 @@ static int kcompactd(void *p) timeout = default_timeout << COMPACT_MAX_DEFER_SHIFT; } + if (unlikely(pgdat->proactive_compact_trigger)) + pgdat->proactive_compact_trigger = false; } return 0; -- cgit v1.2.3 From dce49103962840dd61423d7627748d6c558d58c5 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 2 Sep 2021 15:00:33 -0700 Subject: mm: wire up syscall process_mrelease Split off from prev patch in the series that implements the syscall. Link: https://lkml.kernel.org/r/20210809185259.405936-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Geert Uytterhoeven Cc: Andy Lutomirski Cc: Christian Brauner Cc: Christoph Hellwig Cc: David Hildenbrand Cc: David Rientjes Cc: Florian Weimer Cc: Jan Engelhardt Cc: Jann Horn Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Rik van Riel Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tim Murray Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/syscalls/syscall.tbl | 2 ++ arch/arm/tools/syscall.tbl | 2 ++ arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ arch/ia64/kernel/syscalls/syscall.tbl | 2 ++ arch/m68k/kernel/syscalls/syscall.tbl | 2 ++ arch/microblaze/kernel/syscalls/syscall.tbl | 2 ++ arch/mips/kernel/syscalls/syscall_n32.tbl | 2 ++ arch/mips/kernel/syscalls/syscall_n64.tbl | 2 ++ arch/mips/kernel/syscalls/syscall_o32.tbl | 2 ++ arch/parisc/kernel/syscalls/syscall.tbl | 2 ++ arch/powerpc/kernel/syscalls/syscall.tbl | 2 ++ arch/s390/kernel/syscalls/syscall.tbl | 2 ++ arch/sh/kernel/syscalls/syscall.tbl | 2 ++ arch/sparc/kernel/syscalls/syscall.tbl | 2 ++ arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 2 ++ include/linux/syscalls.h | 1 + include/uapi/asm-generic/unistd.h | 4 +++- kernel/sys_ni.c | 1 + 21 files changed, 38 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index a17687ed4b51..605645eae04c 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -486,3 +486,5 @@ 554 common landlock_create_ruleset sys_landlock_create_ruleset 555 common landlock_add_rule sys_landlock_add_rule 556 common landlock_restrict_self sys_landlock_restrict_self +# 557 reserved for memfd_secret +558 common process_mrelease sys_process_mrelease diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index c5df1179fc5d..2f32eb8beca8 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -460,3 +460,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 727bfc3be99b..3cb206aea3db 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 447 +#define __NR_compat_syscalls 449 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 99ffcafc736c..0f49cdb180dd 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -901,6 +901,8 @@ __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) #define __NR_landlock_restrict_self 446 __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) +#define __NR_process_mrelease 448 +__SYSCALL(__NR_process_mrelease, sys_process_mrelease) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 6d07742c57b8..9bf45f2be966 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -367,3 +367,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 541bc1b3a8f9..f1f98ee6c82d 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -446,3 +446,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a176faca2927..da49ddd4bb54 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -452,3 +452,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index c2d2e19abea8..56c8d3cf42ed 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -385,3 +385,5 @@ 444 n32 landlock_create_ruleset sys_landlock_create_ruleset 445 n32 landlock_add_rule sys_landlock_add_rule 446 n32 landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 n32 process_mrelease sys_process_mrelease diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index ac653d08b1ea..1ca7bc337932 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -361,3 +361,5 @@ 444 n64 landlock_create_ruleset sys_landlock_create_ruleset 445 n64 landlock_add_rule sys_landlock_add_rule 446 n64 landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 n64 process_mrelease sys_process_mrelease diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 253f2cd70b6b..fd3a9df60ec2 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -434,3 +434,5 @@ 444 o32 landlock_create_ruleset sys_landlock_create_ruleset 445 o32 landlock_add_rule sys_landlock_add_rule 446 o32 landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 o32 process_mrelease sys_process_mrelease diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index e26187b9ab87..040df1b7a589 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -444,3 +444,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index aef2a290e71a..d8ebd7d37c0f 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -526,3 +526,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 64d51ab5a8b4..57233ace30cb 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -449,3 +449,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease sys_process_mrelease diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index e0a70be77d84..2f6e95eb4690 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -449,3 +449,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 603f5a821502..42fc2906215d 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -492,3 +492,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ce763a12311c..661a03bcfbd1 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -452,3 +452,4 @@ 445 i386 landlock_add_rule sys_landlock_add_rule 446 i386 landlock_restrict_self sys_landlock_restrict_self 447 i386 memfd_secret sys_memfd_secret +448 i386 process_mrelease sys_process_mrelease diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index f6b57799c1ea..807b6a1de8e8 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -369,6 +369,7 @@ 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self 447 common memfd_secret sys_memfd_secret +448 common process_mrelease sys_process_mrelease # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 235d67d6ceb4..f4384951f393 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -417,3 +417,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 69c9a7010081..00bc170a50f0 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -915,6 +915,7 @@ asmlinkage long sys_mincore(unsigned long start, size_t len, asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec, size_t vlen, int behavior, unsigned int flags); +asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index a9d6fcd95f42..14c8fe863c6d 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -877,9 +877,11 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) #define __NR_memfd_secret 447 __SYSCALL(__NR_memfd_secret, sys_memfd_secret) #endif +#define __NR_process_mrelease 448 +__SYSCALL(__NR_process_mrelease, sys_process_mrelease) #undef __NR_syscalls -#define __NR_syscalls 448 +#define __NR_syscalls 449 /* * 32 bit systems traditionally used different diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 30971b1dd4a9..18a9c2cde767 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -289,6 +289,7 @@ COND_SYSCALL(munlockall); COND_SYSCALL(mincore); COND_SYSCALL(madvise); COND_SYSCALL(process_madvise); +COND_SYSCALL(process_mrelease); COND_SYSCALL(remap_file_pages); COND_SYSCALL(mbind); COND_SYSCALL_COMPAT(mbind); -- cgit v1.2.3