diff options
| author | Christian Brauner <brauner@kernel.org> | 2025-09-15 14:45:37 +0300 |
|---|---|---|
| committer | Christian Brauner <brauner@kernel.org> | 2025-09-19 15:26:17 +0300 |
| commit | 3ab378cfa793c648d4edf02bbfff3af8715aca91 (patch) | |
| tree | b5a57da7ff6c26434569ec95e8d2cf43f9cbe3c6 /include/linux | |
| parent | 8f5ae30d69d7543eee0d70083daf4de8fe15d585 (diff) | |
| parent | 28ef38a9a2c7aa4dd8dfbb1d2b12f9ea84044327 (diff) | |
| download | linux-3ab378cfa793c648d4edf02bbfff3af8715aca91.tar.xz | |
Merge patch series "ns: support file handles"
Christian Brauner <brauner@kernel.org> says:
For a while now we have supported file handles for pidfds. This has
proven to be very useful.
Extend the concept to cover namespaces as well. After this patchset it
is possible to encode and decode namespace file handles using the
commong name_to_handle_at() and open_by_handle_at() apis.
Namespaces file descriptors can already be derived from pidfds which
means they aren't subject to overmount protection bugs. IOW, it's
irrelevant if the caller would not have access to an appropriate
/proc/<pid>/ns/ directory as they could always just derive the namespace
based on a pidfd already.
It has the same advantage as pidfds. It's possible to reliably and for
the lifetime of the system refer to a namespace without pinning any
resources and to compare them.
Permission checking is kept simple. If the caller is located in the
namespace the file handle refers to they are able to open it otherwise
they must hold privilege over the owning namespace of the relevant
namespace.
Both the network namespace and the mount namespace already have an
associated cookie that isn't recycled and is fully exposed to userspace.
Move this into ns_common and use the same id space for all namespaces so
they can trivially and reliably be compared.
There's more coming based on the iterator infrastructure but the series
is large enough and focuses on file handles.
Extensive selftests included.
* patches from https://lore.kernel.org/20250912-work-namespace-v2-0-1a247645cef5@kernel.org: (33 commits)
selftests/namespaces: add file handle selftests
selftests/namespaces: add identifier selftests
tools: update nsfs.h uapi header
nsfs: add missing id retrieval support
nsfs: support exhaustive file handles
nsfs: support file handles
nsfs: add current_in_namespace()
ns: add to_<type>_ns() to respective headers
uts: support ns lookup
user: support ns lookup
time: support ns lookup
pid: support ns lookup
net: support ns lookup
ipc: support ns lookup
cgroup: support ns lookup
mnt: support ns lookup
nstree: make iterator generic
ns: remove ns_alloc_inum()
uts: use ns_common_init()
user: use ns_common_init()
...
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'include/linux')
32 files changed, 297 insertions, 105 deletions
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 45f2f278b50a..70807c679f1a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -185,6 +185,7 @@ struct atmdev_ops { /* only send is required */ int (*compat_ioctl)(struct atm_dev *dev,unsigned int cmd, void __user *arg); #endif + int (*pre_send)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send)(struct atm_vcc *vcc,struct sk_buff *skb); int (*send_bh)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95886b404b16..fe1797bbec42 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -656,6 +656,7 @@ enum { QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ + QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */ QUEUE_FLAG_MAX }; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b18fb5fcb38e..9ca25346f7cb 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -794,6 +794,11 @@ extern struct cgroup_namespace init_cgroup_ns; #ifdef CONFIG_CGROUPS +static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) +{ + return container_of(ns, struct cgroup_namespace, ns); +} + void free_cgroup_ns(struct cgroup_namespace *ns); struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6f04a1d8c720..64ff73c533e5 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -288,14 +288,6 @@ static inline void *offset_to_ptr(const int *off) #define __ADDRESSABLE(sym) \ ___ADDRESSABLE(sym, __section(".discard.addressable")) -#define __ADDRESSABLE_ASM(sym) \ - .pushsection .discard.addressable,"aw"; \ - .align ARCH_SEL(8,4); \ - ARCH_SEL(.quad, .long) __stringify(sym); \ - .popsection; - -#define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym)) - /* * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index edfa61d80702..62cd7b35a29c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -168,6 +168,7 @@ enum cpuhp_state { CPUHP_AP_QCOM_TIMER_STARTING, CPUHP_AP_TEGRA_TIMER_STARTING, CPUHP_AP_ARMADA_TIMER_STARTING, + CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING, CPUHP_AP_MIPS_GIC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_REALTEK_TIMER_STARTING, diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index f48e5fb88bd5..332b80c42b6f 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -153,6 +153,9 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, { __free_pages(page, get_order(size)); } +static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ +} #endif /* CONFIG_DMA_CMA*/ #ifdef CONFIG_DMA_DECLARE_COHERENT diff --git a/include/linux/export.h b/include/linux/export.h index f35d03b4113b..a686fd0ba406 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -91,6 +91,6 @@ #define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns) #define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns) -#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) +#define EXPORT_SYMBOL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) #endif /* _LINUX_EXPORT_H */ diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index cfb0dd1ea49c..3aac58a520c7 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -123,6 +123,12 @@ enum fid_type { FILEID_BCACHEFS_WITH_PARENT = 0xb2, /* + * + * 64 bit namespace identifier, 32 bit namespace type, 32 bit inode number. + */ + FILEID_NSFS = 0xf1, + + /* * 64 bit unique kernfs id */ FILEID_KERNFS = 0xfe, diff --git a/include/linux/firewire.h b/include/linux/firewire.h index cceb70415ed2..d38c6e538e5c 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -341,7 +341,11 @@ struct fw_address_handler { u64 length; fw_address_callback_t address_callback; void *callback_data; + + // Only for core functions. struct list_head link; + struct kref kref; + struct completion done; }; struct fw_address_region { diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..2f2edc53bf3c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -4023,4 +4023,18 @@ static inline bool vfs_empty_path(int dfd, const char __user *path) int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter); +static inline bool extensible_ioctl_valid(unsigned int cmd_a, + unsigned int cmd_b, size_t min_size) +{ + if (_IOC_DIR(cmd_a) != _IOC_DIR(cmd_b)) + return false; + if (_IOC_TYPE(cmd_a) != _IOC_TYPE(cmd_b)) + return false; + if (_IOC_NR(cmd_a) != _IOC_NR(cmd_b)) + return false; + if (_IOC_SIZE(cmd_a) < min_size) + return false; + return true; +} + #endif /* _LINUX_FS_H */ diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index 4696abfd311c..3e85afe794c0 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -264,12 +264,7 @@ static inline bool iosys_map_is_set(const struct iosys_map *map) */ static inline void iosys_map_clear(struct iosys_map *map) { - if (map->is_iomem) { - map->vaddr_iomem = NULL; - map->is_iomem = false; - } else { - map->vaddr = NULL; - } + memset(map, 0, sizeof(*map)); } /** diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h index c4aa58032faf..f9a17fbbd398 100644 --- a/include/linux/iov_iter.h +++ b/include/linux/iov_iter.h @@ -160,7 +160,7 @@ size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2 do { struct folio *folio = folioq_folio(folioq, slot); - size_t part, remain, consumed; + size_t part, remain = 0, consumed; size_t fsize; void *base; @@ -168,14 +168,16 @@ size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2 break; fsize = folioq_folio_size(folioq, slot); - base = kmap_local_folio(folio, skip); - part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); - remain = step(base, progress, part, priv, priv2); - kunmap_local(base); - consumed = part - remain; - len -= consumed; - progress += consumed; - skip += consumed; + if (skip < fsize) { + base = kmap_local_folio(folio, skip); + part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + } if (skip >= fsize) { skip = 0; slot++; diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e8240cf2611a..924e4754374f 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -129,6 +129,11 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #endif #if defined(CONFIG_IPC_NS) +static inline struct ipc_namespace *to_ipc_ns(struct ns_common *ns) +{ + return container_of(ns, struct ipc_namespace, ns); +} + extern struct ipc_namespace *copy_ipcs(unsigned long flags, struct user_namespace *user_ns, struct ipc_namespace *ns); diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 75a2fb8b16c3..0143358874b0 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -57,47 +57,21 @@ static inline void kcov_remote_start_usb(u64 id) /* * The softirq flavor of kcov_remote_*() functions is introduced as a temporary - * workaround for KCOV's lack of nested remote coverage sections support. - * - * Adding support is tracked in https://bugzilla.kernel.org/show_bug.cgi?id=210337. - * - * kcov_remote_start_usb_softirq(): - * - * 1. Only collects coverage when called in the softirq context. This allows - * avoiding nested remote coverage collection sections in the task context. - * For example, USB/IP calls usb_hcd_giveback_urb() in the task context - * within an existing remote coverage collection section. Thus, KCOV should - * not attempt to start collecting coverage within the coverage collection - * section in __usb_hcd_giveback_urb() in this case. - * - * 2. Disables interrupts for the duration of the coverage collection section. - * This allows avoiding nested remote coverage collection sections in the - * softirq context (a softirq might occur during the execution of a work in - * the BH workqueue, which runs with in_serving_softirq() > 0). - * For example, usb_giveback_urb_bh() runs in the BH workqueue with - * interrupts enabled, so __usb_hcd_giveback_urb() might be interrupted in - * the middle of its remote coverage collection section, and the interrupt - * handler might invoke __usb_hcd_giveback_urb() again. + * work around for kcov's lack of nested remote coverage sections support in + * task context. Adding support for nested sections is tracked in: + * https://bugzilla.kernel.org/show_bug.cgi?id=210337 */ -static inline unsigned long kcov_remote_start_usb_softirq(u64 id) +static inline void kcov_remote_start_usb_softirq(u64 id) { - unsigned long flags = 0; - - if (in_serving_softirq()) { - local_irq_save(flags); + if (in_serving_softirq() && !in_hardirq()) kcov_remote_start_usb(id); - } - - return flags; } -static inline void kcov_remote_stop_softirq(unsigned long flags) +static inline void kcov_remote_stop_softirq(void) { - if (in_serving_softirq()) { + if (in_serving_softirq() && !in_hardirq()) kcov_remote_stop(); - local_irq_restore(flags); - } } #ifdef CONFIG_64BIT @@ -131,11 +105,8 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} -static inline unsigned long kcov_remote_start_usb_softirq(u64 id) -{ - return 0; -} -static inline void kcov_remote_stop_softirq(unsigned long flags) {} +static inline void kcov_remote_start_usb_softirq(u64 id) {} +static inline void kcov_remote_stop_softirq(void) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b96746376e17..fcda8481de9a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -40,8 +40,9 @@ extern unsigned long long max_possible_pfn; * via a driver, and never indicated in the firmware-provided memory map as * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the * kernel resource tree. - * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are - * not initialized (only for reserved regions). + * @MEMBLOCK_RSRV_NOINIT: reserved memory region for which struct pages are not + * fully initialized. Users of this flag are responsible to properly initialize + * struct pages of this region * @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use, * either explictitly with memblock_reserve_kern() or via memblock * allocation APIs. All memblock allocations set this flag. diff --git a/include/linux/migrate.h b/include/linux/migrate.h index acadd41e0b5c..9009e27b5f44 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -79,6 +79,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); +int set_movable_ops(const struct movable_operations *ops, enum pagetype type); #else @@ -100,6 +101,10 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, { return -ENOSYS; } +static inline int set_movable_ops(const struct movable_operations *ops, enum pagetype type) +{ + return -ENOSYS; +} #endif /* CONFIG_MIGRATION */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e5de4b0a433..f3a3b761abfb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2071,6 +2071,8 @@ enum netdev_reg_state { * @max_pacing_offload_horizon: max EDT offload horizon in nsec. * @napi_config: An array of napi_config structures containing per-NAPI * settings. + * @num_napi_configs: number of allocated NAPI config structs, + * always >= max(num_rx_queues, num_tx_queues). * @gro_flush_timeout: timeout for GRO layer in NAPI * @napi_defer_hard_irqs: If not zero, provides a counter that would * allow to avoid NIC hard IRQ, on busy queues. @@ -2482,8 +2484,9 @@ struct net_device { u64 max_pacing_offload_horizon; struct napi_config *napi_config; - unsigned long gro_flush_timeout; + u32 num_napi_configs; u32 napi_defer_hard_irqs; + unsigned long gro_flush_timeout; /** * @up: copy of @state's IFF_UP, but safe to read with just @lock. diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 185bd8196503..98c96d649bf9 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -150,6 +150,7 @@ struct netfs_io_stream { bool active; /* T if stream is active */ bool need_retry; /* T if this stream needs retrying */ bool failed; /* T if this stream failed */ + bool transferred_valid; /* T is ->transferred is valid */ }; /* diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 169b4ae30ff4..9aed39abc94b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -160,6 +160,7 @@ extern void nfs_join_page_group(struct nfs_page *head, extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int); extern int nfs_page_set_headlock(struct nfs_page *req); extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 7d22ea50b098..7224072cccc5 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -3,14 +3,43 @@ #define _LINUX_NS_COMMON_H #include <linux/refcount.h> +#include <linux/rbtree.h> struct proc_ns_operations; +struct cgroup_namespace; +struct ipc_namespace; +struct mnt_namespace; +struct net; +struct pid_namespace; +struct time_namespace; +struct user_namespace; +struct uts_namespace; + struct ns_common { struct dentry *stashed; const struct proc_ns_operations *ops; unsigned int inum; refcount_t count; + union { + struct { + u64 ns_id; + struct rb_node ns_tree_node; + struct list_head ns_list_node; + }; + struct rcu_head ns_rcu; + }; }; +#define to_ns_common(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &(__ns)->ns, \ + struct ipc_namespace *: &(__ns)->ns, \ + struct mnt_namespace *: &(__ns)->ns, \ + struct net *: &(__ns)->ns, \ + struct pid_namespace *: &(__ns)->ns, \ + struct time_namespace *: &(__ns)->ns, \ + struct user_namespace *: &(__ns)->ns, \ + struct uts_namespace *: &(__ns)->ns) + #endif diff --git a/include/linux/nsfs.h b/include/linux/nsfs.h new file mode 100644 index 000000000000..e5a5fa83d36b --- /dev/null +++ b/include/linux/nsfs.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */ + +#ifndef _LINUX_NSFS_H +#define _LINUX_NSFS_H + +#include <linux/ns_common.h> +#include <linux/cred.h> +#include <linux/pid_namespace.h> + +struct path; +struct task_struct; +struct proc_ns_operations; + +int ns_get_path(struct path *path, struct task_struct *task, + const struct proc_ns_operations *ns_ops); +typedef struct ns_common *ns_get_path_helper_t(void *); +int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb, + void *private_data); + +bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino); + +int ns_get_name(char *buf, size_t size, struct task_struct *task, + const struct proc_ns_operations *ns_ops); +void nsfs_init(void); + +#define __current_namespace_from_type(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: current->nsproxy->cgroup_ns, \ + struct ipc_namespace *: current->nsproxy->ipc_ns, \ + struct net *: current->nsproxy->net_ns, \ + struct pid_namespace *: task_active_pid_ns(current), \ + struct mnt_namespace *: current->nsproxy->mnt_ns, \ + struct time_namespace *: current->nsproxy->time_ns, \ + struct user_namespace *: current_user_ns(), \ + struct uts_namespace *: current->nsproxy->uts_ns) + +#define current_in_namespace(__ns) (__current_namespace_from_type(__ns) == __ns) + +#endif /* _LINUX_NSFS_H */ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index dab6a1734a22..e6bec522b139 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -42,17 +42,6 @@ struct nsproxy { }; extern struct nsproxy init_nsproxy; -#define to_ns_common(__ns) \ - _Generic((__ns), \ - struct cgroup_namespace *: &(__ns->ns), \ - struct ipc_namespace *: &(__ns->ns), \ - struct net *: &(__ns->ns), \ - struct pid_namespace *: &(__ns->ns), \ - struct mnt_namespace *: &(__ns->ns), \ - struct time_namespace *: &(__ns->ns), \ - struct user_namespace *: &(__ns->ns), \ - struct uts_namespace *: &(__ns->ns)) - /* * A structure to encompass all bits needed to install * a partial or complete new set of namespaces. diff --git a/include/linux/nstree.h b/include/linux/nstree.h new file mode 100644 index 000000000000..29ad6402260c --- /dev/null +++ b/include/linux/nstree.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_NSTREE_H +#define _LINUX_NSTREE_H + +#include <linux/ns_common.h> +#include <linux/nsproxy.h> +#include <linux/rbtree.h> +#include <linux/seqlock.h> +#include <linux/rculist.h> +#include <linux/cookie.h> + +/** + * struct ns_tree - Namespace tree + * @ns_tree: Rbtree of namespaces of a particular type + * @ns_list: Sequentially walkable list of all namespaces of this type + * @ns_tree_lock: Seqlock to protect the tree and list + */ +struct ns_tree { + struct rb_root ns_tree; + struct list_head ns_list; + seqlock_t ns_tree_lock; + int type; +}; + +extern struct ns_tree cgroup_ns_tree; +extern struct ns_tree ipc_ns_tree; +extern struct ns_tree mnt_ns_tree; +extern struct ns_tree net_ns_tree; +extern struct ns_tree pid_ns_tree; +extern struct ns_tree time_ns_tree; +extern struct ns_tree user_ns_tree; +extern struct ns_tree uts_ns_tree; + +#define to_ns_tree(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &(cgroup_ns_tree), \ + struct ipc_namespace *: &(ipc_ns_tree), \ + struct net *: &(net_ns_tree), \ + struct pid_namespace *: &(pid_ns_tree), \ + struct mnt_namespace *: &(mnt_ns_tree), \ + struct time_namespace *: &(time_ns_tree), \ + struct user_namespace *: &(user_ns_tree), \ + struct uts_namespace *: &(uts_ns_tree)) + +u64 ns_tree_gen_id(struct ns_common *ns); +void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree); +void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree); +struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type); +struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns, + struct ns_tree *ns_tree, + bool previous); + +static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree) +{ + ns_tree_gen_id(ns); + __ns_tree_add_raw(ns, ns_tree); +} + +/** + * ns_tree_add_raw - Add a namespace to a namespace + * @ns: Namespace to add + * + * This function adds a namespace to the appropriate namespace tree + * without assigning a id. + */ +#define ns_tree_add_raw(__ns) __ns_tree_add_raw(to_ns_common(__ns), to_ns_tree(__ns)) + +/** + * ns_tree_add - Add a namespace to a namespace tree + * @ns: Namespace to add + * + * This function assigns a new id to the namespace and adds it to the + * appropriate namespace tree and list. + */ +#define ns_tree_add(__ns) __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns)) + +/** + * ns_tree_remove - Remove a namespace from a namespace tree + * @ns: Namespace to remove + * + * This function removes a namespace from the appropriate namespace + * tree and list. + */ +#define ns_tree_remove(__ns) __ns_tree_remove(to_ns_common(__ns), to_ns_tree(__ns)) + +#define ns_tree_adjoined_rcu(__ns, __previous) \ + __ns_tree_adjoined_rcu(to_ns_common(__ns), to_ns_tree(__ns), __previous) + +#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node)) + +#endif /* _LINUX_NSTREE_H */ diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7c67a5811199..ba0efc8c8596 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -54,6 +54,11 @@ extern struct pid_namespace init_pid_ns; #define PIDNS_ADDING (1U << 31) #ifdef CONFIG_PID_NS +static inline struct pid_namespace *to_pid_ns(struct ns_common *ns) +{ + return container_of(ns, struct pid_namespace, ns); +} + static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { if (ns != &init_pid_ns) diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 78276a11c48d..1571e9157fa5 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -27,6 +27,7 @@ #define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c #define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d #define INT3472_GPIO_TYPE_HANDSHAKE 0x12 +#define INT3472_GPIO_TYPE_HOTPLUG_DETECT 0x13 #define INT3472_PDEV_MAX_NAME_LEN 23 #define INT3472_MAX_SENSOR_GPIOS 3 diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 4b20375f3783..7f89f0829e60 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -5,7 +5,7 @@ #ifndef _LINUX_PROC_NS_H #define _LINUX_PROC_NS_H -#include <linux/ns_common.h> +#include <linux/nsfs.h> #include <uapi/linux/nsfs.h> struct pid_namespace; @@ -66,25 +66,27 @@ static inline void proc_free_inum(unsigned int inum) {} #endif /* CONFIG_PROC_FS */ -static inline int ns_alloc_inum(struct ns_common *ns) +static inline int ns_common_init(struct ns_common *ns, + const struct proc_ns_operations *ops, + bool alloc_inum) { - WRITE_ONCE(ns->stashed, NULL); - return proc_alloc_inum(&ns->inum); + if (alloc_inum) { + int ret; + ret = proc_alloc_inum(&ns->inum); + if (ret) + return ret; + } + refcount_set(&ns->count, 1); + ns->stashed = NULL; + ns->ops = ops; + ns->ns_id = 0; + RB_CLEAR_NODE(&ns->ns_tree_node); + INIT_LIST_HEAD(&ns->ns_list_node); + return 0; } #define ns_free_inum(ns) proc_free_inum((ns)->inum) #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) -extern int ns_get_path(struct path *path, struct task_struct *task, - const struct proc_ns_operations *ns_ops); -typedef struct ns_common *ns_get_path_helper_t(void *); -extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb, - void *private_data); - -extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino); - -extern int ns_get_name(char *buf, size_t size, struct task_struct *task, - const struct proc_ns_operations *ns_ops); -extern void nsfs_init(void); #endif /* _LINUX_PROC_NS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2b272382673d..f8188b833350 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2152,6 +2152,8 @@ static inline struct mutex *__get_task_blocked_on(struct task_struct *p) static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) { + struct mutex *blocked_on = READ_ONCE(p->blocked_on); + WARN_ON_ONCE(!m); /* The task should only be setting itself as blocked */ WARN_ON_ONCE(p != current); @@ -2162,8 +2164,8 @@ static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) * with a different mutex. Note, setting it to the same * lock repeatedly is ok. */ - WARN_ON_ONCE(p->blocked_on && p->blocked_on != m); - p->blocked_on = m; + WARN_ON_ONCE(blocked_on && blocked_on != m); + WRITE_ONCE(p->blocked_on, m); } static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m) @@ -2174,16 +2176,19 @@ static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m) static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m) { - WARN_ON_ONCE(!m); - /* Currently we serialize blocked_on under the mutex::wait_lock */ - lockdep_assert_held_once(&m->wait_lock); - /* - * There may be cases where we re-clear already cleared - * blocked_on relationships, but make sure we are not - * clearing the relationship with a different lock. - */ - WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m); - p->blocked_on = NULL; + if (m) { + struct mutex *blocked_on = READ_ONCE(p->blocked_on); + + /* Currently we serialize blocked_on under the mutex::wait_lock */ + lockdep_assert_held_once(&m->wait_lock); + /* + * There may be cases where we re-clear already cleared + * blocked_on relationships, but make sure we are not + * clearing the relationship with a different lock. + */ + WARN_ON_ONCE(blocked_on && blocked_on != m); + } + WRITE_ONCE(p->blocked_on, NULL); } static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b923ddb6df..fa633657e4c0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4172,6 +4172,8 @@ int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); +int skb_copy_datagram_from_iter_full(struct sk_buff *skb, int offset, + struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index bb2c52f4fc94..a47a4ce4183e 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -33,6 +33,11 @@ struct time_namespace { extern struct time_namespace init_time_ns; #ifdef CONFIG_TIME_NS +static inline struct time_namespace *to_time_ns(struct ns_common *ns) +{ + return container_of(ns, struct time_namespace, ns); +} +void __init time_ns_init(void); extern int vdso_join_timens(struct task_struct *task, struct time_namespace *ns); extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); @@ -108,6 +113,10 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) } #else +static inline void __init time_ns_init(void) +{ +} + static inline int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) { diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index a0bb6d012137..a09056ad090e 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -168,6 +168,11 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns, #ifdef CONFIG_USER_NS +static inline struct user_namespace *to_user_ns(struct ns_common *ns) +{ + return container_of(ns, struct user_namespace, ns); +} + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) diff --git a/include/linux/utsname.h b/include/linux/utsname.h index bf7613ba412b..5d34c4f0f945 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -30,6 +30,11 @@ struct uts_namespace { extern struct uts_namespace init_uts_ns; #ifdef CONFIG_UTS_NS +static inline struct uts_namespace *to_uts_ns(struct ns_common *ns) +{ + return container_of(ns, struct uts_namespace, ns); +} + static inline void get_uts_ns(struct uts_namespace *ns) { refcount_inc(&ns->ns.count); diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 918cf25cd3c6..8bf156dde554 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -328,8 +328,6 @@ static inline bool virtio_get_shm_region(struct virtio_device *vdev, struct virtio_shm_region *region, u8 id) { - if (!region->len) - return false; if (!vdev->config->get_shm_region) return false; return vdev->config->get_shm_region(vdev, region, id); |
