diff options
| author | David S. Miller <davem@davemloft.net> | 2021-04-12 23:27:11 +0300 | 
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2021-04-12 23:27:11 +0300 | 
| commit | 645b34a7b544686d82c6a082bc78df33c322cae9 (patch) | |
| tree | 4f871cebe602618f0bf13b51746d9499c9b7cca9 /net/sysctl_net.c | |
| parent | a115d24a636e892ddd1ae58f8e23c78a0390cb68 (diff) | |
| parent | 2671fa4dc0109d3fb581bc3078fdf17b5d9080f6 (diff) | |
| download | linux-645b34a7b544686d82c6a082bc78df33c322cae9.tar.xz | |
Merge branch 'netns-sysctl-isolation'
Jonathon Reinhart says:
====================
Ensuring net sysctl isolation
This patchset is the result of an audit of /proc/sys/net to prove that
it is safe to be mouted read-write in a container when a net namespace
is in use. See [1].
The first commit adds code to detect sysctls which are not netns-safe,
and can "leak" changes to other net namespaces.
My manual audit found, and the above feature confirmed, that there are
two nf_conntrack sysctls which are in fact not netns-safe.
I considered sending the latter to netfilter-devel, but I think it's
better to have both together on net-next: Adding only the former causes
undesirable warnings in the kernel log.
[1]: https://github.com/opencontainers/runc/issues/2826
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sysctl_net.c')
| -rw-r--r-- | net/sysctl_net.c | 48 | 
1 files changed, 48 insertions, 0 deletions
| diff --git a/net/sysctl_net.c b/net/sysctl_net.c index d14dab8b6774..f6cb0d4d114c 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -115,9 +115,57 @@ out1:  	goto out;  } +/* Verify that sysctls for non-init netns are safe by either: + * 1) being read-only, or + * 2) having a data pointer which points outside of the global kernel/module + *    data segment, and rather into the heap where a per-net object was + *    allocated. + */ +static void ensure_safe_net_sysctl(struct net *net, const char *path, +				   struct ctl_table *table) +{ +	struct ctl_table *ent; + +	pr_debug("Registering net sysctl (net %p): %s\n", net, path); +	for (ent = table; ent->procname; ent++) { +		unsigned long addr; +		const char *where; + +		pr_debug("  procname=%s mode=%o proc_handler=%ps data=%p\n", +			 ent->procname, ent->mode, ent->proc_handler, ent->data); + +		/* If it's not writable inside the netns, then it can't hurt. */ +		if ((ent->mode & 0222) == 0) { +			pr_debug("    Not writable by anyone\n"); +			continue; +		} + +		/* Where does data point? */ +		addr = (unsigned long)ent->data; +		if (is_module_address(addr)) +			where = "module"; +		else if (core_kernel_data(addr)) +			where = "kernel"; +		else +			continue; + +		/* If it is writable and points to kernel/module global +		 * data, then it's probably a netns leak. +		 */ +		WARN(1, "sysctl %s/%s: data points to %s global data: %ps\n", +		     path, ent->procname, where, ent->data); + +		/* Make it "safe" by dropping writable perms */ +		ent->mode &= ~0222; +	} +} +  struct ctl_table_header *register_net_sysctl(struct net *net,  	const char *path, struct ctl_table *table)  { +	if (!net_eq(net, &init_net)) +		ensure_safe_net_sysctl(net, path, table); +  	return __register_sysctl_table(&net->sysctls, path, table);  }  EXPORT_SYMBOL_GPL(register_net_sysctl); | 
