summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c488
-rw-r--r--net/ipv6/addrlabel.c36
-rw-r--r--net/ipv6/af_inet6.c24
-rw-r--r--net/ipv6/anycast.c40
-rw-r--r--net/ipv6/calipso.c29
-rw-r--r--net/ipv6/esp6.c58
-rw-r--r--net/ipv6/esp6_offload.c6
-rw-r--r--net/ipv6/exthdrs.c3
-rw-r--r--net/ipv6/fib6_notifier.c2
-rw-r--r--net/ipv6/fib6_rules.c116
-rw-r--r--net/ipv6/icmp.c55
-rw-r--r--net/ipv6/ila/ila_common.c6
-rw-r--r--net/ipv6/ila/ila_lwt.c4
-rw-r--r--net/ipv6/ila/ila_xlat.c29
-rw-r--r--net/ipv6/inet6_connection_sock.c16
-rw-r--r--net/ipv6/inet6_hashtables.c40
-rw-r--r--net/ipv6/ioam6.c14
-rw-r--r--net/ipv6/ioam6_iptunnel.c121
-rw-r--r--net/ipv6/ip6_fib.c162
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_gre.c51
-rw-r--r--net/ipv6/ip6_input.c14
-rw-r--r--net/ipv6/ip6_output.c62
-rw-r--r--net/ipv6/ip6_tunnel.c49
-rw-r--r--net/ipv6/ip6_vti.c42
-rw-r--r--net/ipv6/ip6mr.c103
-rw-r--r--net/ipv6/mcast.c145
-rw-r--r--net/ipv6/ndisc.c34
-rw-r--r--net/ipv6/netfilter.c12
-rw-r--r--net/ipv6/netfilter/Kconfig9
-rw-r--r--net/ipv6/netfilter/ip6_tables.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c29
-rw-r--r--net/ipv6/netfilter/nf_dup_ipv6.c6
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c15
-rw-r--r--net/ipv6/netfilter/nf_socket_ipv6.c23
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c36
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/raw.c18
-rw-r--r--net/ipv6/reassembly.c31
-rw-r--r--net/ipv6/route.c525
-rw-r--r--net/ipv6/rpl_iptunnel.c71
-rw-r--r--net/ipv6/seg6_hmac.c13
-rw-r--r--net/ipv6/seg6_iptunnel.c110
-rw-r--r--net/ipv6/seg6_local.c20
-rw-r--r--net/ipv6/sit.c46
-rw-r--r--net/ipv6/tcp_ipv6.c93
-rw-r--r--net/ipv6/tcpv6_offload.c25
-rw-r--r--net/ipv6/udp.c184
-rw-r--r--net/ipv6/udp_offload.c7
-rw-r--r--net/ipv6/xfrm6_input.c18
-rw-r--r--net/ipv6/xfrm6_output.c4
51 files changed, 1871 insertions, 1179 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 94dceac52884..ba2ec7c870cc 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -80,6 +80,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/l3mdev.h>
+#include <net/netdev_lock.h>
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
#include <linux/netconf.h>
@@ -312,7 +313,7 @@ static inline bool addrconf_link_ready(const struct net_device *dev)
static void addrconf_del_rs_timer(struct inet6_dev *idev)
{
- if (del_timer(&idev->rs_timer))
+ if (timer_delete(&idev->rs_timer))
__in6_dev_put(idev);
}
@@ -377,6 +378,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
int err = -ENOMEM;
ASSERT_RTNL();
+ netdev_ops_assert_locked(dev);
if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev)
return ERR_PTR(-EINVAL);
@@ -402,7 +404,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
return ERR_PTR(err);
}
if (ndev->cnf.forwarding)
- dev_disable_lro(dev);
+ netif_disable_lro(dev);
/* We refer to the device */
netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL);
@@ -852,7 +854,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
struct inet6_dev *idev;
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -865,13 +867,12 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int newf)
{
- struct net *net;
+ struct net *net = (struct net *)table->extra2;
int old;
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
- net = (struct net *)table->extra2;
old = *p;
WRITE_ONCE(*p, newf);
@@ -881,7 +882,7 @@ static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int
NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
- rtnl_unlock();
+ rtnl_net_unlock(net);
return 0;
}
@@ -903,7 +904,7 @@ static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int
net->ipv6.devconf_all);
} else if ((!newf) ^ (!old))
dev_forward_change((struct inet6_dev *)table->extra1);
- rtnl_unlock();
+ rtnl_net_unlock(net);
if (newf)
rt6_purge_dflt_routers(net);
@@ -916,7 +917,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
struct inet6_dev *idev;
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf);
@@ -933,13 +934,12 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int newf)
{
- struct net *net;
+ struct net *net = (struct net *)table->extra2;
int old;
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
- net = (struct net *)table->extra2;
old = *p;
WRITE_ONCE(*p, newf);
@@ -950,7 +950,7 @@ static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int ne
NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
- rtnl_unlock();
+ rtnl_net_unlock(net);
return 0;
}
@@ -964,7 +964,8 @@ static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int ne
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
}
- rtnl_unlock();
+
+ rtnl_net_unlock(net);
return 1;
}
@@ -1016,7 +1017,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr)
{
- u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+ u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net));
return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
}
@@ -2570,6 +2571,24 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
return idev;
}
+static void delete_tempaddrs(struct inet6_dev *idev,
+ struct inet6_ifaddr *ifp)
+{
+ struct inet6_ifaddr *ift, *tmp;
+
+ write_lock_bh(&idev->lock);
+ list_for_each_entry_safe(ift, tmp, &idev->tempaddr_list, tmp_list) {
+ if (ift->ifpub != ifp)
+ continue;
+
+ in6_ifa_hold(ift);
+ write_unlock_bh(&idev->lock);
+ ipv6_del_addr(ift);
+ write_lock_bh(&idev->lock);
+ }
+ write_unlock_bh(&idev->lock);
+}
+
static void manage_tempaddrs(struct inet6_dev *idev,
struct inet6_ifaddr *ifp,
__u32 valid_lft, __u32 prefered_lft,
@@ -2962,11 +2981,11 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
return -EFAULT;
- rtnl_lock();
+ rtnl_net_lock(net);
dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
if (dev && dev->type == ARPHRD_SIT)
err = addrconf_set_sit_dstaddr(net, dev, &ireq);
- rtnl_unlock();
+ rtnl_net_unlock(net);
return err;
}
@@ -2990,39 +3009,25 @@ static int ipv6_mc_config(struct sock *sk, bool join,
/*
* Manual configuration of address on an interface
*/
-static int inet6_addr_add(struct net *net, int ifindex,
- struct ifa6_config *cfg,
+static int inet6_addr_add(struct net *net, struct net_device *dev,
+ struct ifa6_config *cfg, clock_t expires, u32 flags,
struct netlink_ext_ack *extack)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
- struct net_device *dev;
- unsigned long timeout;
- clock_t expires;
- u32 flags;
- ASSERT_RTNL();
+ ASSERT_RTNL_NET(net);
if (cfg->plen > 128) {
NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length");
return -EINVAL;
}
- /* check the lifetime */
- if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft) {
- NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid");
- return -EINVAL;
- }
-
if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64) {
NL_SET_ERR_MSG_MOD(extack, "address with \"mngtmpaddr\" flag must have a prefix length of 64");
return -EINVAL;
}
- dev = __dev_get_by_index(net, ifindex);
- if (!dev)
- return -ENODEV;
-
idev = addrconf_add_dev(dev);
if (IS_ERR(idev)) {
NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
@@ -3031,7 +3036,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk,
- true, cfg->pfx, ifindex);
+ true, cfg->pfx, dev->ifindex);
if (ret < 0) {
NL_SET_ERR_MSG_MOD(extack, "Multicast auto join failed");
@@ -3041,24 +3046,6 @@ static int inet6_addr_add(struct net *net, int ifindex,
cfg->scope = ipv6_addr_scope(cfg->pfx);
- timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ);
- if (addrconf_finite_timeout(timeout)) {
- expires = jiffies_to_clock_t(timeout * HZ);
- cfg->valid_lft = timeout;
- flags = RTF_EXPIRES;
- } else {
- expires = 0;
- flags = 0;
- cfg->ifa_flags |= IFA_F_PERMANENT;
- }
-
- timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ);
- if (addrconf_finite_timeout(timeout)) {
- if (timeout == 0)
- cfg->ifa_flags |= IFA_F_DEPRECATED;
- cfg->preferred_lft = timeout;
- }
-
ifp = ipv6_add_addr(idev, cfg, true, extack);
if (!IS_ERR(ifp)) {
if (!(cfg->ifa_flags & IFA_F_NOPREFIXROUTE)) {
@@ -3086,7 +3073,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
return 0;
} else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk, false,
- cfg->pfx, ifindex);
+ cfg->pfx, dev->ifindex);
}
return PTR_ERR(ifp);
@@ -3111,7 +3098,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
return -ENODEV;
}
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (!idev) {
NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
return -ENXIO;
@@ -3124,11 +3111,12 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
in6_ifa_hold(ifp);
read_unlock_bh(&idev->lock);
- if (!(ifp->flags & IFA_F_TEMPORARY) &&
- (ifa_flags & IFA_F_MANAGETEMPADDR))
- manage_tempaddrs(idev, ifp, 0, 0, false,
- jiffies);
ipv6_del_addr(ifp);
+
+ if (!(ifp->flags & IFA_F_TEMPORARY) &&
+ (ifp->flags & IFA_F_MANAGETEMPADDR))
+ delete_tempaddrs(idev, ifp);
+
addrconf_verify_rtnl(net);
if (ipv6_addr_is_multicast(pfx)) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk,
@@ -3151,6 +3139,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
.preferred_lft = INFINITY_LIFE_TIME,
.valid_lft = INFINITY_LIFE_TIME,
};
+ struct net_device *dev;
struct in6_ifreq ireq;
int err;
@@ -3163,9 +3152,16 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
cfg.pfx = &ireq.ifr6_addr;
cfg.plen = ireq.ifr6_prefixlen;
- rtnl_lock();
- err = inet6_addr_add(net, ireq.ifr6_ifindex, &cfg, NULL);
- rtnl_unlock();
+ rtnl_net_lock(net);
+ dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
+ if (dev) {
+ netdev_lock_ops(dev);
+ err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL);
+ netdev_unlock_ops(dev);
+ } else {
+ err = -ENODEV;
+ }
+ rtnl_net_unlock(net);
return err;
}
@@ -3180,10 +3176,10 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
return -EFAULT;
- rtnl_lock();
+ rtnl_net_lock(net);
err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
ireq.ifr6_prefixlen, NULL);
- rtnl_unlock();
+ rtnl_net_unlock(net);
return err;
}
@@ -3218,16 +3214,13 @@ static void add_v4_addrs(struct inet6_dev *idev)
struct in6_addr addr;
struct net_device *dev;
struct net *net = dev_net(idev->dev);
- int scope, plen, offset = 0;
+ int scope, plen;
u32 pflags = 0;
ASSERT_RTNL();
memset(&addr, 0, sizeof(struct in6_addr));
- /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */
- if (idev->dev->addr_len == sizeof(struct in6_addr))
- offset = sizeof(struct in6_addr) - 4;
- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
+ memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) {
scope = IPV6_ADDR_COMPATv4;
@@ -3538,7 +3531,13 @@ static void addrconf_gre_config(struct net_device *dev)
return;
}
- if (dev->type == ARPHRD_ETHER) {
+ /* Generate the IPv6 link-local address using addrconf_addr_gen(),
+ * unless we have an IPv4 GRE device not bound to an IP address and
+ * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this
+ * case). Such devices fall back to add_v4_addrs() instead.
+ */
+ if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 &&
+ idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) {
addrconf_addr_gen(idev, true);
return;
}
@@ -4018,7 +4017,7 @@ restart:
static void addrconf_rs_timer(struct timer_list *t)
{
- struct inet6_dev *idev = from_timer(idev, t, rs_timer);
+ struct inet6_dev *idev = timer_container_of(idev, t, rs_timer);
struct net_device *dev = idev->dev;
struct in6_addr lladdr;
int rtr_solicits;
@@ -4186,6 +4185,7 @@ static void addrconf_dad_work(struct work_struct *w)
struct inet6_dev *idev = ifp->idev;
bool bump_id, disable_ipv6 = false;
struct in6_addr mcaddr;
+ struct net *net;
enum {
DAD_PROCESS,
@@ -4193,7 +4193,9 @@ static void addrconf_dad_work(struct work_struct *w)
DAD_ABORT,
} action = DAD_PROCESS;
- rtnl_lock();
+ net = dev_net(idev->dev);
+
+ rtnl_net_lock(net);
spin_lock_bh(&ifp->lock);
if (ifp->state == INET6_IFADDR_STATE_PREDAD) {
@@ -4203,7 +4205,7 @@ static void addrconf_dad_work(struct work_struct *w)
action = DAD_ABORT;
ifp->state = INET6_IFADDR_STATE_POSTDAD;
- if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->accept_dad) > 1 ||
+ if ((READ_ONCE(net->ipv6.devconf_all->accept_dad) > 1 ||
READ_ONCE(idev->cnf.accept_dad) > 1) &&
!idev->cnf.disable_ipv6 &&
!(ifp->flags & IFA_F_STABLE_PRIVACY)) {
@@ -4285,7 +4287,7 @@ static void addrconf_dad_work(struct work_struct *w)
ifp->dad_nonce);
out:
in6_ifa_put(ifp);
- rtnl_unlock();
+ rtnl_net_unlock(net);
}
/* ifp->idev must be at least read locked */
@@ -4733,9 +4735,9 @@ static void addrconf_verify_work(struct work_struct *w)
struct net *net = container_of(to_delayed_work(w), struct net,
ipv6.addr_chk_work);
- rtnl_lock();
+ rtnl_net_lock(net);
addrconf_verify_rtnl(net);
- rtnl_unlock();
+ rtnl_net_unlock(net);
}
static void addrconf_verify(struct net *net)
@@ -4793,16 +4795,20 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!pfx)
return -EINVAL;
- ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+ ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
/* We ignore other flags so far. */
ifa_flags &= IFA_F_MANAGETEMPADDR;
- return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
- ifm->ifa_prefixlen, extack);
+ rtnl_net_lock(net);
+ err = inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
+ ifm->ifa_prefixlen, extack);
+ rtnl_net_unlock(net);
+
+ return err;
}
-static int modify_prefix_route(struct inet6_ifaddr *ifp,
+static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp,
unsigned long expires, u32 flags,
bool modify_peer)
{
@@ -4826,7 +4832,9 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
ifp->prefix_len,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
- } else {
+ return 0;
+ }
+ if (f6i != net->ipv6.fib6_null_entry) {
table = f6i->fib6_table;
spin_lock_bh(&table->tb6_lock);
@@ -4839,27 +4847,21 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
}
spin_unlock_bh(&table->tb6_lock);
-
- fib6_info_release(f6i);
}
+ fib6_info_release(f6i);
return 0;
}
static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
- struct ifa6_config *cfg)
+ struct ifa6_config *cfg, clock_t expires,
+ u32 flags)
{
- u32 flags;
- clock_t expires;
- unsigned long timeout;
bool was_managetempaddr;
- bool had_prefixroute;
bool new_peer = false;
+ bool had_prefixroute;
- ASSERT_RTNL();
-
- if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft)
- return -EINVAL;
+ ASSERT_RTNL_NET(net);
if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR &&
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
@@ -4868,24 +4870,6 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
cfg->ifa_flags &= ~IFA_F_OPTIMISTIC;
- timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ);
- if (addrconf_finite_timeout(timeout)) {
- expires = jiffies_to_clock_t(timeout * HZ);
- cfg->valid_lft = timeout;
- flags = RTF_EXPIRES;
- } else {
- expires = 0;
- flags = 0;
- cfg->ifa_flags |= IFA_F_PERMANENT;
- }
-
- timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ);
- if (addrconf_finite_timeout(timeout)) {
- if (timeout == 0)
- cfg->ifa_flags |= IFA_F_DEPRECATED;
- cfg->preferred_lft = timeout;
- }
-
if (cfg->peer_pfx &&
memcmp(&ifp->peer_addr, cfg->peer_pfx, sizeof(struct in6_addr))) {
if (!ipv6_addr_any(&ifp->peer_addr))
@@ -4920,7 +4904,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
int rc = -ENOENT;
if (had_prefixroute)
- rc = modify_prefix_route(ifp, expires, flags, false);
+ rc = modify_prefix_route(net, ifp, expires, flags, false);
/* prefix route could have been deleted; if so restore it */
if (rc == -ENOENT) {
@@ -4930,7 +4914,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
}
if (had_prefixroute && !ipv6_addr_any(&ifp->peer_addr))
- rc = modify_prefix_route(ifp, expires, flags, true);
+ rc = modify_prefix_route(net, ifp, expires, flags, true);
if (rc == -ENOENT && !ipv6_addr_any(&ifp->peer_addr)) {
addrconf_prefix_route(&ifp->peer_addr, ifp->prefix_len,
@@ -4952,14 +4936,12 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
}
if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) {
- if (was_managetempaddr &&
- !(ifp->flags & IFA_F_MANAGETEMPADDR)) {
- cfg->valid_lft = 0;
- cfg->preferred_lft = 0;
- }
- manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft,
- cfg->preferred_lft, !was_managetempaddr,
- jiffies);
+ if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR))
+ delete_tempaddrs(ifp->idev, ifp);
+ else
+ manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft,
+ cfg->preferred_lft, !was_managetempaddr,
+ jiffies);
}
addrconf_verify_rtnl(net);
@@ -4972,13 +4954,16 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
struct inet6_dev *idev;
struct ifa6_config cfg;
+ struct ifaddrmsg *ifm;
+ unsigned long timeout;
+ clock_t expires;
+ u32 flags;
int err;
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
@@ -5001,8 +4986,18 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[IFA_PROTO])
cfg.ifa_proto = nla_get_u8(tb[IFA_PROTO]);
+ cfg.ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
+
+ /* We ignore other flags so far. */
+ cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS |
+ IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE |
+ IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+ cfg.ifa_flags |= IFA_F_PERMANENT;
cfg.valid_lft = INFINITY_LIFE_TIME;
cfg.preferred_lft = INFINITY_LIFE_TIME;
+ expires = 0;
+ flags = 0;
if (tb[IFA_CACHEINFO]) {
struct ifa_cacheinfo *ci;
@@ -5010,27 +5005,44 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
ci = nla_data(tb[IFA_CACHEINFO]);
cfg.valid_lft = ci->ifa_valid;
cfg.preferred_lft = ci->ifa_prefered;
+
+ if (!cfg.valid_lft || cfg.preferred_lft > cfg.valid_lft) {
+ NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid");
+ return -EINVAL;
+ }
+
+ timeout = addrconf_timeout_fixup(cfg.valid_lft, HZ);
+ if (addrconf_finite_timeout(timeout)) {
+ cfg.ifa_flags &= ~IFA_F_PERMANENT;
+ cfg.valid_lft = timeout;
+ expires = jiffies_to_clock_t(timeout * HZ);
+ flags = RTF_EXPIRES;
+ }
+
+ timeout = addrconf_timeout_fixup(cfg.preferred_lft, HZ);
+ if (addrconf_finite_timeout(timeout)) {
+ if (timeout == 0)
+ cfg.ifa_flags |= IFA_F_DEPRECATED;
+
+ cfg.preferred_lft = timeout;
+ }
}
+ rtnl_net_lock(net);
+
dev = __dev_get_by_index(net, ifm->ifa_index);
if (!dev) {
NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
- return -ENODEV;
+ err = -ENODEV;
+ goto unlock_rtnl;
}
- if (tb[IFA_FLAGS])
- cfg.ifa_flags = nla_get_u32(tb[IFA_FLAGS]);
- else
- cfg.ifa_flags = ifm->ifa_flags;
-
- /* We ignore other flags so far. */
- cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS |
- IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE |
- IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
-
+ netdev_lock_ops(dev);
idev = ipv6_find_idev(dev);
- if (IS_ERR(idev))
- return PTR_ERR(idev);
+ if (IS_ERR(idev)) {
+ err = PTR_ERR(idev);
+ goto unlock;
+ }
if (!ipv6_allow_optimistic_dad(net, idev))
cfg.ifa_flags &= ~IFA_F_OPTIMISTIC;
@@ -5038,7 +5050,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (cfg.ifa_flags & IFA_F_NODAD &&
cfg.ifa_flags & IFA_F_OPTIMISTIC) {
NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
- return -EINVAL;
+ err = -EINVAL;
+ goto unlock;
}
ifa = ipv6_get_ifaddr(net, cfg.pfx, dev, 1);
@@ -5047,7 +5060,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
* It would be best to check for !NLM_F_CREATE here but
* userspace already relies on not having to provide this.
*/
- return inet6_addr_add(net, ifm->ifa_index, &cfg, extack);
+ err = inet6_addr_add(net, dev, &cfg, expires, flags, extack);
+ goto unlock;
}
if (nlh->nlmsg_flags & NLM_F_EXCL ||
@@ -5055,10 +5069,14 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
NL_SET_ERR_MSG_MOD(extack, "address already assigned");
err = -EEXIST;
} else {
- err = inet6_addr_modify(net, ifa, &cfg);
+ err = inet6_addr_modify(net, ifa, &cfg, expires, flags);
}
in6_ifa_put(ifa);
+unlock:
+ netdev_unlock_ops(dev);
+unlock_rtnl:
+ rtnl_net_unlock(net);
return err;
}
@@ -5112,22 +5130,6 @@ static inline int inet6_ifaddr_msgsize(void)
+ nla_total_size(4) /* IFA_RT_PRIORITY */;
}
-enum addr_type_t {
- UNICAST_ADDR,
- MULTICAST_ADDR,
- ANYCAST_ADDR,
-};
-
-struct inet6_fill_args {
- u32 portid;
- u32 seq;
- int event;
- unsigned int flags;
- int netnsid;
- int ifindex;
- enum addr_type_t type;
-};
-
static int inet6_fill_ifaddr(struct sk_buff *skb,
const struct inet6_ifaddr *ifa,
struct inet6_fill_args *args)
@@ -5206,15 +5208,16 @@ error:
return -EMSGSIZE;
}
-static int inet6_fill_ifmcaddr(struct sk_buff *skb,
- const struct ifmcaddr6 *ifmca,
- struct inet6_fill_args *args)
+int inet6_fill_ifmcaddr(struct sk_buff *skb,
+ const struct ifmcaddr6 *ifmca,
+ struct inet6_fill_args *args)
{
int ifindex = ifmca->idev->dev->ifindex;
u8 scope = RT_SCOPE_UNIVERSE;
struct nlmsghdr *nlh;
- if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
+ if (!args->force_rt_scope_universe &&
+ ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
@@ -5240,9 +5243,9 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb,
return 0;
}
-static int inet6_fill_ifacaddr(struct sk_buff *skb,
- const struct ifacaddr6 *ifaca,
- struct inet6_fill_args *args)
+int inet6_fill_ifacaddr(struct sk_buff *skb,
+ const struct ifacaddr6 *ifaca,
+ struct inet6_fill_args *args)
{
struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
int ifindex = dev ? dev->ifindex : 1;
@@ -5346,12 +5349,12 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
struct ifaddrmsg *ifm;
int err, i;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ ifm = nlmsg_payload(nlh, sizeof(*ifm));
+ if (!ifm) {
NL_SET_ERR_MSG_MOD(extack, "Invalid header for address dump request");
return -EINVAL;
}
- ifm = nlmsg_data(nlh);
if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address dump request");
return -EINVAL;
@@ -5403,6 +5406,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
.flags = NLM_F_MULTI,
.netnsid = -1,
.type = type,
+ .force_rt_scope_universe = false,
};
struct {
unsigned long ifindex;
@@ -5483,7 +5487,8 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
struct ifaddrmsg *ifm;
int i, err;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ ifm = nlmsg_payload(nlh, sizeof(*ifm));
+ if (!ifm) {
NL_SET_ERR_MSG_MOD(extack, "Invalid header for get address request");
return -EINVAL;
}
@@ -5492,7 +5497,6 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_ipv6_policy, extack);
- ifm = nlmsg_data(nlh);
if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request");
return -EINVAL;
@@ -5531,6 +5535,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
.event = RTM_NEWADDR,
.flags = 0,
.netnsid = -1,
+ .force_rt_scope_universe = false,
};
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
@@ -5602,6 +5607,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
.event = event,
.flags = 0,
.netnsid = -1,
+ .force_rt_scope_universe = false,
};
int err = -ENOBUFS;
@@ -5789,6 +5795,27 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
}
}
+static int inet6_fill_ifla6_stats_attrs(struct sk_buff *skb,
+ struct inet6_dev *idev)
+{
+ struct nlattr *nla;
+
+ nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+ if (!nla)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+ nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+ if (!nla)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
u32 ext_filter_mask)
{
@@ -5811,18 +5838,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
/* XXX - MC not implemented */
- if (ext_filter_mask & RTEXT_FILTER_SKIP_STATS)
- return 0;
-
- nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
- if (!nla)
- goto nla_put_failure;
- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
-
- nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
- if (!nla)
- goto nla_put_failure;
- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+ if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS)) {
+ if (inet6_fill_ifla6_stats_attrs(skb, idev) < 0)
+ goto nla_put_failure;
+ }
nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
if (!nla)
@@ -6096,7 +6115,8 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh,
{
struct ifinfomsg *ifm;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ ifm = nlmsg_payload(nlh, sizeof(*ifm));
+ if (!ifm) {
NL_SET_ERR_MSG_MOD(extack, "Invalid header for link dump request");
return -EINVAL;
}
@@ -6106,7 +6126,6 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh,
return -EINVAL;
}
- ifm = nlmsg_data(nlh);
if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
ifm->ifi_change || ifm->ifi_index) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for dump request");
@@ -6367,7 +6386,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
struct inet6_dev *idev;
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
@@ -6388,7 +6407,7 @@ static int addrconf_disable_ipv6(const struct ctl_table *table, int *p, int newf
return 0;
}
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
old = *p;
@@ -6397,10 +6416,11 @@ static int addrconf_disable_ipv6(const struct ctl_table *table, int *p, int newf
if (p == &net->ipv6.devconf_all->disable_ipv6) {
WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf);
addrconf_disable_change(net, newf);
- } else if ((!newf) ^ (!old))
+ } else if ((!newf) ^ (!old)) {
dev_disable_change((struct inet6_dev *)table->extra1);
+ }
- rtnl_unlock();
+ rtnl_net_unlock(net);
return 0;
}
@@ -6443,20 +6463,20 @@ static int addrconf_sysctl_proxy_ndp(const struct ctl_table *ctl, int write,
if (write && old != new) {
struct net *net = ctl->extra2;
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
- if (valp == &net->ipv6.devconf_dflt->proxy_ndp)
+ if (valp == &net->ipv6.devconf_dflt->proxy_ndp) {
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_PROXY_NEIGH,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
- else if (valp == &net->ipv6.devconf_all->proxy_ndp)
+ } else if (valp == &net->ipv6.devconf_all->proxy_ndp) {
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_PROXY_NEIGH,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
- else {
+ } else {
struct inet6_dev *idev = ctl->extra1;
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -6464,7 +6484,7 @@ static int addrconf_sysctl_proxy_ndp(const struct ctl_table *ctl, int write,
idev->dev->ifindex,
&idev->cnf);
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
}
return ret;
@@ -6484,7 +6504,7 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
.mode = ctl->mode,
};
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
new_val = *((u32 *)ctl->data);
@@ -6507,19 +6527,23 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
if (idev->cnf.addr_gen_mode != new_val) {
WRITE_ONCE(idev->cnf.addr_gen_mode, new_val);
+ netdev_lock_ops(idev->dev);
addrconf_init_auto_addrs(idev->dev);
+ netdev_unlock_ops(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
struct net_device *dev;
WRITE_ONCE(net->ipv6.devconf_dflt->addr_gen_mode, new_val);
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev &&
idev->cnf.addr_gen_mode != new_val) {
WRITE_ONCE(idev->cnf.addr_gen_mode,
new_val);
+ netdev_lock_ops(idev->dev);
addrconf_init_auto_addrs(idev->dev);
+ netdev_unlock_ops(idev->dev);
}
}
}
@@ -6528,7 +6552,7 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
}
out:
- rtnl_unlock();
+ rtnl_net_unlock(net);
return ret;
}
@@ -6550,7 +6574,7 @@ static int addrconf_sysctl_stable_secret(const struct ctl_table *ctl, int write,
lctl.maxlen = IPV6_MAX_STRLEN;
lctl.data = str;
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
if (!write && !secret->initialized) {
@@ -6580,7 +6604,7 @@ static int addrconf_sysctl_stable_secret(const struct ctl_table *ctl, int write,
struct net_device *dev;
for_each_netdev(net, dev) {
- struct inet6_dev *idev = __in6_dev_get(dev);
+ struct inet6_dev *idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
WRITE_ONCE(idev->cnf.addr_gen_mode,
@@ -6595,7 +6619,7 @@ static int addrconf_sysctl_stable_secret(const struct ctl_table *ctl, int write,
}
out:
- rtnl_unlock();
+ rtnl_net_unlock(net);
return err;
}
@@ -6679,7 +6703,7 @@ int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val)
return 0;
}
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
WRITE_ONCE(*valp, val);
@@ -6688,7 +6712,7 @@ int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val)
struct net_device *dev;
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev)
addrconf_disable_policy_idev(idev, val);
}
@@ -6697,7 +6721,7 @@ int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val)
addrconf_disable_policy_idev(idev, val);
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
return 0;
}
@@ -7406,6 +7430,27 @@ static struct rtnl_af_ops inet6_ops __read_mostly = {
.set_link_af = inet6_set_link_af,
};
+static const struct rtnl_msg_handler addrconf_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETLINK,
+ .dumpit = inet6_dump_ifinfo, .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDR,
+ .doit = inet6_rtm_newaddr, .flags = RTNL_FLAG_DOIT_PERNET},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDR,
+ .doit = inet6_rtm_deladdr, .flags = RTNL_FLAG_DOIT_PERNET},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDR,
+ .doit = inet6_rtm_getaddr, .dumpit = inet6_dump_ifaddr,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETMULTICAST,
+ .dumpit = inet6_dump_ifmcaddr,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETANYCAST,
+ .dumpit = inet6_dump_ifacaddr,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETNETCONF,
+ .doit = inet6_netconf_get_devconf, .dumpit = inet6_netconf_dump_devconf,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+};
+
/*
* Init / cleanup code
*/
@@ -7433,9 +7478,9 @@ int __init addrconf_init(void)
goto out_nowq;
}
- rtnl_lock();
+ rtnl_net_lock(&init_net);
idev = ipv6_add_dev(blackhole_netdev);
- rtnl_unlock();
+ rtnl_net_unlock(&init_net);
if (IS_ERR(idev)) {
err = PTR_ERR(idev);
goto errlo;
@@ -7447,44 +7492,14 @@ int __init addrconf_init(void)
addrconf_verify(&init_net);
- rtnl_af_register(&inet6_ops);
+ err = rtnl_af_register(&inet6_ops);
+ if (err)
+ goto erraf;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
- NULL, inet6_dump_ifinfo, RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
+ err = rtnl_register_many(addrconf_rtnl_msg_handlers);
+ if (err)
goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR,
- inet6_rtm_newaddr, NULL, 0);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR,
- inet6_rtm_deladdr, NULL, 0);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR,
- inet6_rtm_getaddr, inet6_dump_ifaddr,
- RTNL_FLAG_DOIT_UNLOCKED |
- RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST,
- NULL, inet6_dump_ifmcaddr,
- RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST,
- NULL, inet6_dump_ifacaddr,
- RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF,
- inet6_netconf_get_devconf,
- inet6_netconf_dump_devconf,
- RTNL_FLAG_DOIT_UNLOCKED |
- RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
- goto errout;
err = ipv6_addr_label_rtnl_register();
if (err < 0)
goto errout;
@@ -7493,6 +7508,7 @@ int __init addrconf_init(void)
errout:
rtnl_unregister_all(PF_INET6);
rtnl_af_unregister(&inet6_ops);
+erraf:
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
destroy_workqueue(addrconf_wq);
@@ -7514,17 +7530,17 @@ void addrconf_cleanup(void)
rtnl_af_unregister(&inet6_ops);
- rtnl_lock();
+ rtnl_net_lock(&init_net);
/* clean dev list */
for_each_netdev(&init_net, dev) {
- if (__in6_dev_get(dev) == NULL)
+ if (!__in6_dev_get_rtnl_net(dev))
continue;
addrconf_ifdown(dev, true);
}
addrconf_ifdown(init_net.loopback_dev, true);
- rtnl_unlock();
+ rtnl_net_unlock(&init_net);
destroy_workqueue(addrconf_wq);
}
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index acd70b5992a7..fb63ffbcfc64 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -473,12 +473,12 @@ static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh,
{
struct ifaddrlblmsg *ifal;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) {
+ ifal = nlmsg_payload(nlh, sizeof(*ifal));
+ if (!ifal) {
NL_SET_ERR_MSG_MOD(extack, "Invalid header for address label dump request");
return -EINVAL;
}
- ifal = nlmsg_data(nlh);
if (ifal->__ifal_reserved || ifal->ifal_prefixlen ||
ifal->ifal_flags || ifal->ifal_index || ifal->ifal_seq) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address label dump request");
@@ -543,7 +543,8 @@ static int ip6addrlbl_valid_get_req(struct sk_buff *skb,
struct ifaddrlblmsg *ifal;
int i, err;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) {
+ ifal = nlmsg_payload(nlh, sizeof(*ifal));
+ if (!ifal) {
NL_SET_ERR_MSG_MOD(extack, "Invalid header for addrlabel get request");
return -EINVAL;
}
@@ -552,7 +553,6 @@ static int ip6addrlbl_valid_get_req(struct sk_buff *skb,
return nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb,
IFAL_MAX, ifal_policy, extack);
- ifal = nlmsg_data(nlh);
if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for addrlabel get request");
return -EINVAL;
@@ -634,23 +634,17 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return err;
}
+static const struct rtnl_msg_handler ipv6_adddr_label_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDRLABEL,
+ .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDRLABEL,
+ .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDRLABEL,
+ .doit = ip6addrlbl_get, .dumpit = ip6addrlbl_dump,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+};
+
int __init ipv6_addr_label_rtnl_register(void)
{
- int ret;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDRLABEL,
- ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- if (ret < 0)
- return ret;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDRLABEL,
- ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- if (ret < 0)
- return ret;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
- ip6addrlbl_get,
- ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED |
- RTNL_FLAG_DUMP_UNLOCKED);
- return ret;
+ return rtnl_register_many(ipv6_adddr_label_rtnl_msg_handlers);
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ba69b86f1c7d..acaff1296783 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -252,31 +252,29 @@ lookup_protocol:
*/
inet->inet_sport = htons(inet->inet_num);
err = sk->sk_prot->hash(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
if (!kern) {
err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
+out_sk_release:
+ sk_common_release(sk);
+ sock->sk = NULL;
+ goto out;
}
static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
@@ -717,6 +715,7 @@ const struct proto_ops inet6_stream_ops = {
#endif
.set_rcvlowat = tcp_set_rcvlowat,
};
+EXPORT_SYMBOL_GPL(inet6_stream_ops);
const struct proto_ops inet6_dgram_ops = {
.family = PF_INET6,
@@ -883,7 +882,6 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
}
return false;
}
-EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
static struct packet_type ipv6_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_IPV6),
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0627c4c18d1a..21e01695b48c 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -49,9 +49,10 @@ static DEFINE_SPINLOCK(acaddr_hash_lock);
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
-static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+static u32 inet6_acaddr_hash(const struct net *net,
+ const struct in6_addr *addr)
{
- u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+ u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net));
return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
}
@@ -277,6 +278,37 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
return aca;
}
+static void inet6_ifacaddr_notify(struct net_device *dev,
+ const struct ifacaddr6 *ifaca, int event)
+{
+ struct inet6_fill_args fillargs = {
+ .event = event,
+ .netnsid = -1,
+ };
+ struct net *net = dev_net(dev);
+ struct sk_buff *skb;
+ int err = -ENOMEM;
+
+ skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+ nla_total_size(sizeof(struct in6_addr)) +
+ nla_total_size(sizeof(struct ifa_cacheinfo)),
+ GFP_KERNEL);
+ if (!skb)
+ goto error;
+
+ err = inet6_fill_ifacaddr(skb, ifaca, &fillargs);
+ if (err < 0) {
+ pr_err("Failed to fill in anycast addresses (err %d)\n", err);
+ nlmsg_free(skb);
+ goto error;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ACADDR, NULL, GFP_KERNEL);
+ return;
+error:
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ACADDR, err);
+}
+
/*
* device anycast group inc (add if not found)
*/
@@ -332,6 +364,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
addrconf_join_solict(idev->dev, &aca->aca_addr);
+ inet6_ifacaddr_notify(idev->dev, aca, RTM_NEWANYCAST);
+
aca_put(aca);
return 0;
out:
@@ -374,6 +408,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
+ inet6_ifacaddr_notify(idev->dev, aca, RTM_DELANYCAST);
+
aca_put(aca);
return 0;
}
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index dbcea9fee626..a247bb93908b 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -1072,8 +1072,13 @@ static int calipso_sock_getattr(struct sock *sk,
struct ipv6_opt_hdr *hop;
int opt_len, len, ret_val = -ENOMSG, offset;
unsigned char *opt;
- struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk));
+ struct ipv6_pinfo *pinfo = inet6_sk(sk);
+ struct ipv6_txoptions *txopts;
+
+ if (!pinfo)
+ return -EAFNOSUPPORT;
+ txopts = txopt_get(pinfo);
if (!txopts || !txopts->hopopt)
goto done;
@@ -1125,8 +1130,13 @@ static int calipso_sock_setattr(struct sock *sk,
{
int ret_val;
struct ipv6_opt_hdr *old, *new;
- struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk));
+ struct ipv6_pinfo *pinfo = inet6_sk(sk);
+ struct ipv6_txoptions *txopts;
+ if (!pinfo)
+ return -EAFNOSUPPORT;
+
+ txopts = txopt_get(pinfo);
old = NULL;
if (txopts)
old = txopts->hopopt;
@@ -1153,8 +1163,13 @@ static int calipso_sock_setattr(struct sock *sk,
static void calipso_sock_delattr(struct sock *sk)
{
struct ipv6_opt_hdr *new_hop;
- struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk));
+ struct ipv6_pinfo *pinfo = inet6_sk(sk);
+ struct ipv6_txoptions *txopts;
+ if (!pinfo)
+ return;
+
+ txopts = txopt_get(pinfo);
if (!txopts || !txopts->hopopt)
goto done;
@@ -1192,6 +1207,10 @@ static int calipso_req_setattr(struct request_sock *req,
struct ipv6_opt_hdr *old, *new;
struct sock *sk = sk_to_full_sk(req_to_sk(req));
+ /* sk is NULL for SYN+ACK w/ SYN Cookie */
+ if (!sk)
+ return -ENOMEM;
+
if (req_inet->ipv6_opt && req_inet->ipv6_opt->hopopt)
old = req_inet->ipv6_opt->hopopt;
else
@@ -1232,6 +1251,10 @@ static void calipso_req_delattr(struct request_sock *req)
struct ipv6_txoptions *txopts;
struct sock *sk = sk_to_full_sk(req_to_sk(req));
+ /* sk is NULL for SYN+ACK w/ SYN Cookie */
+ if (!sk)
+ return;
+
if (!req_inet->ipv6_opt || !req_inet->ipv6_opt->hopopt)
return;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index b2400c226a32..72adfc107b55 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -137,47 +137,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
}
#ifdef CONFIG_INET6_ESPINTCP
-struct esp_tcp_sk {
- struct sock *sk;
- struct rcu_head rcu;
-};
-
-static void esp_free_tcp_sk(struct rcu_head *head)
-{
- struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
-
- sock_put(esk->sk);
- kfree(esk);
-}
-
static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
{
struct xfrm_encap_tmpl *encap = x->encap;
struct net *net = xs_net(x);
- struct esp_tcp_sk *esk;
__be16 sport, dport;
- struct sock *nsk;
struct sock *sk;
- sk = rcu_dereference(x->encap_sk);
- if (sk && sk->sk_state == TCP_ESTABLISHED)
- return sk;
-
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
dport = encap->encap_dport;
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (sk && sk == nsk) {
- esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
- if (!esk) {
- spin_unlock_bh(&x->lock);
- return ERR_PTR(-ENOMEM);
- }
- RCU_INIT_POINTER(x->encap_sk, NULL);
- esk->sk = sk;
- call_rcu(&esk->rcu, esp_free_tcp_sk);
- }
spin_unlock_bh(&x->lock);
sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6,
@@ -190,20 +159,6 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
return ERR_PTR(-EINVAL);
}
- spin_lock_bh(&x->lock);
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (encap->encap_sport != sport ||
- encap->encap_dport != dport) {
- sock_put(sk);
- sk = nsk ?: ERR_PTR(-EREMCHG);
- } else if (sk == nsk) {
- sock_put(sk);
- } else {
- rcu_assign_pointer(x->encap_sk, sk);
- }
- spin_unlock_bh(&x->lock);
-
return sk;
}
@@ -216,8 +171,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
sk = esp6_find_tcp_sk(x);
err = PTR_ERR_OR_ZERO(sk);
- if (err)
+ if (err) {
+ kfree_skb(skb);
goto out;
+ }
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
@@ -226,6 +183,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
err = espintcp_push_skb(sk, skb);
bh_unlock_sock(sk);
+ sock_put(sk);
+
out:
rcu_read_unlock();
return err;
@@ -315,7 +274,7 @@ static void esp_output_done(void *data, int err)
x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
esp_output_tail_tcp(x, skb);
else
- xfrm_output_resume(skb->sk, skb, err);
+ xfrm_output_resume(skb_to_full_sk(skb), skb, err);
}
}
@@ -422,6 +381,8 @@ static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
if (IS_ERR(sk))
return ERR_CAST(sk);
+ sock_put(sk);
+
*lenp = htons(len);
esph = (struct ip_esp_hdr *)(lenp + 1);
@@ -859,7 +820,8 @@ int esp6_input_done2(struct sk_buff *skb, int err)
skb_postpull_rcsum(skb, skb_network_header(skb),
skb_network_header_len(skb));
skb_pull_rcsum(skb, hlen);
- if (x->props.mode == XFRM_MODE_TUNNEL)
+ if (x->props.mode == XFRM_MODE_TUNNEL ||
+ x->props.mode == XFRM_MODE_IPTFS)
skb_reset_transport_header(skb);
else
skb_set_transport_header(skb, -hdr_len);
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 919ebfabbe4e..7b41fb4f00b5 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -80,9 +80,9 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
if (sp->len == XFRM_MAX_DEPTH)
goto out_reset;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET6);
+ x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET6);
if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
/* non-offload path will record the error and audit log */
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 6789623b2b0d..457de0745a33 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -1204,10 +1204,9 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
{
struct ipv6_txoptions *opt2;
- opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
+ opt2 = sock_kmemdup(sk, opt, opt->tot_len, GFP_ATOMIC);
if (opt2) {
long dif = (char *)opt2 - (char *)opt;
- memcpy(opt2, opt, opt->tot_len);
if (opt2->hopopt)
*((char **)&opt2->hopopt) += dif;
if (opt2->dst0opt)
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
index f87ae33e1d01..949b72610df7 100644
--- a/net/ipv6/fib6_notifier.c
+++ b/net/ipv6/fib6_notifier.c
@@ -22,7 +22,7 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
return call_fib_notifiers(net, event_type, info);
}
-static unsigned int fib6_seq_read(struct net *net)
+static unsigned int fib6_seq_read(const struct net *net)
{
return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 04a9ed5e8310..fd5f7112a51f 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -26,7 +26,10 @@ struct fib6_rule {
struct fib_rule common;
struct rt6key src;
struct rt6key dst;
+ __be32 flowlabel;
+ __be32 flowlabel_mask;
dscp_t dscp;
+ dscp_t dscp_mask;
u8 dscp_full:1; /* DSCP or TOS selector */
};
@@ -34,7 +37,7 @@ static bool fib6_rule_matchall(const struct fib_rule *rule)
{
struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
- if (r->dst.plen || r->src.plen || r->dscp)
+ if (r->dst.plen || r->src.plen || r->dscp || r->flowlabel_mask)
return false;
return fib_rule_matchall(rule);
}
@@ -56,7 +59,7 @@ int fib6_rules_dump(struct net *net, struct notifier_block *nb,
return fib_rules_dump(net, nb, AF_INET6, extack);
}
-unsigned int fib6_rules_seq_read(struct net *net)
+unsigned int fib6_rules_seq_read(const struct net *net)
{
return fib_rules_seq_read(net, AF_INET6);
}
@@ -329,18 +332,21 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
return 0;
}
- if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
+ if ((r->dscp ^ ip6_dscp(fl6->flowlabel)) & r->dscp_mask)
+ return 0;
+
+ if ((r->flowlabel ^ flowi6_get_flowlabel(fl6)) & r->flowlabel_mask)
return 0;
if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
return 0;
- if (fib_rule_port_range_set(&rule->sport_range) &&
- !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+ if (!fib_rule_port_match(&rule->sport_range, rule->sport_mask,
+ fl6->fl6_sport))
return 0;
- if (fib_rule_port_range_set(&rule->dport_range) &&
- !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+ if (!fib_rule_port_match(&rule->dport_range, rule->dport_mask,
+ fl6->fl6_dport))
return 0;
return 1;
@@ -355,19 +361,72 @@ static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
}
rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ rule6->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK);
rule6->dscp_full = true;
return 0;
}
+static int fib6_nl2rule_dscp_mask(const struct nlattr *nla,
+ struct fib6_rule *rule6,
+ struct netlink_ext_ack *extack)
+{
+ dscp_t dscp_mask;
+
+ if (!rule6->dscp_full) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "Cannot specify DSCP mask without DSCP value");
+ return -EINVAL;
+ }
+
+ dscp_mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ if (rule6->dscp & ~dscp_mask) {
+ NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask");
+ return -EINVAL;
+ }
+
+ rule6->dscp_mask = dscp_mask;
+
+ return 0;
+}
+
+static int fib6_nl2rule_flowlabel(struct nlattr **tb, struct fib6_rule *rule6,
+ struct netlink_ext_ack *extack)
+{
+ __be32 flowlabel, flowlabel_mask;
+
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL) ||
+ NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL_MASK))
+ return -EINVAL;
+
+ flowlabel = nla_get_be32(tb[FRA_FLOWLABEL]);
+ flowlabel_mask = nla_get_be32(tb[FRA_FLOWLABEL_MASK]);
+
+ if (flowlabel_mask & ~IPV6_FLOWLABEL_MASK) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[FRA_FLOWLABEL_MASK],
+ "Invalid flow label mask");
+ return -EINVAL;
+ }
+
+ if (flowlabel & ~flowlabel_mask) {
+ NL_SET_ERR_MSG(extack, "Flow label and mask do not match");
+ return -EINVAL;
+ }
+
+ rule6->flowlabel = flowlabel;
+ rule6->flowlabel_mask = flowlabel_mask;
+
+ return 0;
+}
+
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
+ struct fib6_rule *rule6 = (struct fib6_rule *)rule;
+ struct net *net = rule->fr_net;
int err = -EINVAL;
- struct net *net = sock_net(skb->sk);
- struct fib6_rule *rule6 = (struct fib6_rule *) rule;
if (!inet_validate_dscp(frh->tos)) {
NL_SET_ERR_MSG(extack,
@@ -375,10 +434,19 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
goto errout;
}
rule6->dscp = inet_dsfield_to_dscp(frh->tos);
+ rule6->dscp_mask = frh->tos ? inet_dsfield_to_dscp(INET_DSCP_MASK) : 0;
if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
goto errout;
+ if (tb[FRA_DSCP_MASK] &&
+ fib6_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule6, extack) < 0)
+ goto errout;
+
+ if ((tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) &&
+ fib6_nl2rule_flowlabel(tb, rule6, extack) < 0)
+ goto errout;
+
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid table");
@@ -444,6 +512,22 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
return 0;
}
+ if (tb[FRA_DSCP_MASK]) {
+ dscp_t dscp_mask;
+
+ dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2);
+ if (!rule6->dscp_full || rule6->dscp_mask != dscp_mask)
+ return 0;
+ }
+
+ if (tb[FRA_FLOWLABEL] &&
+ nla_get_be32(tb[FRA_FLOWLABEL]) != rule6->flowlabel)
+ return 0;
+
+ if (tb[FRA_FLOWLABEL_MASK] &&
+ nla_get_be32(tb[FRA_FLOWLABEL_MASK]) != rule6->flowlabel_mask)
+ return 0;
+
if (frh->src_len &&
nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
return 0;
@@ -466,12 +550,19 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
if (rule6->dscp_full) {
frh->tos = 0;
if (nla_put_u8(skb, FRA_DSCP,
- inet_dscp_to_dsfield(rule6->dscp) >> 2))
+ inet_dscp_to_dsfield(rule6->dscp) >> 2) ||
+ nla_put_u8(skb, FRA_DSCP_MASK,
+ inet_dscp_to_dsfield(rule6->dscp_mask) >> 2))
goto nla_put_failure;
} else {
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
}
+ if (rule6->flowlabel_mask &&
+ (nla_put_be32(skb, FRA_FLOWLABEL, rule6->flowlabel) ||
+ nla_put_be32(skb, FRA_FLOWLABEL_MASK, rule6->flowlabel_mask)))
+ goto nla_put_failure;
+
if ((rule6->dst.plen &&
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
(rule6->src.plen &&
@@ -487,7 +578,10 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(16) /* dst */
+ nla_total_size(16) /* src */
- + nla_total_size(1); /* dscp */
+ + nla_total_size(1) /* dscp */
+ + nla_total_size(1) /* dscp mask */
+ + nla_total_size(4) /* flowlabel */
+ + nla_total_size(4); /* flowlabel mask */
}
static void fib6_rule_flush_cache(struct fib_rules_ops *ops)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 071b0bc1179d..3fd19a84b358 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -76,7 +76,7 @@ static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
{
/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
if (type == ICMPV6_PKT_TOOBIG)
ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
@@ -222,10 +222,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
- peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
+ rcu_read_lock();
+ peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
res = inet_peer_xrlim_allow(peer, tmo);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
}
if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
@@ -473,7 +473,10 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (!skb->dev)
return;
- net = dev_net(skb->dev);
+
+ rcu_read_lock();
+
+ net = dev_net_rcu(skb->dev);
mark = IP6_REPLY_MARK(net, skb->mark);
/*
* Make sure we respect the rules
@@ -496,7 +499,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
!(type == ICMPV6_PARAMPROB &&
code == ICMPV6_UNK_OPTION &&
(opt_unrec(skb, info))))
- return;
+ goto out;
saddr = NULL;
}
@@ -526,7 +529,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
&hdr->saddr, &hdr->daddr);
- return;
+ goto out;
}
/*
@@ -535,7 +538,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (is_ineligible(skb)) {
net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
&hdr->saddr, &hdr->daddr);
- return;
+ goto out;
}
/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
@@ -582,7 +585,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
np = inet6_sk(sk);
if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
- goto out;
+ goto out_unlock;
tmp_hdr.icmp6_type = type;
tmp_hdr.icmp6_code = code;
@@ -600,7 +603,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
if (IS_ERR(dst))
- goto out;
+ goto out_unlock;
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
@@ -616,7 +619,6 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
goto out_dst_release;
}
- rcu_read_lock();
idev = __in6_dev_get(skb->dev);
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
@@ -630,13 +632,15 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
len + sizeof(struct icmp6hdr));
}
- rcu_read_unlock();
+
out_dst_release:
dst_release(dst);
-out:
+out_unlock:
icmpv6_xmit_unlock(sk);
out_bh_enable:
local_bh_enable();
+out:
+ rcu_read_unlock();
}
EXPORT_SYMBOL(icmp6_send);
@@ -679,8 +683,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
- skb, 0);
+ rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr,
+ NULL, 0, skb, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
@@ -717,7 +721,7 @@ EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
struct sock *sk;
struct inet6_dev *idev;
struct ipv6_pinfo *np;
@@ -832,7 +836,7 @@ enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
u8 code, __be32 info)
{
struct inet6_skb_parm *opt = IP6CB(skb);
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
const struct inet6_protocol *ipprot;
enum skb_drop_reason reason;
int inner_offset;
@@ -889,7 +893,7 @@ out:
static int icmpv6_rcv(struct sk_buff *skb)
{
enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
struct net_device *dev = icmp6_dev(skb);
struct inet6_dev *idev = __in6_dev_get(dev);
const struct in6_addr *saddr, *daddr;
@@ -921,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
- __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
+ __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS);
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
@@ -939,7 +943,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
type = hdr->icmp6_type;
- ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
+ ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type);
switch (type) {
case ICMPV6_ECHO_REQUEST:
@@ -953,12 +957,9 @@ static int icmpv6_rcv(struct sk_buff *skb)
break;
case ICMPV6_ECHO_REPLY:
- reason = ping_rcv(skb);
- break;
-
case ICMPV6_EXT_ECHO_REPLY:
- reason = ping_rcv(skb);
- break;
+ ping_rcv(skb);
+ return 0;
case ICMPV6_PKT_TOOBIG:
/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
@@ -1034,9 +1035,9 @@ static int icmpv6_rcv(struct sk_buff *skb)
csum_error:
reason = SKB_DROP_REASON_ICMP_CSUM;
- __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
+ __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
- __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
kfree_skb_reason(skb, reason);
return 0;
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 95e9146918cc..b8d43ed4689d 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&th->check, skb,
- diff, true);
+ diff, true, true);
}
break;
case NEXTHDR_UDP:
@@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&uh->check, skb,
- diff, true);
+ diff, true, true);
if (!uh->check)
uh->check = CSUM_MANGLED_0;
}
@@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
- diff, true);
+ diff, true, true);
}
break;
}
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index ff7e734e335b..7d574f5132e2 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -88,13 +88,15 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
- if (ilwt->connected) {
+ /* cache only if we don't create a dst reference loop */
+ if (ilwt->connected && orig_dst->lwtstate != dst->lwtstate) {
local_bh_disable();
dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr);
local_bh_enable();
}
}
+ skb_dst_drop(skb);
skb_dst_set(skb, dst);
return dst_output(net, sk, skb);
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 534a4498e280..1d41b2ab4884 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -105,16 +105,11 @@ static int parse_nl_config(struct genl_info *info,
xp->ip.locator_match.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_LOCATOR_MATCH]);
- if (info->attrs[ILA_ATTR_CSUM_MODE])
- xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
- else
- xp->ip.csum_mode = ILA_CSUM_NO_ACTION;
+ xp->ip.csum_mode = nla_get_u8_default(info->attrs[ILA_ATTR_CSUM_MODE],
+ ILA_CSUM_NO_ACTION);
- if (info->attrs[ILA_ATTR_IDENT_TYPE])
- xp->ip.ident_type = nla_get_u8(
- info->attrs[ILA_ATTR_IDENT_TYPE]);
- else
- xp->ip.ident_type = ILA_ATYPE_USE_FORMAT;
+ xp->ip.ident_type = nla_get_u8_default(info->attrs[ILA_ATTR_IDENT_TYPE],
+ ILA_ATYPE_USE_FORMAT);
if (info->attrs[ILA_ATTR_IFINDEX])
xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
@@ -200,6 +195,8 @@ static const struct nf_hook_ops ila_nf_hook_ops[] = {
},
};
+static DEFINE_MUTEX(ila_mutex);
+
static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
@@ -207,16 +204,20 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
int err = 0, order;
- if (!ilan->xlat.hooks_registered) {
+ if (!READ_ONCE(ilan->xlat.hooks_registered)) {
/* We defer registering net hooks in the namespace until the
* first mapping is added.
*/
- err = nf_register_net_hooks(net, ila_nf_hook_ops,
- ARRAY_SIZE(ila_nf_hook_ops));
+ mutex_lock(&ila_mutex);
+ if (!ilan->xlat.hooks_registered) {
+ err = nf_register_net_hooks(net, ila_nf_hook_ops,
+ ARRAY_SIZE(ila_nf_hook_ops));
+ if (!err)
+ WRITE_ONCE(ilan->xlat.hooks_registered, true);
+ }
+ mutex_unlock(&ila_mutex);
if (err)
return err;
-
- ilan->xlat.hooks_registered = true;
}
ila = kzalloc(sizeof(*ila), GFP_KERNEL);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 80043e46117c..8f500eaf33cf 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -54,21 +54,6 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
return dst;
}
-EXPORT_SYMBOL(inet6_csk_route_req);
-
-void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
-{
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
-
- sin6->sin6_family = AF_INET6;
- sin6->sin6_addr = sk->sk_v6_daddr;
- sin6->sin6_port = inet_sk(sk)->inet_dport;
- /* We do not store received flowlabel for TCP */
- sin6->sin6_flowinfo = 0;
- sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
- sk->sk_bound_dev_if);
-}
-EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
static inline
struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
@@ -151,4 +136,3 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
dst = inet6_csk_route_socket(sk, &fl6);
return IS_ERR(dst) ? NULL : dst;
}
-EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 9ec05e354baa..76ee521189eb 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -35,8 +35,8 @@ u32 inet6_ehashfn(const struct net *net,
lhash = (__force u32)laddr->s6_addr32[3];
fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret);
- return __inet6_ehashfn(lhash, lport, fhash, fport,
- inet6_ehash_secret + net_hash_mix(net));
+ return lport + __inet6_ehashfn(lhash, 0, fhash, fport,
+ inet6_ehash_secret + net_hash_mix(net));
}
EXPORT_SYMBOL_GPL(inet6_ehashfn);
@@ -263,7 +263,9 @@ EXPORT_SYMBOL_GPL(inet6_lookup);
static int __inet6_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk, const __u16 lport,
- struct inet_timewait_sock **twp)
+ struct inet_timewait_sock **twp,
+ bool rcu_lookup,
+ u32 hash)
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_sock *inet = inet_sk(sk);
@@ -273,14 +275,26 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
struct net *net = sock_net(sk);
const int sdif = l3mdev_master_ifindex_by_index(net, dif);
const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
- const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
- inet->inet_dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
- spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
- struct sock *sk2;
- const struct hlist_nulls_node *node;
struct inet_timewait_sock *tw = NULL;
+ const struct hlist_nulls_node *node;
+ struct sock *sk2;
+ spinlock_t *lock;
+
+ if (rcu_lookup) {
+ sk_nulls_for_each(sk2, node, &head->chain) {
+ if (sk2->sk_hash != hash ||
+ !inet6_match(net, sk2, saddr, daddr,
+ ports, dif, sdif))
+ continue;
+ if (sk2->sk_state == TCP_TIME_WAIT)
+ break;
+ return -EADDRNOTAVAIL;
+ }
+ return 0;
+ }
+ lock = inet_ehash_lockp(hinfo, hash);
spin_lock(lock);
sk_nulls_for_each(sk2, node, &head->chain) {
@@ -339,11 +353,19 @@ static u64 inet6_sk_port_offset(const struct sock *sk)
int inet6_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
+ const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
+ const struct in6_addr *saddr = &sk->sk_v6_daddr;
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct net *net = sock_net(sk);
u64 port_offset = 0;
+ u32 hash_port0;
if (!inet_sk(sk)->inet_num)
port_offset = inet6_sk_port_offset(sk);
- return __inet_hash_connect(death_row, sk, port_offset,
+
+ hash_port0 = inet6_ehashfn(net, daddr, 0, saddr, inet->inet_dport);
+
+ return __inet_hash_connect(death_row, sk, port_offset, hash_port0,
__inet6_check_established);
}
EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index 08c929513065..a84d332f952f 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -135,15 +135,11 @@ static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info)
ns->id = id;
- if (!info->attrs[IOAM6_ATTR_NS_DATA])
- data32 = IOAM6_U32_UNAVAILABLE;
- else
- data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]);
-
- if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE])
- data64 = IOAM6_U64_UNAVAILABLE;
- else
- data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]);
+ data32 = nla_get_u32_default(info->attrs[IOAM6_ATTR_NS_DATA],
+ IOAM6_U32_UNAVAILABLE);
+
+ data64 = nla_get_u64_default(info->attrs[IOAM6_ATTR_NS_DATA_WIDE],
+ IOAM6_U64_UNAVAILABLE);
ns->data = cpu_to_be32(data32);
ns->data_wide = cpu_to_be64(data64);
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index beb6b4cfc551..40df8bdfaacd 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -38,6 +38,7 @@ struct ioam6_lwt_freq {
};
struct ioam6_lwt {
+ struct dst_entry null_dst;
struct dst_cache cache;
struct ioam6_lwt_freq freq;
atomic_t pkt_cnt;
@@ -142,10 +143,8 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
}
}
- if (!tb[IOAM6_IPTUNNEL_MODE])
- mode = IOAM6_IPTUNNEL_MODE_INLINE;
- else
- mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);
+ mode = nla_get_u8_default(tb[IOAM6_IPTUNNEL_MODE],
+ IOAM6_IPTUNNEL_MODE_INLINE);
if (tb[IOAM6_IPTUNNEL_SRC] && mode == IOAM6_IPTUNNEL_MODE_INLINE) {
NL_SET_ERR_MSG(extack, "no tunnel src expected with this mode");
@@ -179,6 +178,14 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
if (err)
goto free_lwt;
+ /* This "fake" dst_entry will be stored in a dst_cache, which will call
+ * dst_hold() and dst_release() on it. We must ensure that dst_destroy()
+ * will never be called. For that, its initial refcount is 1 and +1 when
+ * it is stored in the cache. Then, +1/-1 each time we read the cache
+ * and release it. Long story short, we're fine.
+ */
+ dst_init(&ilwt->null_dst, NULL, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT);
+
atomic_set(&ilwt->pkt_cnt, 0);
ilwt->freq.k = freq_k;
ilwt->freq.n = freq_n;
@@ -255,14 +262,15 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
}
static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
- struct ioam6_lwt_encap *tuninfo)
+ struct ioam6_lwt_encap *tuninfo,
+ struct dst_entry *cache_dst)
{
struct ipv6hdr *oldhdr, *hdr;
int hdrlen, err;
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -293,7 +301,8 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
struct ioam6_lwt_encap *tuninfo,
bool has_tunsrc,
struct in6_addr *tunsrc,
- struct in6_addr *tundst)
+ struct in6_addr *tundst,
+ struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr, *inner_hdr;
@@ -302,7 +311,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
len = sizeof(*hdr) + hdrlen;
- err = skb_cow_head(skb, len + skb->mac_len);
+ err = skb_cow_head(skb, len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -336,8 +345,8 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
- struct in6_addr orig_daddr;
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct dst_entry *dst = NULL;
struct ioam6_lwt *ilwt;
int err = -EINVAL;
u32 pkt_cnt;
@@ -345,14 +354,27 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ilwt = ioam6_lwt_state(dst->lwtstate);
+ ilwt = ioam6_lwt_state(orig_dst->lwtstate);
/* Check for insertion frequency (i.e., "k over n" insertions) */
pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt);
if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k)
goto out;
- orig_daddr = ipv6_hdr(skb)->daddr;
+ local_bh_disable();
+ dst = dst_cache_get(&ilwt->cache);
+ local_bh_enable();
+
+ /* This is how we notify that the destination does not change after
+ * transformation and that we need to use orig_dst instead of the cache
+ */
+ if (dst == &ilwt->null_dst) {
+ dst_release(dst);
+
+ dst = orig_dst;
+ /* keep refcount balance: dst_release() is called at the end */
+ dst_hold(dst);
+ }
switch (ilwt->mode) {
case IOAM6_IPTUNNEL_MODE_INLINE:
@@ -361,7 +383,7 @@ do_inline:
if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
goto out;
- err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
+ err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst);
if (unlikely(err))
goto drop;
@@ -371,7 +393,7 @@ do_encap:
/* Encapsulation (ip6ip6) */
err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
ilwt->has_tunsrc, &ilwt->tunsrc,
- &ilwt->tundst);
+ &ilwt->tundst, dst);
if (unlikely(err))
goto drop;
@@ -389,52 +411,65 @@ do_encap:
goto drop;
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
+ if (unlikely(!dst)) {
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = hdr->daddr;
+ fl6.saddr = hdr->saddr;
+ fl6.flowlabel = ip6_flowinfo(hdr);
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = hdr->nexthdr;
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ err = dst->error;
+ goto drop;
+ }
- if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
+ /* If the destination is the same after transformation (which is
+ * a valid use case for IOAM), then we don't want to add it to
+ * the cache in order to avoid a reference loop. Instead, we add
+ * our fake dst_entry to the cache as a way to detect this case.
+ * Otherwise, we add the resolved destination to the cache.
+ */
local_bh_disable();
- dst = dst_cache_get(&ilwt->cache);
+ if (orig_dst->lwtstate == dst->lwtstate)
+ dst_cache_set_ip6(&ilwt->cache,
+ &ilwt->null_dst, &fl6.saddr);
+ else
+ dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
local_bh_enable();
- if (unlikely(!dst)) {
- struct ipv6hdr *hdr = ipv6_hdr(skb);
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.daddr = hdr->daddr;
- fl6.saddr = hdr->saddr;
- fl6.flowlabel = ip6_flowinfo(hdr);
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = hdr->nexthdr;
-
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst->error) {
- err = dst->error;
- dst_release(dst);
- goto drop;
- }
-
- local_bh_disable();
- dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
- local_bh_enable();
- }
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
+ }
+ /* avoid lwtunnel_output() reentry loop when destination is the same
+ * after transformation (e.g., with the inline mode)
+ */
+ if (orig_dst->lwtstate != dst->lwtstate) {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
-
return dst_output(net, sk, skb);
}
out:
- return dst->lwtstate->orig_output(net, sk, skb);
+ dst_release(dst);
+ return orig_dst->lwtstate->orig_output(net, sk, skb);
drop:
+ dst_release(dst);
kfree_skb(skb);
return err;
}
static void ioam6_destroy_state(struct lwtunnel_state *lwt)
{
+ /* Since the refcount of per-cpu dst_entry caches will never be 0 (see
+ * why above) when our "fake" dst_entry is used, it is not necessary to
+ * remove them before calling dst_cache_destroy()
+ */
dst_cache_destroy(&ioam6_lwt_state(lwt)->cache);
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index eb111d20615c..93578b2ec35f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -198,16 +198,9 @@ static void node_free_immediate(struct net *net, struct fib6_node *fn)
net->ipv6.rt6_stats->fib_nodes--;
}
-static void node_free_rcu(struct rcu_head *head)
-{
- struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
-
- kmem_cache_free(fib6_node_kmem, fn);
-}
-
static void node_free(struct net *net, struct fib6_node *fn)
{
- call_rcu(&fn->rcu, node_free_rcu);
+ kfree_rcu(fn, rcu);
net->ipv6.rt6_stats->fib_nodes--;
}
@@ -256,40 +249,52 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
struct fib6_table *fib6_new_table(struct net *net, u32 id)
{
- struct fib6_table *tb;
+ struct fib6_table *tb, *new_tb;
if (id == 0)
id = RT6_TABLE_MAIN;
+
tb = fib6_get_table(net, id);
if (tb)
return tb;
- tb = fib6_alloc_table(net, id);
- if (tb)
- fib6_link_table(net, tb);
+ new_tb = fib6_alloc_table(net, id);
+ if (!new_tb)
+ return NULL;
+
+ spin_lock_bh(&net->ipv6.fib_table_hash_lock);
+
+ tb = fib6_get_table(net, id);
+ if (unlikely(tb)) {
+ spin_unlock_bh(&net->ipv6.fib_table_hash_lock);
+ kfree(new_tb);
+ return tb;
+ }
- return tb;
+ fib6_link_table(net, new_tb);
+
+ spin_unlock_bh(&net->ipv6.fib_table_hash_lock);
+
+ return new_tb;
}
EXPORT_SYMBOL_GPL(fib6_new_table);
struct fib6_table *fib6_get_table(struct net *net, u32 id)
{
- struct fib6_table *tb;
struct hlist_head *head;
- unsigned int h;
+ struct fib6_table *tb;
- if (id == 0)
+ if (!id)
id = RT6_TABLE_MAIN;
- h = id & (FIB6_TABLE_HASHSZ - 1);
- rcu_read_lock();
- head = &net->ipv6.fib_table_hash[h];
- hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
- if (tb->tb6_id == id) {
- rcu_read_unlock();
+
+ head = &net->ipv6.fib_table_hash[id & (FIB6_TABLE_HASHSZ - 1)];
+
+ /* See comment in fib6_link_table(). RCU is not required,
+ * but rcu_dereference_raw() is used to avoid data-race.
+ */
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist, true)
+ if (tb->tb6_id == id)
return tb;
- }
- }
- rcu_read_unlock();
return NULL;
}
@@ -345,17 +350,17 @@ static void __net_init fib6_tables_init(struct net *net)
#endif
-unsigned int fib6_tables_seq_read(struct net *net)
+unsigned int fib6_tables_seq_read(const struct net *net)
{
unsigned int h, fib_seq = 0;
rcu_read_lock();
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
- struct hlist_head *head = &net->ipv6.fib_table_hash[h];
- struct fib6_table *tb;
+ const struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ const struct fib6_table *tb;
hlist_for_each_entry_rcu(tb, head, tb6_hlist)
- fib_seq += tb->fib_seq;
+ fib_seq += READ_ONCE(tb->fib_seq);
}
rcu_read_unlock();
@@ -400,7 +405,7 @@ int call_fib6_entry_notifiers(struct net *net,
.rt = rt,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, event_type, &info.info);
}
@@ -416,7 +421,7 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
.nsiblings = nsiblings,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, event_type, &info.info);
}
@@ -427,7 +432,7 @@ int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
.nsiblings = rt->fib6_nsiblings,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
}
@@ -1022,8 +1027,9 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
.table = table
};
- nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from,
- &arg);
+ rcu_read_lock();
+ nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, &arg);
+ rcu_read_unlock();
} else {
struct fib6_nh *fib6_nh;
@@ -1041,8 +1047,14 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
rt6_flush_exceptions(rt);
fib6_drop_pcpu_from(rt, table);
- if (rt->nh && !list_empty(&rt->nh_list))
- list_del_init(&rt->nh_list);
+ if (rt->nh) {
+ spin_lock(&rt->nh->lock);
+
+ if (!list_empty(&rt->nh_list))
+ list_del_init(&rt->nh_list);
+
+ spin_unlock(&rt->nh->lock);
+ }
if (refcount_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
@@ -1076,8 +1088,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
*/
static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
- struct nl_info *info,
- struct netlink_ext_ack *extack)
+ struct nl_info *info, struct netlink_ext_ack *extack,
+ struct list_head *purge_list)
{
struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
lockdep_is_held(&rt->fib6_table->tb6_lock));
@@ -1190,8 +1202,8 @@ next_iter:
while (sibling) {
if (sibling->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(sibling)) {
- list_add_tail(&rt->fib6_siblings,
- &sibling->fib6_siblings);
+ list_add_tail_rcu(&rt->fib6_siblings,
+ &sibling->fib6_siblings);
break;
}
sibling = rcu_dereference_protected(sibling->fib6_next,
@@ -1210,7 +1222,9 @@ next_iter:
fib6_nsiblings++;
}
BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
+ rcu_read_lock();
rt6_multipath_rebalance(temp_sibling);
+ rcu_read_unlock();
}
/*
@@ -1252,8 +1266,10 @@ add:
fib6_siblings)
sibling->fib6_nsiblings--;
rt->fib6_nsiblings = 0;
- list_del_init(&rt->fib6_siblings);
+ list_del_rcu(&rt->fib6_siblings);
+ rcu_read_lock();
rt6_multipath_rebalance(next_sibling);
+ rcu_read_unlock();
return err;
}
}
@@ -1301,10 +1317,9 @@ add:
}
nsiblings = iter->fib6_nsiblings;
iter->fib6_node = NULL;
- fib6_purge_rt(iter, fn, info->nl_net);
+ list_add(&iter->purge_link, purge_list);
if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
- fib6_info_release(iter);
if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
@@ -1317,10 +1332,9 @@ add:
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->fib6_next;
iter->fib6_node = NULL;
- fib6_purge_rt(iter, fn, info->nl_net);
+ list_add(&iter->purge_link, purge_list);
if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
- fib6_info_release(iter);
nsiblings--;
info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
} else {
@@ -1336,6 +1350,28 @@ add:
return 0;
}
+static int fib6_add_rt2node_nh(struct fib6_node *fn, struct fib6_info *rt,
+ struct nl_info *info, struct netlink_ext_ack *extack,
+ struct list_head *purge_list)
+{
+ int err;
+
+ spin_lock(&rt->nh->lock);
+
+ if (rt->nh->dead) {
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ err = -EINVAL;
+ } else {
+ err = fib6_add_rt2node(fn, rt, info, extack, purge_list);
+ if (!err)
+ list_add(&rt->nh_list, &rt->nh->f6i_list);
+ }
+
+ spin_unlock(&rt->nh->lock);
+
+ return err;
+}
+
static void fib6_start_gc(struct net *net, struct fib6_info *rt)
{
if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
@@ -1390,6 +1426,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
struct nl_info *info, struct netlink_ext_ack *extack)
{
struct fib6_table *table = rt->fib6_table;
+ LIST_HEAD(purge_list);
struct fib6_node *fn;
#ifdef CONFIG_IPV6_SUBTREES
struct fib6_node *pn = NULL;
@@ -1492,10 +1529,19 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
}
#endif
- err = fib6_add_rt2node(fn, rt, info, extack);
+ if (rt->nh)
+ err = fib6_add_rt2node_nh(fn, rt, info, extack, &purge_list);
+ else
+ err = fib6_add_rt2node(fn, rt, info, extack, &purge_list);
if (!err) {
- if (rt->nh)
- list_add(&rt->nh_list, &rt->nh->f6i_list);
+ struct fib6_info *iter, *next;
+
+ list_for_each_entry_safe(iter, next, &purge_list, purge_link) {
+ list_del(&iter->purge_link);
+ fib6_purge_rt(iter, fn, info->nl_net);
+ fib6_info_release(iter);
+ }
+
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
if (rt->fib6_flags & RTF_EXPIRES)
@@ -1970,7 +2016,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
&rt->fib6_siblings, fib6_siblings)
sibling->fib6_nsiblings--;
rt->fib6_nsiblings = 0;
- list_del_init(&rt->fib6_siblings);
+ list_del_rcu(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
}
@@ -2390,13 +2436,13 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
round_jiffies(now
+ net->ipv6.sysctl.ip6_rt_gc_interval));
else
- del_timer(&net->ipv6.ip6_fib_timer);
+ timer_delete(&net->ipv6.ip6_fib_timer);
spin_unlock_bh(&net->ipv6.fib6_gc_lock);
}
static void fib6_gc_timer_cb(struct timer_list *t)
{
- struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer);
+ struct net *arg = timer_container_of(arg, t, ipv6.ip6_fib_timer);
fib6_run_gc(0, arg, true);
}
@@ -2430,6 +2476,8 @@ static int __net_init fib6_net_init(struct net *net)
if (!net->ipv6.fib_table_hash)
goto out_rt6_stats;
+ spin_lock_init(&net->ipv6.fib_table_hash_lock);
+
net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
GFP_KERNEL);
if (!net->ipv6.fib6_main_tbl)
@@ -2477,7 +2525,7 @@ static void fib6_net_exit(struct net *net)
{
unsigned int i;
- del_timer_sync(&net->ipv6.ip6_fib_timer);
+ timer_delete_sync(&net->ipv6.ip6_fib_timer);
for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
struct hlist_head *head = &net->ipv6.fib_table_hash[i];
@@ -2500,6 +2548,12 @@ static struct pernet_operations fib6_net_ops = {
.exit = fib6_net_exit,
};
+static const struct rtnl_msg_handler fib6_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
+ .dumpit = inet6_dump_fib,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
+};
+
int __init fib6_init(void)
{
int ret = -ENOMEM;
@@ -2513,9 +2567,7 @@ int __init fib6_init(void)
if (ret)
goto out_kmem_cache_create;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
- inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED |
- RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
+ ret = rtnl_register_many(fib6_rtnl_msg_handlers);
if (ret)
goto out_unregister_subsys;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index eca07e10e21f..a3ff575798dd 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -907,6 +907,6 @@ int ip6_flowlabel_init(void)
void ip6_flowlabel_cleanup(void)
{
static_key_deferred_flush(&ipv6_flowlabel_exclusive);
- del_timer(&ip6_fl_gc_timer);
+ timer_delete(&ip6_fl_gc_timer);
unregister_pernet_subsys(&ip6_flowlabel_net_ops);
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 235808cfec70..2dc9dcffe2ca 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -43,6 +43,7 @@
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netdev_lock.h>
#include <net/rtnetlink.h>
#include <net/ipv6.h>
@@ -1498,7 +1499,6 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
tunnel = netdev_priv(dev);
tunnel->dev = dev;
- tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
@@ -1570,7 +1570,7 @@ static struct inet6_protocol ip6gre_protocol __read_mostly = {
.flags = INET6_PROTO_FINAL,
};
-static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
+static void __net_exit ip6gre_exit_rtnl_net(struct net *net, struct list_head *head)
{
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
struct net_device *dev, *aux;
@@ -1587,16 +1587,16 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
for (h = 0; h < IP6_GRE_HASH_SIZE; h++) {
struct ip6_tnl *t;
- t = rtnl_dereference(ign->tunnels[prio][h]);
+ t = rtnl_net_dereference(net, ign->tunnels[prio][h]);
while (t) {
/* If dev is in the same netns, it has already
* been added to the list by the previous loop.
*/
if (!net_eq(dev_net(t->dev), net))
- unregister_netdevice_queue(t->dev,
- head);
- t = rtnl_dereference(t->next);
+ unregister_netdevice_queue(t->dev, head);
+
+ t = rtnl_net_dereference(net, t->next);
}
}
}
@@ -1621,7 +1621,7 @@ static int __net_init ip6gre_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- ign->fb_tunnel_dev->netns_local = true;
+ ign->fb_tunnel_dev->netns_immutable = true;
ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
@@ -1640,19 +1640,9 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6gre_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
-{
- struct net *net;
-
- ASSERT_RTNL();
- list_for_each_entry(net, net_list, exit_list)
- ip6gre_destroy_tunnels(net, dev_to_kill);
-}
-
static struct pernet_operations ip6gre_net_ops = {
.init = ip6gre_init_net,
- .exit_batch_rtnl = ip6gre_exit_batch_rtnl,
+ .exit_rtnl = ip6gre_exit_rtnl_net,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
};
@@ -1882,7 +1872,6 @@ static int ip6erspan_tap_init(struct net_device *dev)
tunnel = netdev_priv(dev);
tunnel->dev = dev;
- tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
@@ -1971,7 +1960,7 @@ static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
return ret;
}
-static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
+static int ip6gre_newlink_common(struct net *link_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
@@ -1992,7 +1981,7 @@ static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
eth_hw_addr_random(dev);
nt->dev = dev;
- nt->net = dev_net(dev);
+ nt->net = link_net;
err = register_netdevice(dev);
if (err)
@@ -2005,12 +1994,14 @@ out:
return err;
}
-static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
+static int ip6gre_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
{
+ struct net *net = params->link_net ? : dev_net(dev);
struct ip6_tnl *nt = netdev_priv(dev);
- struct net *net = dev_net(dev);
+ struct nlattr **data = params->data;
+ struct nlattr **tb = params->tb;
struct ip6gre_net *ign;
int err;
@@ -2025,7 +2016,7 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
return -EEXIST;
}
- err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
+ err = ip6gre_newlink_common(net, dev, tb, data, extack);
if (!err) {
ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
ip6gre_tunnel_link_md(ign, nt);
@@ -2241,12 +2232,14 @@ static void ip6erspan_tap_setup(struct net_device *dev)
netif_keep_dst(dev);
}
-static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
+static int ip6erspan_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
{
+ struct net *net = params->link_net ? : dev_net(dev);
struct ip6_tnl *nt = netdev_priv(dev);
- struct net *net = dev_net(dev);
+ struct nlattr **data = params->data;
+ struct nlattr **tb = params->tb;
struct ip6gre_net *ign;
int err;
@@ -2262,7 +2255,7 @@ static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
return -EEXIST;
}
- err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
+ err = ip6gre_newlink_common(net, dev, tb, data, extack);
if (!err) {
ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]);
ip6erspan_tunnel_link_md(ign, nt);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 70c0e16c0ae6..39da6a7ce5f1 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -477,9 +477,7 @@ discard:
static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb_clear_delivery_time(skb);
- rcu_read_lock();
ip6_protocol_deliver_rcu(net, skb, 0, false);
- rcu_read_unlock();
return 0;
}
@@ -487,9 +485,15 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk
int ip6_input(struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
- dev_net(skb->dev), NULL, skb, skb->dev, NULL,
- ip6_input_finish);
+ int res;
+
+ rcu_read_lock();
+ res = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
+ dev_net_rcu(skb->dev), NULL, skb, skb->dev, NULL,
+ ip6_input_finish);
+ rcu_read_unlock();
+
+ return res;
}
EXPORT_SYMBOL_GPL(ip6_input);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f26841f1490f..7bd29a9ff0db 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -127,7 +127,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
- if (unlikely(IS_ERR_OR_NULL(neigh))) {
+ if (IS_ERR_OR_NULL(neigh)) {
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (IS_ERR(neigh)) {
@@ -259,7 +259,7 @@ bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
}
/*
- * xmit an sk_buff (used by TCP, SCTP and DCCP)
+ * xmit an sk_buff (used by TCP and SCTP)
* Note : socket lock is not held for SYNACK packets, but might be modified
* by calls to skb_set_owner_w() and ipv6_local_error(),
* which are using proper atomic operations or spinlocks.
@@ -613,15 +613,15 @@ int ip6_forward(struct sk_buff *skb)
else
target = &hdr->daddr;
- peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
+ rcu_read_lock();
+ peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
/* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect)
*/
if (inet_peer_xrlim_allow(peer, 1*HZ))
ndisc_send_redirect(skb, target);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
} else {
int addrtype = ipv6_addr_type(&hdr->saddr);
@@ -1386,6 +1386,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
}
v6_cork->hop_limit = ipc6->hlimit;
v6_cork->tclass = ipc6->tclass;
+ v6_cork->dontfrag = ipc6->dontfrag;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
@@ -1401,8 +1402,12 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
cork->base.mark = ipc6->sockc.mark;
- sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
-
+ cork->base.priority = ipc6->sockc.priority;
+ sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags);
+ if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
+ cork->base.flags |= IPCORK_TS_OPT_ID;
+ cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
+ }
cork->base.length = 0;
cork->base.transmit_time = ipc6->sockc.transmit_time;
@@ -1417,7 +1422,7 @@ static int __ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, size_t length, int transhdrlen,
- unsigned int flags, struct ipcm6_cookie *ipc6)
+ unsigned int flags)
{
struct sk_buff *skb, *skb_prev = NULL;
struct inet_cork *cork = &cork_full->base;
@@ -1433,7 +1438,7 @@ static int __ip6_append_data(struct sock *sk,
bool zc = false;
u32 tskey = 0;
struct rt6_info *rt = dst_rt6_info(cork->dst);
- bool paged, hold_tskey, extra_uref = false;
+ bool paged, hold_tskey = false, extra_uref = false;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
@@ -1471,7 +1476,7 @@ static int __ip6_append_data(struct sock *sk,
if (headersize + transhdrlen > mtu)
goto emsgsize;
- if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
+ if (cork->length + length > mtu - headersize && v6_cork->dontfrag &&
(sk->sk_protocol == IPPROTO_UDP ||
sk->sk_protocol == IPPROTO_ICMPV6 ||
sk->sk_protocol == IPPROTO_RAW)) {
@@ -1519,7 +1524,8 @@ emsgsize:
uarg = msg->msg_ubuf;
}
} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
- uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb),
+ false);
if (!uarg)
return -ENOBUFS;
extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
@@ -1543,10 +1549,15 @@ emsgsize:
flags &= ~MSG_SPLICE_PAGES;
}
- hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
- if (hold_tskey)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+ if (cork->flags & IPCORK_TS_OPT_ID) {
+ tskey = cork->ts_opt_id;
+ } else {
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ hold_tskey = true;
+ }
+ }
/*
* Let's try using as much space as possible.
@@ -1689,8 +1700,9 @@ alloc_new_skb:
pskb_trim_unique(skb_prev, maxfraglen);
}
if (copy > 0 &&
- getfrag(from, data + transhdrlen, offset,
- copy, fraggap, skb) < 0) {
+ INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from, data + transhdrlen, offset,
+ copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
@@ -1734,8 +1746,9 @@ alloc_new_skb:
unsigned int off;
off = skb->len;
- if (getfrag(from, skb_put(skb, copy),
- offset, copy, off, skb) < 0) {
+ if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from, skb_put(skb, copy),
+ offset, copy, off, skb) < 0) {
__skb_trim(skb, off);
err = -EFAULT;
goto error;
@@ -1773,7 +1786,8 @@ alloc_new_skb:
get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
- if (getfrag(from,
+ if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from,
page_address(pfrag->page) + pfrag->offset,
offset, copy, skb->len, skb) < 0)
goto error_efault;
@@ -1843,7 +1857,7 @@ int ip6_append_data(struct sock *sk,
return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
&np->cork, sk_page_frag(sk), getfrag,
- from, length, transhdrlen, flags, ipc6);
+ from, length, transhdrlen, flags);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
@@ -1931,7 +1945,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
hdr->saddr = fl6->saddr;
hdr->daddr = *final_dst;
- skb->priority = READ_ONCE(sk->sk_priority);
+ skb->priority = cork->base.priority;
skb->mark = cork->base.mark;
if (sk_is_tcp(sk))
skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
@@ -2042,13 +2056,11 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
ip6_cork_release(cork, &v6_cork);
return ERR_PTR(err);
}
- if (ipc6->dontfrag < 0)
- ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk);
err = __ip6_append_data(sk, &queue, cork, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
- flags, ipc6);
+ flags);
if (err) {
__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
return ERR_PTR(err);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b60e13c42bca..894d3158a6f0 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -52,6 +52,7 @@
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netdev_lock.h>
#include <net/dst_metadata.h>
#include <net/inet_dscp.h>
@@ -253,8 +254,7 @@ static void ip6_dev_free(struct net_device *dev)
static int ip6_tnl_create2(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct net *net = dev_net(dev);
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ struct ip6_tnl_net *ip6n = net_generic(t->net, ip6_tnl_net_id);
int err;
dev->rtnl_link_ops = &ip6_link_ops;
@@ -630,8 +630,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
skb_dst_set(skb2, &rt->dst);
} else {
- if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
- skb2->dev) ||
+ if (ip_route_input(skb2, eiph->daddr, eiph->saddr,
+ ip4h_dscp(eiph), skb2->dev) ||
skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
goto out;
}
@@ -1878,7 +1878,6 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
int t_hlen;
t->dev = dev;
- t->net = dev_net(dev);
ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
if (ret)
@@ -1940,6 +1939,7 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
struct net *net = dev_net(dev);
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ t->net = net;
t->parms.proto = IPPROTO_IPV6;
rcu_assign_pointer(ip6n->tnls_wc[0], t);
@@ -2002,17 +2002,22 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[],
parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}
-static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
+static int ip6_tnl_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
{
- struct net *net = dev_net(dev);
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ struct nlattr **data = params->data;
+ struct nlattr **tb = params->tb;
struct ip_tunnel_encap ipencap;
+ struct ip6_tnl_net *ip6n;
struct ip6_tnl *nt, *t;
+ struct net *net;
int err;
+ net = params->link_net ? : dev_net(dev);
+ ip6n = net_generic(net, ip6_tnl_net_id);
nt = netdev_priv(dev);
+ nt->net = net;
if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
err = ip6_tnl_encap_setup(nt, &ipencap);
@@ -2205,7 +2210,7 @@ static struct xfrm6_tunnel mplsip6_handler __read_mostly = {
.priority = 1,
};
-static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
+static void __net_exit ip6_tnl_exit_rtnl_net(struct net *net, struct list_head *list)
{
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct net_device *dev, *aux;
@@ -2217,25 +2222,27 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head
unregister_netdevice_queue(dev, list);
for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
- t = rtnl_dereference(ip6n->tnls_r_l[h]);
+ t = rtnl_net_dereference(net, ip6n->tnls_r_l[h]);
while (t) {
/* If dev is in the same netns, it has already
* been added to the list by the previous loop.
*/
if (!net_eq(dev_net(t->dev), net))
unregister_netdevice_queue(t->dev, list);
- t = rtnl_dereference(t->next);
+
+ t = rtnl_net_dereference(net, t->next);
}
}
- t = rtnl_dereference(ip6n->tnls_wc[0]);
+ t = rtnl_net_dereference(net, ip6n->tnls_wc[0]);
while (t) {
/* If dev is in the same netns, it has already
* been added to the list by the previous loop.
*/
if (!net_eq(dev_net(t->dev), net))
unregister_netdevice_queue(t->dev, list);
- t = rtnl_dereference(t->next);
+
+ t = rtnl_net_dereference(net, t->next);
}
}
@@ -2261,7 +2268,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- ip6n->fb_tnl_dev->netns_local = true;
+ ip6n->fb_tnl_dev->netns_immutable = true;
err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
if (err < 0)
@@ -2282,19 +2289,9 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6_tnl_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
-{
- struct net *net;
-
- ASSERT_RTNL();
- list_for_each_entry(net, net_list, exit_list)
- ip6_tnl_destroy_tunnels(net, dev_to_kill);
-}
-
static struct pernet_operations ip6_tnl_net_ops = {
.init = ip6_tnl_init_net,
- .exit_batch_rtnl = ip6_tnl_exit_batch_rtnl,
+ .exit_rtnl = ip6_tnl_exit_rtnl_net,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
};
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 590737c27537..40464a88bca6 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -45,6 +45,7 @@
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netdev_lock.h>
#include <linux/etherdevice.h>
#define IP6_VTI_HASH_SIZE_SHIFT 5
@@ -177,8 +178,7 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
static int vti6_tnl_create2(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct net *net = dev_net(dev);
- struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ struct vti6_net *ip6n = net_generic(t->net, vti6_net_id);
int err;
dev->rtnl_link_ops = &vti6_link_ops;
@@ -925,7 +925,6 @@ static inline int vti6_dev_init_gen(struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
t->dev = dev;
- t->net = dev_net(dev);
netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
netdev_lockdep_set_classes(dev);
return 0;
@@ -958,6 +957,7 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev)
struct net *net = dev_net(dev);
struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ t->net = net;
t->parms.proto = IPPROTO_IPV6;
rcu_assign_pointer(ip6n->tnls_wc[0], t);
@@ -997,17 +997,20 @@ static void vti6_netlink_parms(struct nlattr *data[],
parms->fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]);
}
-static int vti6_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
+static int vti6_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
{
- struct net *net = dev_net(dev);
+ struct nlattr **data = params->data;
struct ip6_tnl *nt;
+ struct net *net;
+ net = params->link_net ? : dev_net(dev);
nt = netdev_priv(dev);
vti6_netlink_parms(data, &nt->parms);
nt->parms.proto = IPPROTO_IPV6;
+ nt->net = net;
if (vti6_locate(net, &nt->parms, 0))
return -EEXIST;
@@ -1109,21 +1112,21 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = {
.get_link_net = ip6_tnl_get_link_net,
};
-static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n,
- struct list_head *list)
+static void __net_exit vti6_exit_rtnl_net(struct net *net, struct list_head *list)
{
- int h;
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
struct ip6_tnl *t;
+ int h;
for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
- t = rtnl_dereference(ip6n->tnls_r_l[h]);
+ t = rtnl_net_dereference(net, ip6n->tnls_r_l[h]);
while (t) {
unregister_netdevice_queue(t->dev, list);
- t = rtnl_dereference(t->next);
+ t = rtnl_net_dereference(net, t->next);
}
}
- t = rtnl_dereference(ip6n->tnls_wc[0]);
+ t = rtnl_net_dereference(net, ip6n->tnls_wc[0]);
if (t)
unregister_netdevice_queue(t->dev, list);
}
@@ -1167,22 +1170,9 @@ err_alloc_dev:
return err;
}
-static void __net_exit vti6_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
-{
- struct vti6_net *ip6n;
- struct net *net;
-
- ASSERT_RTNL();
- list_for_each_entry(net, net_list, exit_list) {
- ip6n = net_generic(net, vti6_net_id);
- vti6_destroy_tunnels(ip6n, dev_to_kill);
- }
-}
-
static struct pernet_operations vti6_net_ops = {
.init = vti6_init_net,
- .exit_batch_rtnl = vti6_exit_batch_rtnl,
+ .exit_rtnl = vti6_exit_rtnl_net,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
};
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 2ce4ae0d8dc3..9db31e5b998c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -125,7 +125,7 @@ static struct mr_table *ip6mr_mr_table_iter(struct net *net,
return ret;
}
-static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -136,6 +136,16 @@ static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
return NULL;
}
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+{
+ struct mr_table *mrt;
+
+ rcu_read_lock();
+ mrt = __ip6mr_get_table(net, id);
+ rcu_read_unlock();
+ return mrt;
+}
+
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
struct mr_table **mrt)
{
@@ -177,7 +187,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
arg->table = fib_rule_get_table(rule, arg);
- mrt = ip6mr_get_table(rule->fr_net, arg->table);
+ mrt = __ip6mr_get_table(rule->fr_net, arg->table);
if (!mrt)
return -EAGAIN;
res->mrt = mrt;
@@ -276,7 +286,7 @@ static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
}
-static unsigned int ip6mr_rules_seq_read(struct net *net)
+static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}
@@ -304,6 +314,8 @@ static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
return net->ipv6.mrt6;
}
+#define __ip6mr_get_table ip6mr_get_table
+
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
struct mr_table **mrt)
{
@@ -335,7 +347,7 @@ static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
return 0;
}
-static unsigned int ip6mr_rules_seq_read(struct net *net)
+static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
return 0;
}
@@ -382,7 +394,7 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
- mrt = ip6mr_get_table(net, id);
+ mrt = __ip6mr_get_table(net, id);
if (mrt)
return mrt;
@@ -392,6 +404,10 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
static void ip6mr_free_table(struct mr_table *mrt)
{
+ struct net *net = read_pnet(&mrt->net);
+
+ WARN_ON_ONCE(!mr_can_free_table(net));
+
timer_shutdown_sync(&mrt->ipmr_expire_timer);
mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
@@ -411,13 +427,15 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
- mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
- if (!mrt)
+ rcu_read_lock();
+ mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
+ if (!mrt) {
+ rcu_read_unlock();
return ERR_PTR(-ENOENT);
+ }
iter->mrt = mrt;
- rcu_read_lock();
return mr_vif_seq_start(seq, pos);
}
@@ -492,9 +510,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->_c.mfc_un.res.pkt,
- mfc->_c.mfc_un.res.bytes,
- mfc->_c.mfc_un.res.wrong_if);
+ atomic_long_read(&mfc->_c.mfc_un.res.pkt),
+ atomic_long_read(&mfc->_c.mfc_un.res.bytes),
+ atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
for (n = mfc->_c.mfc_un.res.minvif;
n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
@@ -640,7 +658,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->netdev_ops = &reg_vif_netdev_ops;
dev->needs_free_netdev = true;
- dev->netns_local = true;
+ dev->netns_immutable = true;
}
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
@@ -821,7 +839,7 @@ static void ipmr_do_expire_process(struct mr_table *mrt)
static void ipmr_expire_process(struct timer_list *t)
{
- struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+ struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer);
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
@@ -856,7 +874,7 @@ static void ip6mr_update_thresholds(struct mr_table *mrt,
cache->mfc_un.res.maxvif = vifi + 1;
}
}
- cache->mfc_un.res.lastuse = jiffies;
+ WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
}
static int mif6_add(struct net *net, struct mr_table *mrt,
@@ -1260,11 +1278,9 @@ static int ip6mr_device_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static unsigned int ip6mr_seq_read(struct net *net)
+static unsigned int ip6mr_seq_read(const struct net *net)
{
- ASSERT_RTNL();
-
- return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
+ return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
}
static int ip6mr_dump(struct net *net, struct notifier_block *nb,
@@ -1369,6 +1385,12 @@ static struct pernet_operations ip6mr_net_ops = {
.exit_batch = ip6mr_net_exit_batch,
};
+static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
+ .msgtype = RTM_GETROUTE,
+ .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute},
+};
+
int __init ip6_mr_init(void)
{
int err;
@@ -1391,9 +1413,8 @@ int __init ip6_mr_init(void)
goto add_proto_fail;
}
#endif
- err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
- ip6mr_rtm_getroute, ip6mr_rtm_dumproute, 0);
- if (err == 0)
+ err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
+ if (!err)
return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
@@ -1408,9 +1429,9 @@ reg_pernet_fail:
return err;
}
-void ip6_mr_cleanup(void)
+void __init ip6_mr_cleanup(void)
{
- rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
+ rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
#ifdef CONFIG_IPV6_PIMSM_V2
inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
@@ -1495,7 +1516,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
}
}
if (list_empty(&mrt->mfc_unres_queue))
- del_timer(&mrt->ipmr_expire_timer);
+ timer_delete(&mrt->ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
if (found) {
@@ -1914,9 +1935,9 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
&sr->grp.sin6_addr);
if (c) {
- sr->pktcnt = c->_c.mfc_un.res.pkt;
- sr->bytecnt = c->_c.mfc_un.res.bytes;
- sr->wrong_if = c->_c.mfc_un.res.wrong_if;
+ sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
+ sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
+ sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
rcu_read_unlock();
return 0;
}
@@ -1986,9 +2007,9 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->_c.mfc_un.res.pkt;
- sr.bytecnt = c->_c.mfc_un.res.bytes;
- sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
+ sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
+ sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -2111,9 +2132,9 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
int true_vifi = ip6mr_find_vif(mrt, dev);
vif = c->_c.mfc_parent;
- c->_c.mfc_un.res.pkt++;
- c->_c.mfc_un.res.bytes += skb->len;
- c->_c.mfc_un.res.lastuse = jiffies;
+ atomic_long_inc(&c->_c.mfc_un.res.pkt);
+ atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
+ WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
@@ -2131,7 +2152,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
- c->_c.mfc_un.res.wrong_if++;
+ atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2275,11 +2296,13 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
struct mfc6_cache *cache;
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
- mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
- if (!mrt)
+ rcu_read_lock();
+ mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
+ if (!mrt) {
+ rcu_read_unlock();
return -ENOENT;
+ }
- rcu_read_lock();
cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
if (!cache && skb->dev) {
int vif = ip6mr_find_vif(mrt, skb->dev);
@@ -2557,9 +2580,9 @@ static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
src = nla_get_in6_addr(tb[RTA_SRC]);
if (tb[RTA_DST])
grp = nla_get_in6_addr(tb[RTA_DST]);
- tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
+ tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
- mrt = ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
+ mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
if (!mrt) {
NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
return -ENOENT;
@@ -2606,7 +2629,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (filter.table_id) {
struct mr_table *mrt;
- mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
+ mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
return skb->len;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index b244dbf61d5f..65831b4fee1f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -33,8 +33,10 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
+#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/route.h>
+#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -47,6 +49,7 @@
#include <linux/netfilter_ipv6.h>
#include <net/net_namespace.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -901,6 +904,41 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
return mc;
}
+static void inet6_ifmcaddr_notify(struct net_device *dev,
+ const struct ifmcaddr6 *ifmca, int event)
+{
+ struct inet6_fill_args fillargs = {
+ .portid = 0,
+ .seq = 0,
+ .event = event,
+ .flags = 0,
+ .netnsid = -1,
+ .force_rt_scope_universe = true,
+ };
+ struct net *net = dev_net(dev);
+ struct sk_buff *skb;
+ int err = -ENOMEM;
+
+ skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+ nla_total_size(sizeof(struct in6_addr)) +
+ nla_total_size(sizeof(struct ifa_cacheinfo)),
+ GFP_KERNEL);
+ if (!skb)
+ goto error;
+
+ err = inet6_fill_ifmcaddr(skb, ifmca, &fillargs);
+ if (err < 0) {
+ WARN_ON_ONCE(err == -EMSGSIZE);
+ nlmsg_free(skb);
+ goto error;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MCADDR, NULL, GFP_KERNEL);
+ return;
+error:
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_MCADDR, err);
+}
+
/*
* device multicast group inc (add if not found)
*/
@@ -948,6 +986,7 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
mld_del_delrec(idev, mc);
igmp6_group_added(mc);
+ inet6_ifmcaddr_notify(dev, mc, RTM_NEWMULTICAST);
mutex_unlock(&idev->mc_lock);
ma_put(mc);
return 0;
@@ -977,6 +1016,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
*map = ma->next;
igmp6_group_dropped(ma);
+ inet6_ifmcaddr_notify(idev->dev, ma,
+ RTM_DELMULTICAST);
ip6_mc_clear_src(ma);
mutex_unlock(&idev->mc_lock);
@@ -1021,29 +1062,31 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
rcu_read_lock();
idev = __in6_dev_get(dev);
- if (idev) {
- for_each_mc_rcu(idev, mc) {
- if (ipv6_addr_equal(&mc->mca_addr, group))
- break;
- }
- if (mc) {
- if (src_addr && !ipv6_addr_any(src_addr)) {
- struct ip6_sf_list *psf;
+ if (!idev)
+ goto unlock;
+ for_each_mc_rcu(idev, mc) {
+ if (ipv6_addr_equal(&mc->mca_addr, group))
+ break;
+ }
+ if (!mc)
+ goto unlock;
+ if (src_addr && !ipv6_addr_any(src_addr)) {
+ struct ip6_sf_list *psf;
- for_each_psf_rcu(mc, psf) {
- if (ipv6_addr_equal(&psf->sf_addr, src_addr))
- break;
- }
- if (psf)
- rv = psf->sf_count[MCAST_INCLUDE] ||
- psf->sf_count[MCAST_EXCLUDE] !=
- mc->mca_sfcount[MCAST_EXCLUDE];
- else
- rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0;
- } else
- rv = true; /* don't filter unspecified source */
+ for_each_psf_rcu(mc, psf) {
+ if (ipv6_addr_equal(&psf->sf_addr, src_addr))
+ break;
}
+ if (psf)
+ rv = READ_ONCE(psf->sf_count[MCAST_INCLUDE]) ||
+ READ_ONCE(psf->sf_count[MCAST_EXCLUDE]) !=
+ READ_ONCE(mc->mca_sfcount[MCAST_EXCLUDE]);
+ else
+ rv = READ_ONCE(mc->mca_sfcount[MCAST_EXCLUDE]) != 0;
+ } else {
+ rv = true; /* don't filter unspecified source */
}
+unlock:
rcu_read_unlock();
return rv;
}
@@ -1730,21 +1773,19 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
struct net_device *dev = idev->dev;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- struct net *net = dev_net(dev);
const struct in6_addr *saddr;
struct in6_addr addr_buf;
struct mld2_report *pmr;
struct sk_buff *skb;
unsigned int size;
struct sock *sk;
- int err;
+ struct net *net;
- sk = net->ipv6.igmp_sk;
/* we assume size > sizeof(ra) here
* Also try to not allocate high-order pages for big MTU
*/
size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen;
- skb = sock_alloc_send_skb(sk, size, 1, &err);
+ skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return NULL;
@@ -1752,6 +1793,12 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
skb_reserve(skb, hlen);
skb_tailroom_reserve(skb, mtu, tlen);
+ rcu_read_lock();
+
+ net = dev_net_rcu(dev);
+ sk = net->ipv6.igmp_sk;
+ skb_set_owner_w(skb, sk);
+
if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
@@ -1763,6 +1810,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
+ rcu_read_unlock();
+
skb_put_data(skb, ra, sizeof(ra));
skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
@@ -2122,21 +2171,21 @@ static void mld_send_cr(struct inet6_dev *idev)
static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
{
- struct net *net = dev_net(dev);
- struct sock *sk = net->ipv6.igmp_sk;
+ const struct in6_addr *snd_addr, *saddr;
+ int err, len, payload_len, full_len;
+ struct in6_addr addr_buf;
struct inet6_dev *idev;
struct sk_buff *skb;
struct mld_msg *hdr;
- const struct in6_addr *snd_addr, *saddr;
- struct in6_addr addr_buf;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- int err, len, payload_len, full_len;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
IPV6_TLV_PADN, 0 };
- struct flowi6 fl6;
struct dst_entry *dst;
+ struct flowi6 fl6;
+ struct net *net;
+ struct sock *sk;
if (type == ICMPV6_MGM_REDUCTION)
snd_addr = &in6addr_linklocal_allrouters;
@@ -2147,19 +2196,21 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
payload_len = len + sizeof(ra);
full_len = sizeof(struct ipv6hdr) + payload_len;
- rcu_read_lock();
- IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS);
- rcu_read_unlock();
+ skb = alloc_skb(hlen + tlen + full_len, GFP_KERNEL);
- skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err);
+ rcu_read_lock();
+ net = dev_net_rcu(dev);
+ idev = __in6_dev_get(dev);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
if (!skb) {
- rcu_read_lock();
- IP6_INC_STATS(net, __in6_dev_get(dev),
- IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
rcu_read_unlock();
return;
}
+ sk = net->ipv6.igmp_sk;
+ skb_set_owner_w(skb, sk);
+
skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, hlen);
@@ -2184,9 +2235,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
IPPROTO_ICMPV6,
csum_partial(hdr, len, 0));
- rcu_read_lock();
- idev = __in6_dev_get(skb->dev);
-
icmpv6_flow_init(sk, &fl6, type,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->dev->ifindex);
@@ -2285,7 +2333,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
/* source filter not found, or count wrong => bug */
return -ESRCH;
}
- psf->sf_count[sfmode]--;
+ WRITE_ONCE(psf->sf_count[sfmode], psf->sf_count[sfmode] - 1);
if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) {
struct inet6_dev *idev = pmc->idev;
@@ -2391,7 +2439,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
rcu_assign_pointer(pmc->mca_sources, psf);
}
}
- psf->sf_count[sfmode]++;
+ WRITE_ONCE(psf->sf_count[sfmode], psf->sf_count[sfmode] + 1);
return 0;
}
@@ -2503,7 +2551,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
sf_markstate(pmc);
isexclude = pmc->mca_sfmode == MCAST_EXCLUDE;
if (!delta)
- pmc->mca_sfcount[sfmode]++;
+ WRITE_ONCE(pmc->mca_sfcount[sfmode],
+ pmc->mca_sfcount[sfmode] + 1);
err = 0;
for (i = 0; i < sfcount; i++) {
err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]);
@@ -2514,7 +2563,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
int j;
if (!delta)
- pmc->mca_sfcount[sfmode]--;
+ WRITE_ONCE(pmc->mca_sfcount[sfmode],
+ pmc->mca_sfcount[sfmode] - 1);
for (j = 0; j < i; j++)
ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]);
} else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) {
@@ -2559,7 +2609,8 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
RCU_INIT_POINTER(pmc->mca_sources, NULL);
pmc->mca_sfmode = MCAST_EXCLUDE;
pmc->mca_sfcount[MCAST_INCLUDE] = 0;
- pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
+ /* Paired with the READ_ONCE() from ipv6_chk_mcast_addr() */
+ WRITE_ONCE(pmc->mca_sfcount[MCAST_EXCLUDE], 1);
}
/* called with mc_lock */
@@ -3074,8 +3125,8 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
state->dev->ifindex, state->dev->name,
&state->im->mca_addr,
&psf->sf_addr,
- psf->sf_count[MCAST_INCLUDE],
- psf->sf_count[MCAST_EXCLUDE]);
+ READ_ONCE(psf->sf_count[MCAST_INCLUDE]),
+ READ_ONCE(psf->sf_count[MCAST_EXCLUDE]));
}
return 0;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index aba94a348673..ecb5c4b8518f 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -418,15 +418,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
{
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
struct sk_buff *skb;
skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
- if (!skb) {
- ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
- __func__);
+ if (!skb)
return NULL;
- }
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
@@ -437,7 +433,9 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
/* Manually assign socket ownership as we avoid calling
* sock_alloc_send_pskb() to bypass wmem buffer limits
*/
- skb_set_owner_w(skb, sk);
+ rcu_read_lock();
+ skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk);
+ rcu_read_unlock();
return skb;
}
@@ -473,16 +471,20 @@ static void ip6_nd_hdr(struct sk_buff *skb,
void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
+ struct icmp6hdr *icmp6h = icmp6_hdr(skb);
struct dst_entry *dst = skb_dst(skb);
- struct net *net = dev_net(skb->dev);
- struct sock *sk = net->ipv6.ndisc_sk;
struct inet6_dev *idev;
+ struct net *net;
+ struct sock *sk;
int err;
- struct icmp6hdr *icmp6h = icmp6_hdr(skb);
u8 type;
type = icmp6h->icmp6_type;
+ rcu_read_lock();
+
+ net = dev_net_rcu(skb->dev);
+ sk = net->ipv6.ndisc_sk;
if (!dst) {
struct flowi6 fl6;
int oif = skb->dev->ifindex;
@@ -490,6 +492,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
dst = icmp6_dst_alloc(skb->dev, &fl6);
if (IS_ERR(dst)) {
+ rcu_read_unlock();
kfree_skb(skb);
return;
}
@@ -504,7 +507,6 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
- rcu_read_lock();
idev = __in6_dev_get(dst->dev);
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
@@ -1678,7 +1680,7 @@ static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
{
struct net_device *dev = skb->dev;
- struct net *net = dev_net(dev);
+ struct net *net = dev_net_rcu(dev);
struct sock *sk = net->ipv6.ndisc_sk;
int optlen = 0;
struct inet_peer *peer;
@@ -1693,8 +1695,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
bool ret;
- if (netif_is_l3_master(skb->dev)) {
- dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
+ if (netif_is_l3_master(dev)) {
+ dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
if (!dev)
return;
}
@@ -1731,10 +1733,10 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
"Redirect: destination is not a neighbour\n");
goto release;
}
- peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1);
+
+ peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
ret = inet_peer_xrlim_allow(peer, 1*HZ);
- if (peer)
- inet_putpeer(peer);
+
if (!ret)
goto release;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 581ce055bf52..4541836ee3da 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -164,20 +164,20 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct ip6_fraglist_iter iter;
struct sk_buff *frag2;
- if (first_len - hlen > mtu ||
- skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
+ if (first_len - hlen > mtu)
goto blackhole;
- if (skb_cloned(skb))
+ if (skb_cloned(skb) ||
+ skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
goto slow_path;
skb_walk_frags(skb, frag2) {
- if (frag2->len > mtu ||
- skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
+ if (frag2->len > mtu)
goto blackhole;
/* Partially cloned skb? */
- if (skb_shared(frag2))
+ if (skb_shared(frag2) ||
+ skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
goto slow_path;
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index f3c8e2d918e1..e087a8e97ba7 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -8,7 +8,14 @@ menu "IPv6: Netfilter Configuration"
# old sockopt interface and eval loop
config IP6_NF_IPTABLES_LEGACY
- tristate
+ tristate "Legacy IP6 tables support"
+ depends on INET && IPV6
+ select NETFILTER_XTABLES
+ default n
+ help
+ ip6tables is a legacy packet classifier.
+ This is not needed if you are using iptables over nftables
+ (iptables-nft).
config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support"
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7d5602950ae7..d585ac3c1113 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -292,7 +292,7 @@ ip6t_do_table(void *priv, struct sk_buff *skb,
* but it is no problem since absolute verdict is issued by these.
*/
if (static_key_false(&xt_tee_enabled))
- jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
+ jumpstack += private->stacksize * current->in_nf_duplicate;
e = get_entry(table_base, private->hook_entry[hook]);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 4120e67a8ce6..64ab23ff559b 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -123,7 +123,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
#endif
static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
- struct sk_buff *prev_tail, struct net_device *dev);
+ struct sk_buff *prev_tail, struct net_device *dev,
+ int *refs);
static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
@@ -132,7 +133,7 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
static void nf_ct_frag6_expire(struct timer_list *t)
{
- struct inet_frag_queue *frag = from_timer(frag, t, timer);
+ struct inet_frag_queue *frag = timer_container_of(frag, t, timer);
struct frag_queue *fq;
fq = container_of(frag, struct frag_queue, q);
@@ -167,7 +168,8 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
- const struct frag_hdr *fhdr, int nhoff)
+ const struct frag_hdr *fhdr, int nhoff,
+ int *refs)
{
unsigned int payload_len;
struct net_device *dev;
@@ -221,7 +223,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
- inet_frag_kill(&fq->q);
+ inet_frag_kill(&fq->q, refs);
return -EPROTO;
}
if (end > fq->q.len) {
@@ -287,7 +289,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
unsigned long orefdst = skb->_skb_refdst;
skb->_skb_refdst = 0UL;
- err = nf_ct_frag6_reasm(fq, skb, prev, dev);
+ err = nf_ct_frag6_reasm(fq, skb, prev, dev, refs);
skb->_skb_refdst = orefdst;
/* After queue has assumed skb ownership, only 0 or
@@ -301,7 +303,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
return -EINPROGRESS;
insert_error:
- inet_frag_kill(&fq->q);
+ inet_frag_kill(&fq->q, refs);
err:
skb_dst_drop(skb);
return -EINVAL;
@@ -315,13 +317,14 @@ err:
* the last and the first frames arrived and all the bits are here.
*/
static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
- struct sk_buff *prev_tail, struct net_device *dev)
+ struct sk_buff *prev_tail, struct net_device *dev,
+ int *refs)
{
void *reasm_data;
int payload_len;
u8 ecn;
- inet_frag_kill(&fq->q);
+ inet_frag_kill(&fq->q, refs);
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
@@ -372,7 +375,7 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
return 0;
err:
- inet_frag_kill(&fq->q);
+ inet_frag_kill(&fq->q, refs);
return -EINVAL;
}
@@ -447,6 +450,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
struct frag_hdr *fhdr;
struct frag_queue *fq;
struct ipv6hdr *hdr;
+ int refs = 0;
u8 prevhdr;
/* Jumbo payload inhibits frag. header */
@@ -473,23 +477,26 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
+ rcu_read_lock();
fq = fq_find(net, fhdr->identification, user, hdr,
skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
+ rcu_read_unlock();
pr_debug("Can't find and can't create new queue\n");
return -ENOMEM;
}
spin_lock_bh(&fq->q.lock);
- ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
+ ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff, &refs);
if (ret == -EPROTO) {
skb->transport_header = savethdr;
ret = 0;
}
spin_unlock_bh(&fq->q.lock);
- inet_frag_put(&fq->q);
+ rcu_read_unlock();
+ inet_frag_putn(&fq->q, refs);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index 0c39c77fe8a8..b903c62c00c9 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -48,7 +48,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
const struct in6_addr *gw, int oif)
{
local_bh_disable();
- if (this_cpu_read(nf_skb_duplicated))
+ if (current->in_nf_duplicate)
goto out;
skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL)
@@ -64,9 +64,9 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
--iph->hop_limit;
}
if (nf_dup_ipv6_route(net, skb, gw, oif)) {
- __this_cpu_write(nf_skb_duplicated, true);
+ current->in_nf_duplicate = true;
ip6_local_out(net, skb->sk, skb);
- __this_cpu_write(nf_skb_duplicated, false);
+ current->in_nf_duplicate = false;
} else {
kfree_skb(skb);
}
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 7db0437140bf..9ae2b2725bf9 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -268,12 +268,12 @@ static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in)
void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
int hook)
{
- struct sk_buff *nskb;
- struct tcphdr _otcph;
- const struct tcphdr *otcph;
- unsigned int otcplen, hh_len;
const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
struct dst_entry *dst = NULL;
+ const struct tcphdr *otcph;
+ struct sk_buff *nskb;
+ struct tcphdr _otcph;
+ unsigned int otcplen;
struct flowi6 fl6;
if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
@@ -312,9 +312,8 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
if (IS_ERR(dst))
return;
- hh_len = (dst->dev->hard_header_len + 15)&~15;
- nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
- + sizeof(struct tcphdr) + dst->trailer_len,
+ nskb = alloc_skb(LL_MAX_HEADER + sizeof(struct ipv6hdr) +
+ sizeof(struct tcphdr) + dst->trailer_len,
GFP_ATOMIC);
if (!nskb) {
@@ -327,7 +326,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
nskb->mark = fl6.flowi6_mark;
- skb_reserve(nskb, hh_len + dst->header_len);
+ skb_reserve(nskb, LL_MAX_HEADER);
nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, ip6_dst_hoplimit(dst));
nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen);
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index a7690ec62325..9ea5ef56cb27 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -103,6 +103,10 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
struct sk_buff *data_skb = NULL;
int doff = 0;
int thoff = 0, tproto;
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn const *ct;
+#endif
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
if (tproto < 0) {
@@ -136,6 +140,25 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
return NULL;
}
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ /* Do the lookup with the original socket address in
+ * case this is a reply packet of an established
+ * SNAT-ted connection.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct &&
+ ((tproto != IPPROTO_ICMPV6 &&
+ ctinfo == IP_CT_ESTABLISHED_REPLY) ||
+ (tproto == IPPROTO_ICMPV6 &&
+ ctinfo == IP_CT_RELATED_REPLY)) &&
+ (ct->status & IPS_SRC_NAT_DONE)) {
+ daddr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
+ dport = (tproto == IPPROTO_TCP) ?
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
+ }
+#endif
+
return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
sport, dport, indev);
}
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index c9f1634b3838..421036a3605b 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -50,6 +50,7 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
fl6->flowi6_mark = pkt->skb->mark;
fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK;
+ fl6->flowi6_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, dev);
return lookup_flags;
}
@@ -73,8 +74,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
- fl6.flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev);
-
nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
@@ -158,6 +157,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
{
const struct nft_fib *priv = nft_expr_priv(expr);
int noff = skb_network_offset(pkt->skb);
+ const struct net_device *found = NULL;
const struct net_device *oif = NULL;
u32 *dest = &regs->data[priv->dreg];
struct ipv6hdr *iph, _iph;
@@ -165,11 +165,15 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
.flowi6_iif = LOOPBACK_IFINDEX,
.flowi6_proto = pkt->tprot,
.flowi6_uid = sock_net_uid(nft_net(pkt), NULL),
- .flowi6_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)),
};
struct rt6_info *rt;
int lookup_flags;
+ if (nft_fib_can_skip(pkt)) {
+ nft_fib_store_result(dest, priv, nft_in(pkt));
+ return;
+ }
+
if (priv->flags & NFTA_FIB_F_IIF)
oif = nft_in(pkt);
else if (priv->flags & NFTA_FIB_F_OIF)
@@ -181,17 +185,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
return;
}
- lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
-
- if (nft_hook(pkt) == NF_INET_PRE_ROUTING ||
- nft_hook(pkt) == NF_INET_INGRESS) {
- if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) ||
- nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) {
- nft_fib_store_result(dest, priv, nft_in(pkt));
- return;
- }
+ if (nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) {
+ nft_fib_store_result(dest, priv, nft_in(pkt));
+ return;
}
+ lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
+
*dest = 0;
rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
lookup_flags);
@@ -202,11 +202,15 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
goto put_rt_err;
- if (oif && oif != rt->rt6i_idev->dev &&
- l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex)
- goto put_rt_err;
+ if (!oif) {
+ found = rt->rt6i_idev->dev;
+ } else {
+ if (oif == rt->rt6i_idev->dev ||
+ l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == oif->ifindex)
+ found = oif;
+ }
- nft_fib_store_result(dest, priv, rt->rt6i_idev->dev);
+ nft_fib_store_result(dest, priv, found);
put_rt_err:
ip6_rt_put(rt);
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 88b3fcacd4f9..84d90dd8b3f0 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -119,8 +119,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EINVAL;
ipcm6_init_sk(&ipc6, sk);
- ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
- ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_oif = oif;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 608fa9d05b55..fda640ebd53f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -619,7 +619,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb_reserve(skb, hlen);
skb->protocol = htons(ETH_P_IPV6);
- skb->priority = READ_ONCE(sk->sk_priority);
+ skb->priority = sockc->priority;
skb->mark = sockc->mark;
skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
@@ -629,7 +629,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->ip_summed = CHECKSUM_NONE;
- skb_setup_tx_timestamp(skb, sockc->tsflags);
+ skb_setup_tx_timestamp(skb, sockc);
if (flags & MSG_CONFIRM)
skb_set_dst_pending_confirm(skb, 1);
@@ -769,18 +769,16 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
hdrincl = inet_test_bit(HDRINCL, sk);
+ ipcm6_init_sk(&ipc6, sk);
+
/*
* Get and verify the address.
*/
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
+ fl6.flowi6_mark = ipc6.sockc.mark;
fl6.flowi6_uid = sk->sk_uid;
- ipcm6_init(&ipc6);
- ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
- ipc6.sockc.mark = fl6.flowi6_mark;
-
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -890,9 +888,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (hdrincl)
fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
- if (ipc6.tclass < 0)
- ipc6.tclass = np->tclass;
-
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
@@ -903,9 +898,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (ipc6.hlimit < 0)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (ipc6.dontfrag < 0)
- ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
-
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index a48be617a8ab..7d4bcf3fda5b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -68,11 +68,12 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
static struct inet_frags ip6_frags;
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
- struct sk_buff *prev_tail, struct net_device *dev);
+ struct sk_buff *prev_tail, struct net_device *dev,
+ int *refs);
static void ip6_frag_expire(struct timer_list *t)
{
- struct inet_frag_queue *frag = from_timer(frag, t, timer);
+ struct inet_frag_queue *frag = timer_container_of(frag, t, timer);
struct frag_queue *fq;
fq = container_of(frag, struct frag_queue, q);
@@ -105,7 +106,7 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct frag_hdr *fhdr, int nhoff,
- u32 *prob_offset)
+ u32 *prob_offset, int *refs)
{
struct net *net = dev_net(skb_dst(skb)->dev);
int offset, end, fragsize;
@@ -220,7 +221,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
unsigned long orefdst = skb->_skb_refdst;
skb->_skb_refdst = 0UL;
- err = ip6_frag_reasm(fq, skb, prev_tail, dev);
+ err = ip6_frag_reasm(fq, skb, prev_tail, dev, refs);
skb->_skb_refdst = orefdst;
return err;
}
@@ -238,7 +239,7 @@ insert_error:
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASM_OVERLAPS);
discard_fq:
- inet_frag_kill(&fq->q);
+ inet_frag_kill(&fq->q, refs);
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
err:
@@ -254,7 +255,8 @@ err:
* the last and the first frames arrived and all the bits are here.
*/
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
- struct sk_buff *prev_tail, struct net_device *dev)
+ struct sk_buff *prev_tail, struct net_device *dev,
+ int *refs)
{
struct net *net = fq->q.fqdir->net;
unsigned int nhoff;
@@ -262,7 +264,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
int payload_len;
u8 ecn;
- inet_frag_kill(&fq->q);
+ inet_frag_kill(&fq->q, refs);
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
@@ -303,9 +305,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
skb_postpush_rcsum(skb, skb_network_header(skb),
skb_network_header_len(skb));
- rcu_read_lock();
__IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
- rcu_read_unlock();
fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
fq->q.last_run_head = NULL;
@@ -317,10 +317,8 @@ out_oversize:
out_oom:
net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
out_fail:
- rcu_read_lock();
__IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
- rcu_read_unlock();
- inet_frag_kill(&fq->q);
+ inet_frag_kill(&fq->q, refs);
return -1;
}
@@ -377,19 +375,21 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
}
iif = skb->dev ? skb->dev->ifindex : 0;
+ rcu_read_lock();
fq = fq_find(net, fhdr->identification, hdr, iif);
if (fq) {
u32 prob_offset = 0;
- int ret;
+ int ret, refs = 0;
spin_lock(&fq->q.lock);
fq->iif = iif;
ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff,
- &prob_offset);
+ &prob_offset, &refs);
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q);
+ rcu_read_unlock();
+ inet_frag_putn(&fq->q, refs);
if (prob_offset) {
__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
IPSTATS_MIB_INHDRERRORS);
@@ -398,6 +398,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
}
return ret;
}
+ rcu_read_unlock();
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b4251915585f..79c8f1acf8a3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -374,6 +374,7 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
struct rt6_info *rt = dst_rt6_info(dst);
struct inet6_dev *idev = rt->rt6i_idev;
+ struct fib6_info *from;
if (idev && idev->dev != blackhole_netdev) {
struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
@@ -383,6 +384,8 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
in6_dev_put(idev);
}
}
+ from = unrcu_pointer(xchg(&rt->from, NULL));
+ fib6_info_release(from);
}
static bool __rt6_check_expired(const struct rt6_info *rt)
@@ -409,12 +412,37 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
+static struct fib6_info *
+rt6_multipath_first_sibling_rcu(const struct fib6_info *rt)
+{
+ struct fib6_info *iter;
+ struct fib6_node *fn;
+
+ fn = rcu_dereference(rt->fib6_node);
+ if (!fn)
+ goto out;
+ iter = rcu_dereference(fn->leaf);
+ if (!iter)
+ goto out;
+
+ while (iter) {
+ if (iter->fib6_metric == rt->fib6_metric &&
+ rt6_qualify_for_ecmp(iter))
+ return iter;
+ iter = rcu_dereference(iter->fib6_next);
+ }
+
+out:
+ return NULL;
+}
+
void fib6_select_path(const struct net *net, struct fib6_result *res,
struct flowi6 *fl6, int oif, bool have_oif_match,
const struct sk_buff *skb, int strict)
{
- struct fib6_info *sibling, *next_sibling;
- struct fib6_info *match = res->f6i;
+ struct fib6_info *first, *match = res->f6i;
+ struct fib6_info *sibling;
+ int hash;
if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
goto out;
@@ -437,16 +465,25 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
return;
}
- if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
+ first = rt6_multipath_first_sibling_rcu(match);
+ if (!first)
goto out;
- list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
- fib6_siblings) {
+ hash = fl6->mp_hash;
+ if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound)) {
+ if (rt6_score_route(first->fib6_nh, first->fib6_flags, oif,
+ strict) >= 0)
+ match = first;
+ goto out;
+ }
+
+ list_for_each_entry_rcu(sibling, &first->fib6_siblings,
+ fib6_siblings) {
const struct fib6_nh *nh = sibling->fib6_nh;
int nh_upper_bound;
nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
- if (fl6->mp_hash > nh_upper_bound)
+ if (hash > nh_upper_bound)
continue;
if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
break;
@@ -1455,7 +1492,6 @@ static DEFINE_SPINLOCK(rt6_exception_lock);
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
struct rt6_exception *rt6_ex)
{
- struct fib6_info *from;
struct net *net;
if (!bucket || !rt6_ex)
@@ -1467,8 +1503,6 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
/* purge completely the exception to allow releasing the held resources:
* some [sk] cache may keep the dst around for unlimited time
*/
- from = unrcu_pointer(xchg(&rt6_ex->rt6i->from, NULL));
- fib6_info_release(from);
dst_dev_put(&rt6_ex->rt6i->dst);
hlist_del_rcu(&rt6_ex->hlist);
@@ -1737,6 +1771,7 @@ out:
if (!err) {
spin_lock_bh(&f6i->fib6_table->tb6_lock);
fib6_update_sernum(net, f6i);
+ fib6_add_gc_list(f6i);
spin_unlock_bh(&f6i->fib6_table->tb6_lock);
fib6_force_start_gc(net);
}
@@ -1785,11 +1820,13 @@ static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
void rt6_flush_exceptions(struct fib6_info *f6i)
{
- if (f6i->nh)
- nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
- f6i);
- else
+ if (f6i->nh) {
+ rcu_read_lock();
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions, f6i);
+ rcu_read_unlock();
+ } else {
fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
+ }
}
/* Find cached rt in the hash table inside passed in rt
@@ -2457,8 +2494,12 @@ static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
hash_keys.basic.ip_proto = fl6->flowi6_proto;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
- if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
- hash_keys.ports.src = fl6->fl6_sport;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) {
+ if (fl6->flowi6_flags & FLOWI_FLAG_ANY_SPORT)
+ hash_keys.ports.src = (__force __be16)get_random_u16();
+ else
+ hash_keys.ports.src = fl6->fl6_sport;
+ }
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
hash_keys.ports.dst = fl6->fl6_dport;
@@ -2512,7 +2553,10 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
hash_keys.addrs.v6addrs.src = fl6->saddr;
hash_keys.addrs.v6addrs.dst = fl6->daddr;
- hash_keys.ports.src = fl6->fl6_sport;
+ if (fl6->flowi6_flags & FLOWI_FLAG_ANY_SPORT)
+ hash_keys.ports.src = (__force __be16)get_random_u16();
+ else
+ hash_keys.ports.src = fl6->fl6_sport;
hash_keys.ports.dst = fl6->fl6_dport;
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
@@ -2780,10 +2824,10 @@ static void ip6_negative_advice(struct sock *sk,
if (rt->rt6i_flags & RTF_CACHE) {
rcu_read_lock();
if (rt6_check_expired(rt)) {
- /* counteract the dst_release() in sk_dst_reset() */
- dst_hold(dst);
+ /* rt/dst can not be destroyed yet,
+ * because of rcu_read_lock()
+ */
sk_dst_reset(sk);
-
rt6_remove_exception_rt(rt);
}
rcu_read_unlock();
@@ -3196,13 +3240,18 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
struct net_device *dev = dst->dev;
unsigned int mtu = dst_mtu(dst);
- struct net *net = dev_net(dev);
+ struct net *net;
mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+ rcu_read_lock();
+
+ net = dev_net_rcu(dev);
if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
+ rcu_read_unlock();
+
/*
* Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
* corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
@@ -3639,7 +3688,8 @@ out:
in6_dev_put(idev);
if (err) {
- lwtstate_put(fib6_nh->fib_nh_lws);
+ fib_nh_common_release(&fib6_nh->nh_common);
+ fib6_nh->nh_common.nhc_pcpu_rth_output = NULL;
fib6_nh->fib_nh_lws = NULL;
netdev_put(dev, dev_tracker);
}
@@ -3687,62 +3737,62 @@ void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
}
}
-static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
- gfp_t gfp_flags,
- struct netlink_ext_ack *extack)
+static int fib6_config_validate(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
- struct net *net = cfg->fc_nlinfo.nl_net;
- struct fib6_info *rt = NULL;
- struct nexthop *nh = NULL;
- struct fib6_table *table;
- struct fib6_nh *fib6_nh;
- int err = -EINVAL;
- int addr_type;
-
/* RTF_PCPU is an internal flag; can not be set by userspace */
if (cfg->fc_flags & RTF_PCPU) {
NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
- goto out;
+ goto errout;
}
/* RTF_CACHE is an internal flag; can not be set by userspace */
if (cfg->fc_flags & RTF_CACHE) {
NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
- goto out;
+ goto errout;
}
if (cfg->fc_type > RTN_MAX) {
NL_SET_ERR_MSG(extack, "Invalid route type");
- goto out;
+ goto errout;
}
if (cfg->fc_dst_len > 128) {
NL_SET_ERR_MSG(extack, "Invalid prefix length");
- goto out;
+ goto errout;
}
+
+#ifdef CONFIG_IPV6_SUBTREES
if (cfg->fc_src_len > 128) {
NL_SET_ERR_MSG(extack, "Invalid source address length");
- goto out;
+ goto errout;
}
-#ifndef CONFIG_IPV6_SUBTREES
+
+ if (cfg->fc_nh_id && cfg->fc_src_len) {
+ NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
+ goto errout;
+ }
+#else
if (cfg->fc_src_len) {
NL_SET_ERR_MSG(extack,
"Specifying source address requires IPV6_SUBTREES to be enabled");
- goto out;
+ goto errout;
}
#endif
- if (cfg->fc_nh_id) {
- nh = nexthop_find_by_id(net, cfg->fc_nh_id);
- if (!nh) {
- NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
- goto out;
- }
- err = fib6_check_nexthop(nh, cfg, extack);
- if (err)
- goto out;
- }
+ return 0;
+errout:
+ return -EINVAL;
+}
+
+static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
+ gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = cfg->fc_nlinfo.nl_net;
+ struct fib6_table *table;
+ struct fib6_info *rt;
+ int err;
- err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
!(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
table = fib6_get_table(net, cfg->fc_table);
@@ -3753,22 +3803,22 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
} else {
table = fib6_new_table(net, cfg->fc_table);
}
+ if (!table) {
+ err = -ENOBUFS;
+ goto err;
+ }
- if (!table)
- goto out;
-
- err = -ENOMEM;
- rt = fib6_info_alloc(gfp_flags, !nh);
- if (!rt)
- goto out;
+ rt = fib6_info_alloc(gfp_flags, !cfg->fc_nh_id);
+ if (!rt) {
+ err = -ENOMEM;
+ goto err;
+ }
rt->fib6_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len,
extack);
if (IS_ERR(rt->fib6_metrics)) {
err = PTR_ERR(rt->fib6_metrics);
- /* Do not leave garbage there. */
- rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
- goto out_free;
+ goto free;
}
if (cfg->fc_flags & RTF_ADDRCONF)
@@ -3776,12 +3826,12 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (cfg->fc_flags & RTF_EXPIRES)
fib6_set_expires(rt, jiffies +
- clock_t_to_jiffies(cfg->fc_expires));
+ clock_t_to_jiffies(cfg->fc_expires));
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
- rt->fib6_protocol = cfg->fc_protocol;
+ rt->fib6_protocol = cfg->fc_protocol;
rt->fib6_table = table;
rt->fib6_metric = cfg->fc_metric;
rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
@@ -3794,21 +3844,54 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->fib6_src.plen = cfg->fc_src_len;
#endif
- if (nh) {
- if (rt->fib6_src.plen) {
- NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
+ return rt;
+free:
+ kfree(rt);
+err:
+ return ERR_PTR(err);
+}
+
+static int ip6_route_info_create_nh(struct fib6_info *rt,
+ struct fib6_config *cfg,
+ gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = cfg->fc_nlinfo.nl_net;
+ struct fib6_nh *fib6_nh;
+ int err;
+
+ if (cfg->fc_nh_id) {
+ struct nexthop *nh;
+
+ rcu_read_lock();
+
+ nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+ if (!nh) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
goto out_free;
}
+
+ err = fib6_check_nexthop(nh, cfg, extack);
+ if (err)
+ goto out_free;
+
if (!nexthop_get(nh)) {
NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ err = -ENOENT;
goto out_free;
}
+
rt->nh = nh;
fib6_nh = nexthop_fib6_nh(rt->nh);
+
+ rcu_read_unlock();
} else {
+ int addr_type;
+
err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
if (err)
- goto out;
+ goto out_release;
fib6_nh = rt->fib6_nh;
@@ -3827,21 +3910,21 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
NL_SET_ERR_MSG(extack, "Invalid source address");
err = -EINVAL;
- goto out;
+ goto out_release;
}
rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
rt->fib6_prefsrc.plen = 128;
- } else
- rt->fib6_prefsrc.plen = 0;
+ }
- return rt;
-out:
+ return 0;
+out_release:
fib6_info_release(rt);
- return ERR_PTR(err);
+ return err;
out_free:
+ rcu_read_unlock();
ip_fib_metrics_put(rt->fib6_metrics);
kfree(rt);
- return ERR_PTR(err);
+ return err;
}
int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
@@ -3850,10 +3933,18 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
struct fib6_info *rt;
int err;
+ err = fib6_config_validate(cfg, extack);
+ if (err)
+ return err;
+
rt = ip6_route_info_create(cfg, gfp_flags, extack);
if (IS_ERR(rt))
return PTR_ERR(rt);
+ err = ip6_route_info_create_nh(rt, cfg, gfp_flags, extack);
+ if (err)
+ return err;
+
err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
fib6_info_release(rt);
@@ -4082,9 +4173,9 @@ static int ip6_route_del(struct fib6_config *cfg,
if (rt->nh) {
if (!fib6_info_hold_safe(rt))
continue;
- rcu_read_unlock();
- return __ip6_del_rt(rt, &cfg->fc_nlinfo);
+ err = __ip6_del_rt(rt, &cfg->fc_nlinfo);
+ break;
}
if (cfg->fc_nh_id)
continue;
@@ -4099,13 +4190,13 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
if (!fib6_info_hold_safe(rt))
continue;
- rcu_read_unlock();
/* if gateway was specified only delete the one hop */
if (cfg->fc_flags & RTF_GATEWAY)
- return __ip6_del_rt(rt, &cfg->fc_nlinfo);
-
- return __ip6_del_rt_siblings(rt, cfg);
+ err = __ip6_del_rt(rt, &cfg->fc_nlinfo);
+ else
+ err = __ip6_del_rt_siblings(rt, cfg);
+ break;
}
}
rcu_read_unlock();
@@ -4474,7 +4565,6 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
rtmsg_to_fib6_config(net, rtmsg, &cfg);
- rtnl_lock();
switch (cmd) {
case SIOCADDRT:
/* Only do the default setting of fc_metric in route adding */
@@ -4486,7 +4576,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
err = ip6_route_del(&cfg, NULL);
break;
}
- rtnl_unlock();
+
return err;
}
@@ -4576,6 +4666,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
.fc_ignore_dev_down = true,
};
struct fib6_info *f6i;
+ int err;
if (anycast) {
cfg.fc_type = RTN_ANYCAST;
@@ -4586,14 +4677,19 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
}
f6i = ip6_route_info_create(&cfg, gfp_flags, extack);
- if (!IS_ERR(f6i)) {
- f6i->dst_nocount = true;
+ if (IS_ERR(f6i))
+ return f6i;
- if (!anycast &&
- (READ_ONCE(net->ipv6.devconf_all->disable_policy) ||
- READ_ONCE(idev->cnf.disable_policy)))
- f6i->dst_nopolicy = true;
- }
+ err = ip6_route_info_create_nh(f6i, &cfg, gfp_flags, extack);
+ if (err)
+ return ERR_PTR(err);
+
+ f6i->dst_nocount = true;
+
+ if (!anycast &&
+ (READ_ONCE(net->ipv6.devconf_all->disable_policy) ||
+ READ_ONCE(idev->cnf.disable_policy)))
+ f6i->dst_nopolicy = true;
return f6i;
}
@@ -5005,14 +5101,63 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
[RTA_NH_ID] = { .type = NLA_U32 },
+ [RTA_FLOWLABEL] = { .type = NLA_BE32 },
};
+static int rtm_to_fib6_multipath_config(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack,
+ bool newroute)
+{
+ struct rtnexthop *rtnh;
+ int remaining;
+
+ remaining = cfg->fc_mp_len;
+ rtnh = (struct rtnexthop *)cfg->fc_mp;
+
+ if (!rtnh_ok(rtnh, remaining)) {
+ NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - no valid nexthops");
+ return -EINVAL;
+ }
+
+ do {
+ bool has_gateway = cfg->fc_flags & RTF_GATEWAY;
+ int attrlen = rtnh_attrlen(rtnh);
+
+ if (attrlen > 0) {
+ struct nlattr *nla, *attrs;
+
+ attrs = rtnh_attrs(rtnh);
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ if (nla) {
+ if (nla_len(nla) < sizeof(cfg->fc_gateway)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid IPv6 address in RTA_GATEWAY");
+ return -EINVAL;
+ }
+
+ has_gateway = true;
+ }
+ }
+
+ if (newroute && (cfg->fc_nh_id || !has_gateway)) {
+ NL_SET_ERR_MSG(extack,
+ "Device only routes can not be added for IPv6 using the multipath API.");
+ return -EINVAL;
+ }
+
+ rtnh = rtnh_next(rtnh, &remaining);
+ } while (rtnh_ok(rtnh, remaining));
+
+ return lwtunnel_valid_encap_type_attr(cfg->fc_mp, cfg->fc_mp_len, extack);
+}
+
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
- struct rtmsg *rtm;
+ bool newroute = nlh->nlmsg_type == RTM_NEWROUTE;
struct nlattr *tb[RTA_MAX+1];
+ struct rtmsg *rtm;
unsigned int pref;
int err;
@@ -5030,6 +5175,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
+ if (tb[RTA_FLOWLABEL]) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
+ "Flow label cannot be specified for this operation");
+ goto errout;
+ }
+
*cfg = (struct fib6_config){
.fc_table = rtm->rtm_table,
.fc_dst_len = rtm->rtm_dst_len,
@@ -5115,8 +5266,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
- err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
- cfg->fc_mp_len, extack);
+ err = rtm_to_fib6_multipath_config(cfg, extack, newroute);
if (err < 0)
goto errout;
}
@@ -5157,29 +5307,28 @@ errout:
struct rt6_nh {
struct fib6_info *fib6_info;
struct fib6_config r_cfg;
- struct list_head next;
+ struct list_head list;
};
-static int ip6_route_info_append(struct net *net,
- struct list_head *rt6_nh_list,
+static int ip6_route_info_append(struct list_head *rt6_nh_list,
struct fib6_info *rt,
struct fib6_config *r_cfg)
{
struct rt6_nh *nh;
- int err = -EEXIST;
- list_for_each_entry(nh, rt6_nh_list, next) {
+ list_for_each_entry(nh, rt6_nh_list, list) {
/* check if fib6_info already exists */
if (rt6_duplicate_nexthop(nh->fib6_info, rt))
- return err;
+ return -EEXIST;
}
nh = kzalloc(sizeof(*nh), GFP_KERNEL);
if (!nh)
return -ENOMEM;
+
nh->fib6_info = rt;
memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
- list_add_tail(&nh->next, rt6_nh_list);
+ list_add_tail(&nh->list, rt6_nh_list);
return 0;
}
@@ -5195,14 +5344,18 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
* nexthop. Since sibling routes are always added at the end of
* the list, find the first sibling of the last route appended
*/
+ rcu_read_lock();
+
if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
- rt = list_first_entry(&rt_last->fib6_siblings,
- struct fib6_info,
- fib6_siblings);
+ rt = list_first_or_null_rcu(&rt_last->fib6_siblings,
+ struct fib6_info,
+ fib6_siblings);
}
if (rt)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
+
+ rcu_read_unlock();
}
static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
@@ -5231,37 +5384,30 @@ out:
return should_notify;
}
-static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla,
- struct netlink_ext_ack *extack)
-{
- if (nla_len(nla) < sizeof(*gw)) {
- NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY");
- return -EINVAL;
- }
-
- *gw = nla_get_in6_addr(nla);
-
- return 0;
-}
-
static int ip6_route_multipath_add(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
struct fib6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
+ struct rt6_nh *nh, *nh_safe;
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
- struct fib6_info *rt;
+ LIST_HEAD(rt6_nh_list);
struct rt6_nh *err_nh;
- struct rt6_nh *nh, *nh_safe;
+ struct fib6_info *rt;
__u16 nlflags;
int remaining;
int attrlen;
- int err = 1;
+ int replace;
int nhn = 0;
- int replace = (cfg->fc_nlinfo.nlh &&
- (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
- LIST_HEAD(rt6_nh_list);
+ int err;
+
+ err = fib6_config_validate(cfg, extack);
+ if (err)
+ return err;
+
+ replace = (cfg->fc_nlinfo.nlh &&
+ (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
@@ -5284,18 +5430,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
if (nla) {
- err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
- extack);
- if (err)
- goto cleanup;
-
+ r_cfg.fc_gateway = nla_get_in6_addr(nla);
r_cfg.fc_flags |= RTF_GATEWAY;
}
- r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
- /* RTA_ENCAP_TYPE length checked in
- * lwtunnel_valid_encap_type_attr
- */
+ r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
if (nla)
r_cfg.fc_encap_type = nla_get_u16(nla);
@@ -5308,18 +5447,16 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
rt = NULL;
goto cleanup;
}
- if (!rt6_qualify_for_ecmp(rt)) {
- err = -EINVAL;
- NL_SET_ERR_MSG(extack,
- "Device only routes can not be added for IPv6 using the multipath API.");
- fib6_info_release(rt);
+
+ err = ip6_route_info_create_nh(rt, &r_cfg, GFP_KERNEL, extack);
+ if (err) {
+ rt = NULL;
goto cleanup;
}
rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
- err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
- rt, &r_cfg);
+ err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
fib6_info_release(rt);
goto cleanup;
@@ -5328,12 +5465,6 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
rtnh = rtnh_next(rtnh, &remaining);
}
- if (list_empty(&rt6_nh_list)) {
- NL_SET_ERR_MSG(extack,
- "Invalid nexthop configuration - no valid nexthops");
- return -EINVAL;
- }
-
/* for add and replace send one notification with all nexthops.
* Skip the notification in fib6_add_rt2node and send one with
* the full route when done
@@ -5346,7 +5477,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
info->skip_notify_kernel = 1;
err_nh = NULL;
- list_for_each_entry(nh, &rt6_nh_list, next) {
+ list_for_each_entry(nh, &rt6_nh_list, list) {
err = __ip6_ins_rt(nh->fib6_info, info, extack);
if (err) {
@@ -5414,16 +5545,16 @@ add_errout:
ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
/* Delete routes that were already added */
- list_for_each_entry(nh, &rt6_nh_list, next) {
+ list_for_each_entry(nh, &rt6_nh_list, list) {
if (err_nh == nh)
break;
ip6_route_del(&nh->r_cfg, extack);
}
cleanup:
- list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
+ list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, list) {
fib6_info_release(nh->fib6_info);
- list_del(&nh->next);
+ list_del(&nh->list);
kfree(nh);
}
@@ -5455,21 +5586,15 @@ static int ip6_route_multipath_del(struct fib6_config *cfg,
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
if (nla) {
- err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
- extack);
- if (err) {
- last_err = err;
- goto next_rtnh;
- }
-
+ r_cfg.fc_gateway = nla_get_in6_addr(nla);
r_cfg.fc_flags |= RTF_GATEWAY;
}
}
+
err = ip6_route_del(&r_cfg, extack);
if (err)
last_err = err;
-next_rtnh:
rtnh = rtnh_next(rtnh, &remaining);
}
@@ -5486,15 +5611,20 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
- if (cfg.fc_nh_id &&
- !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
- NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
- return -EINVAL;
+ if (cfg.fc_nh_id) {
+ rcu_read_lock();
+ err = !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id);
+ rcu_read_unlock();
+
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ return -EINVAL;
+ }
}
- if (cfg.fc_mp)
+ if (cfg.fc_mp) {
return ip6_route_multipath_del(&cfg, extack);
- else {
+ } else {
cfg.fc_delete_all_nh = 1;
return ip6_route_del(&cfg, extack);
}
@@ -5547,17 +5677,21 @@ static size_t rt6_nlmsg_size(struct fib6_info *f6i)
nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
&nexthop_len);
} else {
- struct fib6_info *sibling, *next_sibling;
struct fib6_nh *nh = f6i->fib6_nh;
+ struct fib6_info *sibling;
nexthop_len = 0;
if (f6i->fib6_nsiblings) {
rt6_nh_nlmsg_size(nh, &nexthop_len);
- list_for_each_entry_safe(sibling, next_sibling,
- &f6i->fib6_siblings, fib6_siblings) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
+ fib6_siblings) {
rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
}
+
+ rcu_read_unlock();
}
nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
@@ -5721,7 +5855,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
goto nla_put_failure;
} else if (rt->fib6_nsiblings) {
- struct fib6_info *sibling, *next_sibling;
+ struct fib6_info *sibling;
struct nlattr *mp;
mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
@@ -5733,14 +5867,21 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
0) < 0)
goto nla_put_failure;
- list_for_each_entry_safe(sibling, next_sibling,
- &rt->fib6_siblings, fib6_siblings) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sibling, &rt->fib6_siblings,
+ fib6_siblings) {
if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
sibling->fib6_nh->fib_nh_weight,
- AF_INET6, 0) < 0)
+ AF_INET6, 0) < 0) {
+ rcu_read_unlock();
+
goto nla_put_failure;
+ }
}
+ rcu_read_unlock();
+
nla_nest_end(skb, mp);
} else if (rt->nh) {
if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
@@ -5963,7 +6104,8 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
struct rtmsg *rtm;
int i, err;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ rtm = nlmsg_payload(nlh, sizeof(*rtm));
+ if (!rtm) {
NL_SET_ERR_MSG_MOD(extack,
"Invalid header for get route request");
return -EINVAL;
@@ -5973,7 +6115,6 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_ipv6_policy, extack);
- rtm = nlmsg_data(nlh);
if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
(rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
@@ -5998,6 +6139,13 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
return -EINVAL;
}
+ if (tb[RTA_FLOWLABEL] &&
+ (nla_get_be32(tb[RTA_FLOWLABEL]) & ~IPV6_FLOWLABEL_MASK)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
+ "Invalid flow label");
+ return -EINVAL;
+ }
+
for (i = 0; i <= RTA_MAX; i++) {
if (!tb[i])
continue;
@@ -6012,6 +6160,7 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
case RTA_SPORT:
case RTA_DPORT:
case RTA_IP_PROTO:
+ case RTA_FLOWLABEL:
break;
default:
NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
@@ -6034,6 +6183,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct sk_buff *skb;
struct rtmsg *rtm;
struct flowi6 fl6 = {};
+ __be32 flowlabel;
bool fibmatch;
err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
@@ -6042,7 +6192,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = -EINVAL;
rtm = nlmsg_data(nlh);
- fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
if (tb[RTA_SRC]) {
@@ -6088,6 +6237,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
}
+ flowlabel = nla_get_be32_default(tb[RTA_FLOWLABEL], 0);
+ fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, flowlabel);
+
if (iif) {
struct net_device *dev;
int flags = 0;
@@ -6177,7 +6329,9 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
err = -ENOBUFS;
seq = info->nlh ? info->nlh->nlmsg_seq : 0;
- skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
+ rcu_read_lock();
+
+ skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC);
if (!skb)
goto errout;
@@ -6189,10 +6343,14 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
kfree_skb(skb);
goto errout;
}
+
+ rcu_read_unlock();
+
rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
- info->nlh, gfp_any());
+ info->nlh, GFP_ATOMIC);
return;
errout:
+ rcu_read_unlock();
rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
@@ -6680,6 +6838,15 @@ static void bpf_iter_unregister(void)
#endif
#endif
+static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE,
+ .doit = inet6_rtm_newroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE,
+ .doit = inet6_rtm_delroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
+ .doit = inet6_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+};
+
int __init ip6_route_init(void)
{
int ret;
@@ -6722,19 +6889,7 @@ int __init ip6_route_init(void)
if (ret)
goto fib6_rules_init;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
- inet6_rtm_newroute, NULL, 0);
- if (ret < 0)
- goto out_register_late_subsys;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
- inet6_rtm_delroute, NULL, 0);
- if (ret < 0)
- goto out_register_late_subsys;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
- inet6_rtm_getroute, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
+ ret = rtnl_register_many(ip6_route_rtnl_msg_handlers);
if (ret < 0)
goto out_register_late_subsys;
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index db3c19a42e1c..7c05ac846646 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -125,7 +125,8 @@ static void rpl_destroy_state(struct lwtunnel_state *lwt)
}
static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
- const struct ipv6_rpl_sr_hdr *srh)
+ const struct ipv6_rpl_sr_hdr *srh,
+ struct dst_entry *cache_dst)
{
struct ipv6_rpl_sr_hdr *isrh, *csrh;
const struct ipv6hdr *oldhdr;
@@ -153,7 +154,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
hdrlen = ((csrh->hdrlen + 1) << 3);
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err)) {
kfree(buf);
return err;
@@ -186,7 +187,8 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
return 0;
}
-static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
+static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt,
+ struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct rpl_iptunnel_encap *tinfo;
@@ -196,7 +198,7 @@ static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
tinfo = rpl_encap_lwtunnel(dst->lwtstate);
- return rpl_do_srh_inline(skb, rlwt, tinfo->srh);
+ return rpl_do_srh_inline(skb, rlwt, tinfo->srh, cache_dst);
}
static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -208,14 +210,14 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
- err = rpl_do_srh(skb, rlwt);
- if (unlikely(err))
- goto drop;
-
local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
local_bh_enable();
+ err = rpl_do_srh(skb, rlwt, dst);
+ if (unlikely(err))
+ goto drop;
+
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -230,25 +232,28 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
err = dst->error;
- dst_release(dst);
goto drop;
}
- local_bh_disable();
- dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
- local_bh_enable();
+ /* cache only if we don't create a dst reference loop */
+ if (orig_dst->lwtstate != dst->lwtstate) {
+ local_bh_disable();
+ dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
+ local_bh_enable();
+ }
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
-
return dst_output(net, sk, skb);
drop:
+ dst_release(dst);
kfree_skb(skb);
return err;
}
@@ -257,34 +262,48 @@ static int rpl_input(struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
+ struct lwtunnel_state *lwtst;
struct rpl_lwt *rlwt;
int err;
- rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
+ /* We cannot dereference "orig_dst" once ip6_route_input() or
+ * skb_dst_drop() is called. However, in order to detect a dst loop, we
+ * need the address of its lwtstate. So, save the address of lwtstate
+ * now and use it later as a comparison.
+ */
+ lwtst = orig_dst->lwtstate;
- err = rpl_do_srh(skb, rlwt);
- if (unlikely(err))
- goto drop;
+ rlwt = rpl_lwt_lwtunnel(lwtst);
local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
+ local_bh_enable();
+
+ err = rpl_do_srh(skb, rlwt, dst);
+ if (unlikely(err)) {
+ dst_release(dst);
+ goto drop;
+ }
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
- if (!dst->error) {
+
+ /* cache only if we don't create a dst reference loop */
+ if (!dst->error && lwtst != dst->lwtstate) {
+ local_bh_disable();
dst_cache_set_ip6(&rlwt->cache, dst,
&ipv6_hdr(skb)->saddr);
+ local_bh_enable();
}
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
- local_bh_enable();
-
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
return dst_input(skb);
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index bbf5b84a70fc..f78ecb6ad838 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -40,7 +40,14 @@
#include <net/seg6_hmac.h>
#include <linux/random.h>
-static DEFINE_PER_CPU(char [SEG6_HMAC_RING_SIZE], hmac_ring);
+struct hmac_storage {
+ local_lock_t bh_lock;
+ char hmac_ring[SEG6_HMAC_RING_SIZE];
+};
+
+static DEFINE_PER_CPU(struct hmac_storage, hmac_storage) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
{
@@ -187,7 +194,8 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
*/
local_bh_disable();
- ring = this_cpu_ptr(hmac_ring);
+ local_lock_nested_bh(&hmac_storage.bh_lock);
+ ring = this_cpu_ptr(hmac_storage.hmac_ring);
off = ring;
/* source address */
@@ -212,6 +220,7 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
dgsize = __do_hmac(hinfo, ring, plen, tmp_out,
SEG6_HMAC_MAX_DIGESTSIZE);
+ local_unlock_nested_bh(&hmac_storage.bh_lock);
local_bh_enable();
if (dgsize < 0)
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 098632adc9b5..51583461ae29 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -124,8 +124,8 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
return flowlabel;
}
-/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
-int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ int proto, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct net *net = dev_net(dst->dev);
@@ -137,7 +137,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
- err = skb_cow_head(skb, tot_len + skb->mac_len);
+ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -197,11 +197,18 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
return 0;
}
+
+/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
+int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+{
+ return __seg6_do_srh_encap(skb, osrh, proto, NULL);
+}
EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */
static int seg6_do_srh_encap_red(struct sk_buff *skb,
- struct ipv6_sr_hdr *osrh, int proto)
+ struct ipv6_sr_hdr *osrh, int proto,
+ struct dst_entry *cache_dst)
{
__u8 first_seg = osrh->first_segment;
struct dst_entry *dst = skb_dst(skb);
@@ -230,7 +237,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb,
tot_len = red_hdrlen + sizeof(struct ipv6hdr);
- err = skb_cow_head(skb, tot_len + skb->mac_len);
+ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -317,8 +324,8 @@ out:
return 0;
}
-/* insert an SRH within an IPv6 packet, just after the IPv6 header */
-int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ struct dst_entry *cache_dst)
{
struct ipv6hdr *hdr, *oldhdr;
struct ipv6_sr_hdr *isrh;
@@ -326,7 +333,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -369,9 +376,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
return 0;
}
-EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
-static int seg6_do_srh(struct sk_buff *skb)
+static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct seg6_iptunnel_encap *tinfo;
@@ -384,7 +390,7 @@ static int seg6_do_srh(struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
return -EINVAL;
- err = seg6_do_srh_inline(skb, tinfo->srh);
+ err = __seg6_do_srh_inline(skb, tinfo->srh, cache_dst);
if (err)
return err;
break;
@@ -402,9 +408,11 @@ static int seg6_do_srh(struct sk_buff *skb)
return -EINVAL;
if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP)
- err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+ err = __seg6_do_srh_encap(skb, tinfo->srh,
+ proto, cache_dst);
else
- err = seg6_do_srh_encap_red(skb, tinfo->srh, proto);
+ err = seg6_do_srh_encap_red(skb, tinfo->srh,
+ proto, cache_dst);
if (err)
return err;
@@ -425,11 +433,13 @@ static int seg6_do_srh(struct sk_buff *skb)
skb_push(skb, skb->mac_len);
if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP)
- err = seg6_do_srh_encap(skb, tinfo->srh,
- IPPROTO_ETHERNET);
+ err = __seg6_do_srh_encap(skb, tinfo->srh,
+ IPPROTO_ETHERNET,
+ cache_dst);
else
err = seg6_do_srh_encap_red(skb, tinfo->srh,
- IPPROTO_ETHERNET);
+ IPPROTO_ETHERNET,
+ cache_dst);
if (err)
return err;
@@ -444,6 +454,13 @@ static int seg6_do_srh(struct sk_buff *skb)
return 0;
}
+/* insert an SRH within an IPv6 packet, just after the IPv6 header */
+int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+{
+ return __seg6_do_srh_inline(skb, osrh, NULL);
+}
+EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
+
static int seg6_input_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
@@ -455,34 +472,48 @@ static int seg6_input_core(struct net *net, struct sock *sk,
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
+ struct lwtunnel_state *lwtst;
struct seg6_lwt *slwt;
int err;
- err = seg6_do_srh(skb);
- if (unlikely(err))
- goto drop;
+ /* We cannot dereference "orig_dst" once ip6_route_input() or
+ * skb_dst_drop() is called. However, in order to detect a dst loop, we
+ * need the address of its lwtstate. So, save the address of lwtstate
+ * now and use it later as a comparison.
+ */
+ lwtst = orig_dst->lwtstate;
- slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+ slwt = seg6_lwt_lwtunnel(lwtst);
local_bh_disable();
dst = dst_cache_get(&slwt->cache);
+ local_bh_enable();
+
+ err = seg6_do_srh(skb, dst);
+ if (unlikely(err)) {
+ dst_release(dst);
+ goto drop;
+ }
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
- if (!dst->error) {
+
+ /* cache only if we don't create a dst reference loop */
+ if (!dst->error && lwtst != dst->lwtstate) {
+ local_bh_disable();
dst_cache_set_ip6(&slwt->cache, dst,
&ipv6_hdr(skb)->saddr);
+ local_bh_enable();
}
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
- local_bh_enable();
-
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
@@ -528,16 +559,16 @@ static int seg6_output_core(struct net *net, struct sock *sk,
struct seg6_lwt *slwt;
int err;
- err = seg6_do_srh(skb);
- if (unlikely(err))
- goto drop;
-
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
local_bh_disable();
dst = dst_cache_get(&slwt->cache);
local_bh_enable();
+ err = seg6_do_srh(skb, dst);
+ if (unlikely(err))
+ goto drop;
+
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -552,28 +583,31 @@ static int seg6_output_core(struct net *net, struct sock *sk,
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
err = dst->error;
- dst_release(dst);
goto drop;
}
- local_bh_disable();
- dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
- local_bh_enable();
+ /* cache only if we don't create a dst reference loop */
+ if (orig_dst->lwtstate != dst->lwtstate) {
+ local_bh_disable();
+ dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+ local_bh_enable();
+ }
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
-
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, skb_dst(skb)->dev, dst_output);
return dst_output(net, sk, skb);
drop:
+ dst_release(dst);
kfree_skb(skb);
return err;
}
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index c74705ead984..a11a02b4ba95 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -954,10 +954,10 @@ static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
+ enum skb_drop_reason reason;
struct seg6_local_lwt *slwt;
struct iphdr *iph;
__be32 nhaddr;
- int err;
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
@@ -967,9 +967,9 @@ static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
skb_dst_drop(skb);
- err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
- if (err) {
- kfree_skb(skb);
+ reason = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
+ if (reason) {
+ kfree_skb_reason(skb, reason);
return -EINVAL;
}
@@ -1174,8 +1174,8 @@ drop:
static int input_action_end_dt4(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
+ enum skb_drop_reason reason;
struct iphdr *iph;
- int err;
if (!decap_and_validate(skb, IPPROTO_IPIP))
goto drop;
@@ -1193,8 +1193,8 @@ static int input_action_end_dt4(struct sk_buff *skb,
iph = ip_hdr(skb);
- err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
- if (unlikely(err))
+ reason = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
+ if (unlikely(reason))
goto drop;
return dst_input(skb);
@@ -1644,10 +1644,8 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
[SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
[SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 },
- [SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
- .len = sizeof(struct in_addr) },
- [SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
- .len = sizeof(struct in6_addr) },
+ [SEG6_LOCAL_NH4] = NLA_POLICY_EXACT_LEN(sizeof(struct in_addr)),
+ [SEG6_LOCAL_NH6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[SEG6_LOCAL_IIF] = { .type = NLA_U32 },
[SEG6_LOCAL_OIF] = { .type = NLA_U32 },
[SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 39bd8951bfca..a72dbca9e8fc 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -51,6 +51,7 @@
#include <net/dsfield.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netdev_lock.h>
#include <net/inet_dscp.h>
/*
@@ -201,8 +202,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
static int ipip6_tunnel_create(struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct net *net = dev_net(dev);
- struct sit_net *sitn = net_generic(net, sit_net_id);
+ struct sit_net *sitn = net_generic(t->net, sit_net_id);
int err;
__dev_addr_set(dev, &t->parms.iph.saddr, 4);
@@ -269,6 +269,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
nt = netdev_priv(dev);
+ nt->net = net;
nt->parms = *parms;
if (ipip6_tunnel_create(dev) < 0)
goto failed_free;
@@ -1449,7 +1450,6 @@ static int ipip6_tunnel_init(struct net_device *dev)
int err;
tunnel->dev = dev;
- tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
ipip6_tunnel_bind_dev(dev);
@@ -1550,19 +1550,23 @@ static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
}
#endif
-static int ipip6_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
+static int ipip6_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
{
- struct net *net = dev_net(dev);
+ struct nlattr **data = params->data;
+ struct nlattr **tb = params->tb;
struct ip_tunnel *nt;
struct ip_tunnel_encap ipencap;
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
#endif
+ struct net *net;
int err;
+ net = params->link_net ? : dev_net(dev);
nt = netdev_priv(dev);
+ nt->net = net;
if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
err = ip_tunnel_encap_setup(nt, &ipencap);
@@ -1800,8 +1804,7 @@ static struct xfrm_tunnel mplsip_handler __read_mostly = {
};
#endif
-static void __net_exit sit_destroy_tunnels(struct net *net,
- struct list_head *head)
+static void __net_exit sit_exit_rtnl_net(struct net *net, struct list_head *head)
{
struct sit_net *sitn = net_generic(net, sit_net_id);
struct net_device *dev, *aux;
@@ -1816,15 +1819,15 @@ static void __net_exit sit_destroy_tunnels(struct net *net,
for (h = 0; h < (prio ? IP6_SIT_HASH_SIZE : 1); h++) {
struct ip_tunnel *t;
- t = rtnl_dereference(sitn->tunnels[prio][h]);
+ t = rtnl_net_dereference(net, sitn->tunnels[prio][h]);
while (t) {
/* If dev is in the same netns, it has already
* been added to the list by the previous loop.
*/
if (!net_eq(dev_net(t->dev), net))
- unregister_netdevice_queue(t->dev,
- head);
- t = rtnl_dereference(t->next);
+ unregister_netdevice_queue(t->dev, head);
+
+ t = rtnl_net_dereference(net, t->next);
}
}
}
@@ -1856,7 +1859,10 @@ static int __net_init sit_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- sitn->fb_tunnel_dev->netns_local = true;
+ sitn->fb_tunnel_dev->netns_immutable = true;
+
+ t = netdev_priv(sitn->fb_tunnel_dev);
+ t->net = net;
err = register_netdev(sitn->fb_tunnel_dev);
if (err)
@@ -1865,8 +1871,6 @@ static int __net_init sit_init_net(struct net *net)
ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
- t = netdev_priv(sitn->fb_tunnel_dev);
-
strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
return 0;
@@ -1876,19 +1880,9 @@ err_alloc_dev:
return err;
}
-static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
-{
- struct net *net;
-
- ASSERT_RTNL();
- list_for_each_entry(net, net_list, exit_list)
- sit_destroy_tunnels(net, dev_to_kill);
-}
-
static struct pernet_operations sit_net_ops = {
.init = sit_init_net,
- .exit_batch_rtnl = sit_exit_batch_rtnl,
+ .exit_rtnl = sit_exit_rtnl_net,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
};
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d71ab4e1efe1..e8e68a142649 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -267,6 +267,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
+ if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport)
+ fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT;
fl6.flowi6_uid = sk->sk_uid;
opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
@@ -376,7 +378,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
{
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
struct request_sock *fastopen;
struct ipv6_pinfo *np;
struct tcp_sock *tp;
@@ -798,6 +800,8 @@ static void tcp_v6_init_req(struct request_sock *req,
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
+ ireq->ir_rmt_addr = LOOPBACK4_IPV6;
+ ireq->ir_loc_addr = LOOPBACK4_IPV6;
/* So that link locals have meaning */
if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
@@ -864,16 +868,16 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
int oif, int rst, u8 tclass, __be32 label,
u32 priority, u32 txhash, struct tcp_key *key)
{
- const struct tcphdr *th = tcp_hdr(skb);
- struct tcphdr *t1;
- struct sk_buff *buff;
- struct flowi6 fl6;
- struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
- struct sock *ctl_sk = net->ipv6.tcp_sk;
+ struct net *net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev);
unsigned int tot_len = sizeof(struct tcphdr);
+ struct sock *ctl_sk = net->ipv6.tcp_sk;
+ const struct tcphdr *th = tcp_hdr(skb);
__be32 mrst = 0, *topt;
struct dst_entry *dst;
- __u32 mark = 0;
+ struct sk_buff *buff;
+ struct tcphdr *t1;
+ struct flowi6 fl6;
+ u32 mark = 0;
if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
@@ -967,6 +971,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
}
if (sk) {
+ /* unconstify the socket only to attach it to buff with care. */
+ skb_set_owner_edemux(buff, (struct sock *)sk);
+
if (sk->sk_state == TCP_TIME_WAIT)
mark = inet_twsk(sk)->tw_mark;
else
@@ -994,7 +1001,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
- tclass & ~INET_ECN_MASK, priority);
+ tclass, priority);
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
@@ -1036,7 +1043,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
if (!sk && !ipv6_unicast_destination(skb))
return;
- net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
+ net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev);
/* Invalid TCP option size or twice included auth */
if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
return;
@@ -1130,7 +1137,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
trace_tcp_send_reset(sk, skb, reason);
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
- ipv6_get_dsfield(ipv6h), label, priority, txhash,
+ ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
+ label, priority, txhash,
&key);
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
@@ -1150,11 +1158,16 @@ static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
tclass, label, priority, txhash, key);
}
-static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
+static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
+ enum tcp_tw_status tw_status)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+ u8 tclass = tw->tw_tclass;
struct tcp_key key = {};
+
+ if (tw_status == TCP_TW_ACK_OOW)
+ tclass &= ~INET_ECN_MASK;
#ifdef CONFIG_TCP_AO
struct tcp_ao_info *ao_info;
@@ -1169,8 +1182,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
goto out;
if (aoh)
- key.ao_key = tcp_ao_established_key(ao_info,
- aoh->rnext_keyid, -1);
+ key.ao_key = tcp_ao_established_key(sk, ao_info,
+ aoh->rnext_keyid, -1);
}
}
if (key.ao_key) {
@@ -1198,7 +1211,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
- &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel),
+ &key, tclass, cpu_to_be32(tw->tw_flowlabel),
tw->tw_priority, tw->tw_txhash);
#ifdef CONFIG_TCP_AO
@@ -1274,8 +1287,9 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->rcv_nxt,
tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
tcp_rsk_tsval(tcp_rsk(req)),
- READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
- &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
+ req->ts_recent, sk->sk_bound_dev_if,
+ &key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
+ 0,
READ_ONCE(sk->sk_priority),
READ_ONCE(tcp_rsk(req)->txhash));
if (tcp_key_is_ao(&key))
@@ -1448,10 +1462,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
ip6_dst_store(newsk, dst, NULL, NULL);
- newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
newnp->saddr = ireq->ir_v6_loc_addr;
- newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
- newsk->sk_bound_dev_if = ireq->ir_iif;
/* Now IPv6 options...
@@ -1504,9 +1515,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
tcp_initialize_rcv_mss(newsk);
- newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
- newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
-
#ifdef CONFIG_TCP_MD5SIG
l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
@@ -1618,7 +1626,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
by tcp. Feel free to propose better solution.
--ANK (980728)
*/
- if (np->rxopt.all)
+ if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
@@ -1656,8 +1664,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (reason)
goto reset;
}
- if (opt_skb)
- __kfree_skb(opt_skb);
return 0;
}
} else
@@ -1734,7 +1740,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
- TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+ TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
@@ -1743,7 +1749,9 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
+ struct net *net = dev_net_rcu(skb->dev);
enum skb_drop_reason drop_reason;
+ enum tcp_tw_status tw_status;
int sdif = inet6_sdif(skb);
int dif = inet6_iif(skb);
const struct tcphdr *th;
@@ -1752,7 +1760,6 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
bool refcounted;
int ret;
u32 isn;
- struct net *net = dev_net(skb->dev);
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
@@ -1831,7 +1838,8 @@ lookup:
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
- nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
+ nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
+ &drop_reason);
} else {
drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
}
@@ -1964,7 +1972,9 @@ do_time_wait:
goto csum_error;
}
- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
+ tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
+ &drop_reason);
+ switch (tw_status) {
case TCP_TW_SYN:
{
struct sock *sk2;
@@ -1989,7 +1999,8 @@ do_time_wait:
/* to ACK */
fallthrough;
case TCP_TW_ACK:
- tcp_v6_timewait_ack(sk, skb);
+ case TCP_TW_ACK_OOW:
+ tcp_v6_timewait_ack(sk, skb, tw_status);
break;
case TCP_TW_RST:
tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
@@ -2003,7 +2014,7 @@ do_time_wait:
void tcp_v6_early_demux(struct sk_buff *skb)
{
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
const struct ipv6hdr *hdr;
const struct tcphdr *th;
struct sock *sk;
@@ -2060,8 +2071,6 @@ const struct inet_connection_sock_af_ops ipv6_specific = {
.net_header_len = sizeof(struct ipv6hdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
- .addr2sockaddr = inet6_csk_addr2sockaddr,
- .sockaddr_len = sizeof(struct sockaddr_in6),
.mtu_reduced = tcp_v6_mtu_reduced,
};
@@ -2094,8 +2103,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.net_header_len = sizeof(struct iphdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
- .addr2sockaddr = inet6_csk_addr2sockaddr,
- .sockaddr_len = sizeof(struct sockaddr_in6),
.mtu_reduced = tcp_v4_mtu_reduced,
};
@@ -2177,6 +2184,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
+ u8 icsk_pending;
int rx_queue;
int state;
@@ -2185,14 +2193,15 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
destp = ntohs(inet->inet_dport);
srcp = ntohs(inet->inet_sport);
- if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
- icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ icsk_pending = smp_load_acquire(&icsk->icsk_pending);
+ if (icsk_pending == ICSK_TIME_RETRANS ||
+ icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
- timer_expires = icsk->icsk_timeout;
- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ timer_expires = icsk_timeout(icsk);
+ } else if (icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
- timer_expires = icsk->icsk_timeout;
+ timer_expires = icsk_timeout(icsk);
} else if (timer_pending(&sp->sk_timer)) {
timer_active = 2;
timer_expires = sp->sk_timer.expires;
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index a45bf17cb2a1..a8a04f441e78 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -35,14 +35,14 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
inet6_get_iif_sdif(skb, &iif, &sdif);
hdr = skb_gro_network_header(skb);
- net = dev_net(skb->dev);
+ net = dev_net_rcu(skb->dev);
sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
&hdr->saddr, th->source,
&hdr->daddr, ntohs(th->dest),
iif, sdif);
NAPI_GRO_CB(skb)->is_flist = !sk;
if (sk)
- sock_put(sk);
+ sock_gen_put(sk);
#endif /* IS_ENABLED(CONFIG_IPV6) */
}
@@ -94,14 +94,23 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
}
static void __tcpv6_gso_segment_csum(struct sk_buff *seg,
+ struct in6_addr *oldip,
+ const struct in6_addr *newip,
__be16 *oldport, __be16 newport)
{
- struct tcphdr *th;
+ struct tcphdr *th = tcp_hdr(seg);
+
+ if (!ipv6_addr_equal(oldip, newip)) {
+ inet_proto_csum_replace16(&th->check, seg,
+ oldip->s6_addr32,
+ newip->s6_addr32,
+ true);
+ *oldip = *newip;
+ }
if (*oldport == newport)
return;
- th = tcp_hdr(seg);
inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
*oldport = newport;
}
@@ -129,10 +138,10 @@ static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs)
th2 = tcp_hdr(seg);
iph2 = ipv6_hdr(seg);
- iph2->saddr = iph->saddr;
- iph2->daddr = iph->daddr;
- __tcpv6_gso_segment_csum(seg, &th2->source, th->source);
- __tcpv6_gso_segment_csum(seg, &th2->dest, th->dest);
+ __tcpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr,
+ &th2->source, th->source);
+ __tcpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr,
+ &th2->dest, th->dest);
}
return segs;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0cef8ae5d1ea..7317f8e053f1 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -46,6 +46,7 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
+#include <net/udp_tunnel.h>
#include <net/xfrm.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
@@ -110,8 +111,19 @@ void udp_v6_rehash(struct sock *sk)
u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
&sk->sk_v6_rcv_saddr,
inet_sk(sk)->inet_num);
+ u16 new_hash4;
- udp_lib_rehash(sk, new_hash);
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
+ new_hash4 = udp_ehashfn(sock_net(sk),
+ sk->sk_rcv_saddr, sk->sk_num,
+ sk->sk_daddr, sk->sk_dport);
+ } else {
+ new_hash4 = udp6_ehashfn(sock_net(sk),
+ &sk->sk_v6_rcv_saddr, sk->sk_num,
+ &sk->sk_v6_daddr, sk->sk_dport);
+ }
+
+ udp_lib_rehash(sk, new_hash, new_hash4);
}
static int compute_score(struct sock *sk, const struct net *net,
@@ -159,6 +171,49 @@ static int compute_score(struct sock *sk, const struct net *net,
return score;
}
+/**
+ * udp6_lib_lookup1() - Simplified lookup using primary hash (destination port)
+ * @net: Network namespace
+ * @saddr: Source address, network order
+ * @sport: Source port, network order
+ * @daddr: Destination address, network order
+ * @hnum: Destination port, host order
+ * @dif: Destination interface index
+ * @sdif: Destination bridge port index, if relevant
+ * @udptable: Set of UDP hash tables
+ *
+ * Simplified lookup to be used as fallback if no sockets are found due to a
+ * potential race between (receive) address change, and lookup happening before
+ * the rehash operation. This function ignores SO_REUSEPORT groups while scoring
+ * result sockets, because if we have one, we don't need the fallback at all.
+ *
+ * Called under rcu_read_lock().
+ *
+ * Return: socket with highest matching score if any, NULL if none
+ */
+static struct sock *udp6_lib_lookup1(const struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum, int dif, int sdif,
+ const struct udp_table *udptable)
+{
+ unsigned int slot = udp_hashfn(net, hnum, udptable->mask);
+ struct udp_hslot *hslot = &udptable->hash[slot];
+ struct sock *sk, *result = NULL;
+ int score, badness = 0;
+
+ sk_for_each_rcu(sk, &hslot->head) {
+ score = compute_score(sk, net,
+ saddr, sport, daddr, hnum, dif, sdif);
+ if (score > badness) {
+ result = sk;
+ badness = score;
+ }
+ }
+
+ return result;
+}
+
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -216,6 +271,74 @@ rescore:
return result;
}
+#if IS_ENABLED(CONFIG_BASE_SMALL)
+static struct sock *udp6_lib_lookup4(const struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum, int dif, int sdif,
+ struct udp_table *udptable)
+{
+ return NULL;
+}
+
+static void udp6_hash4(struct sock *sk)
+{
+}
+#else /* !CONFIG_BASE_SMALL */
+static struct sock *udp6_lib_lookup4(const struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum, int dif, int sdif,
+ struct udp_table *udptable)
+{
+ const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+ const struct hlist_nulls_node *node;
+ struct udp_hslot *hslot4;
+ unsigned int hash4, slot;
+ struct udp_sock *up;
+ struct sock *sk;
+
+ hash4 = udp6_ehashfn(net, daddr, hnum, saddr, sport);
+ slot = hash4 & udptable->mask;
+ hslot4 = &udptable->hash4[slot];
+
+begin:
+ udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) {
+ sk = (struct sock *)up;
+ if (inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
+ return sk;
+ }
+
+ /* if the nulls value we got at the end of this lookup is not the
+ * expected one, we must restart lookup. We probably met an item that
+ * was moved to another chain due to rehash.
+ */
+ if (get_nulls_value(node) != slot)
+ goto begin;
+
+ return NULL;
+}
+
+static void udp6_hash4(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ unsigned int hash;
+
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
+ udp4_hash4(sk);
+ return;
+ }
+
+ if (sk_unhashed(sk) || ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ return;
+
+ hash = udp6_ehashfn(net, &sk->sk_v6_rcv_saddr, sk->sk_num,
+ &sk->sk_v6_daddr, sk->sk_dport);
+
+ udp_lib_hash4(sk, hash);
+}
+#endif /* CONFIG_BASE_SMALL */
+
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -224,13 +347,19 @@ struct sock *__udp6_lib_lookup(const struct net *net,
struct sk_buff *skb)
{
unsigned short hnum = ntohs(dport);
- unsigned int hash2, slot2;
struct udp_hslot *hslot2;
struct sock *result, *sk;
+ unsigned int hash2;
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
+
+ if (udp_has_hash4(hslot2)) {
+ result = udp6_lib_lookup4(net, saddr, sport, daddr, hnum,
+ dif, sdif, udptable);
+ if (result) /* udp6_lib_lookup4 return sk or NULL */
+ return result;
+ }
/* Lookup connected or non-wildcard sockets */
result = udp6_lib_lookup2(net, saddr, sport,
@@ -257,12 +386,18 @@ struct sock *__udp6_lib_lookup(const struct net *net,
/* Lookup wildcard sockets */
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif, sdif,
hslot2, skb);
+ if (!IS_ERR_OR_NULL(result))
+ goto done;
+
+ /* Cover address change/lookup/rehash race: see __udp4_lib_lookup() */
+ result = udp6_lib_lookup1(net, saddr, sport, daddr, hnum, dif, sdif,
+ udptable);
+
done:
if (IS_ERR(result))
return NULL;
@@ -452,7 +587,7 @@ csum_copy_err:
SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS);
SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
}
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM);
/* starting over for a new packet, but check if we need to yield */
cond_resched();
@@ -859,7 +994,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
udptable->mask;
hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
- hslot = &udptable->hash2[hash2];
+ hslot = &udptable->hash2[hash2].hslot;
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
@@ -1065,14 +1200,13 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
{
struct udp_table *udptable = net->ipv4.udp_table;
unsigned short hnum = ntohs(loc_port);
- unsigned int hash2, slot2;
struct udp_hslot *hslot2;
+ unsigned int hash2;
__portpair ports;
struct sock *sk;
hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
ports = INET_COMBINED_PORTS(rmt_port, hnum);
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
@@ -1169,6 +1303,18 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
}
+static int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ int res;
+
+ lock_sock(sk);
+ res = __ip6_datagram_connect(sk, uaddr, addr_len);
+ if (!res)
+ udp6_hash4(sk);
+ release_sock(sk);
+ return res;
+}
+
/**
* udp6_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
@@ -1244,9 +1390,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
const int hlen = skb_network_header_len(skb) +
sizeof(struct udphdr);
- if (hlen + cork->gso_size > cork->fragsize) {
+ if (hlen + min(datalen, cork->gso_size) > cork->fragsize) {
kfree_skb(skb);
- return -EINVAL;
+ return -EMSGSIZE;
}
if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
kfree_skb(skb);
@@ -1349,10 +1495,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
- ipcm6_init(&ipc6);
+ ipcm6_init_sk(&ipc6, sk);
ipc6.gso_size = READ_ONCE(up->gso_size);
- ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
- ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
/* destination address check */
if (sin6) {
@@ -1558,9 +1702,6 @@ do_udp_sendmsg:
security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
- if (ipc6.tclass < 0)
- ipc6.tclass = np->tclass;
-
fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel);
dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected);
@@ -1606,8 +1747,6 @@ back_from_confirm:
WRITE_ONCE(up->pending, AF_INET6);
do_append_data:
- if (ipc6.dontfrag < 0)
- ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
up->len += ulen;
err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
&ipc6, fl6, dst_rt6_info(dst),
@@ -1687,6 +1826,7 @@ void udpv6_destroy_sock(struct sock *sk)
if (udp_test_bit(ENCAP_ENABLED, sk)) {
static_branch_dec(&udpv6_encap_needed_key);
udp_encap_disable();
+ udp_tunnel_cleanup_gro(sk);
}
}
}
@@ -1764,7 +1904,7 @@ struct proto udpv6_prot = {
.owner = THIS_MODULE,
.close = udp_lib_close,
.pre_connect = udpv6_pre_connect,
- .connect = ip6_datagram_connect,
+ .connect = udpv6_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
.init = udpv6_init_sock,
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index b41152dd4246..d8445ac1b2e4 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -117,9 +117,14 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
__be16 dport)
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
+ struct sock *sk;
int iif, sdif;
+ sk = udp_tunnel_sk(net, true);
+ if (sk && dport == htons(sk->sk_num))
+ return sk;
+
inet6_get_iif_sdif(skb, &iif, &sdif);
return __udp6_lib_lookup(net, &iph->saddr, sport,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 4abc5e9d6322..841c81abaaf4 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -179,14 +179,18 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
int offset = skb_gro_offset(skb);
const struct net_offload *ops;
struct sk_buff *pp = NULL;
- int ret;
+ int len, dlen;
+ __u8 *udpdata;
+ __be32 *udpdata32;
if (skb->protocol == htons(ETH_P_IP))
return xfrm4_gro_udp_encap_rcv(sk, head, skb);
- offset = offset - sizeof(struct udphdr);
-
- if (!pskb_pull(skb, offset))
+ len = skb->len - offset;
+ dlen = offset + min(len, 8);
+ udpdata = skb_gro_header(skb, dlen, offset);
+ udpdata32 = (__be32 *)udpdata;
+ if (unlikely(!udpdata))
return NULL;
rcu_read_lock();
@@ -194,11 +198,10 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
if (!ops || !ops->callbacks.gro_receive)
goto out;
- ret = __xfrm6_udp_encap_rcv(sk, skb, false);
- if (ret)
+ /* check if it is a keepalive or IKE packet */
+ if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0)
goto out;
- skb_push(skb, offset);
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
@@ -208,7 +211,6 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
out:
rcu_read_unlock();
- skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 1;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 5f7b1fdbffe6..b3d5d1f266ee 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -82,14 +82,14 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
toobig = skb->len > mtu && !skb_is_gso(skb);
- if (toobig && xfrm6_local_dontfrag(skb->sk)) {
+ if (toobig && xfrm6_local_dontfrag(sk)) {
xfrm6_local_rxpmtu(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
} else if (toobig && xfrm6_noneed_fragment(skb)) {
skb->ignore_df = 1;
goto skip_frag;
- } else if (!skb->ignore_df && toobig && skb->sk) {
+ } else if (!skb->ignore_df && toobig && sk) {
xfrm_local_error(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;