summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2025-02-26 03:03:25 +0300
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2025-02-26 03:03:25 +0300
commit0b119045b79a672bc6d8f18641c60fc8ce1b4585 (patch)
tree69c63ecfec55b9576c34dc742e0c38f46f8a317a /net/ipv6
parent7f7573bd4f37d4edc168c5b5def0bc2a1951c657 (diff)
parentd082ecbc71e9e0bf49883ee4afd435a77a5101b6 (diff)
downloadlinux-0b119045b79a672bc6d8f18641c60fc8ce1b4585.tar.xz
Merge tag 'v6.14-rc4' into next
Sync up with the mainline.
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c287
-rw-r--r--net/ipv6/anycast.c35
-rw-r--r--net/ipv6/esp6.c5
-rw-r--r--net/ipv6/fib6_rules.c57
-rw-r--r--net/ipv6/icmp.c48
-rw-r--r--net/ipv6/ila/ila_xlat.c16
-rw-r--r--net/ipv6/ioam6_iptunnel.c75
-rw-r--r--net/ipv6/ip6_input.c14
-rw-r--r--net/ipv6/ip6_output.c22
-rw-r--r--net/ipv6/ip6mr.c28
-rw-r--r--net/ipv6/mcast.c145
-rw-r--r--net/ipv6/ndisc.c32
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/raw.c3
-rw-r--r--net/ipv6/route.c27
-rw-r--r--net/ipv6/rpl_iptunnel.c59
-rw-r--r--net/ipv6/seg6_iptunnel.c98
-rw-r--r--net/ipv6/udp.c55
-rw-r--r--net/ipv6/xfrm6_output.c4
19 files changed, 634 insertions, 377 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0e765466d7f7..ac8cc1076536 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -852,7 +852,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
struct inet6_dev *idev;
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -865,13 +865,12 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int newf)
{
- struct net *net;
+ struct net *net = (struct net *)table->extra2;
int old;
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
- net = (struct net *)table->extra2;
old = *p;
WRITE_ONCE(*p, newf);
@@ -881,7 +880,7 @@ static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int
NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
- rtnl_unlock();
+ rtnl_net_unlock(net);
return 0;
}
@@ -903,7 +902,7 @@ static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int
net->ipv6.devconf_all);
} else if ((!newf) ^ (!old))
dev_forward_change((struct inet6_dev *)table->extra1);
- rtnl_unlock();
+ rtnl_net_unlock(net);
if (newf)
rt6_purge_dflt_routers(net);
@@ -916,7 +915,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
struct inet6_dev *idev;
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf);
@@ -933,13 +932,12 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int newf)
{
- struct net *net;
+ struct net *net = (struct net *)table->extra2;
int old;
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
- net = (struct net *)table->extra2;
old = *p;
WRITE_ONCE(*p, newf);
@@ -950,7 +948,7 @@ static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int ne
NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
- rtnl_unlock();
+ rtnl_net_unlock(net);
return 0;
}
@@ -964,7 +962,8 @@ static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int ne
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
}
- rtnl_unlock();
+
+ rtnl_net_unlock(net);
return 1;
}
@@ -2980,11 +2979,11 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
return -EFAULT;
- rtnl_lock();
+ rtnl_net_lock(net);
dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
if (dev && dev->type == ARPHRD_SIT)
err = addrconf_set_sit_dstaddr(net, dev, &ireq);
- rtnl_unlock();
+ rtnl_net_unlock(net);
return err;
}
@@ -3008,39 +3007,25 @@ static int ipv6_mc_config(struct sock *sk, bool join,
/*
* Manual configuration of address on an interface
*/
-static int inet6_addr_add(struct net *net, int ifindex,
- struct ifa6_config *cfg,
+static int inet6_addr_add(struct net *net, struct net_device *dev,
+ struct ifa6_config *cfg, clock_t expires, u32 flags,
struct netlink_ext_ack *extack)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
- struct net_device *dev;
- unsigned long timeout;
- clock_t expires;
- u32 flags;
- ASSERT_RTNL();
+ ASSERT_RTNL_NET(net);
if (cfg->plen > 128) {
NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length");
return -EINVAL;
}
- /* check the lifetime */
- if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft) {
- NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid");
- return -EINVAL;
- }
-
if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64) {
NL_SET_ERR_MSG_MOD(extack, "address with \"mngtmpaddr\" flag must have a prefix length of 64");
return -EINVAL;
}
- dev = __dev_get_by_index(net, ifindex);
- if (!dev)
- return -ENODEV;
-
idev = addrconf_add_dev(dev);
if (IS_ERR(idev)) {
NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
@@ -3049,7 +3034,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk,
- true, cfg->pfx, ifindex);
+ true, cfg->pfx, dev->ifindex);
if (ret < 0) {
NL_SET_ERR_MSG_MOD(extack, "Multicast auto join failed");
@@ -3059,24 +3044,6 @@ static int inet6_addr_add(struct net *net, int ifindex,
cfg->scope = ipv6_addr_scope(cfg->pfx);
- timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ);
- if (addrconf_finite_timeout(timeout)) {
- expires = jiffies_to_clock_t(timeout * HZ);
- cfg->valid_lft = timeout;
- flags = RTF_EXPIRES;
- } else {
- expires = 0;
- flags = 0;
- cfg->ifa_flags |= IFA_F_PERMANENT;
- }
-
- timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ);
- if (addrconf_finite_timeout(timeout)) {
- if (timeout == 0)
- cfg->ifa_flags |= IFA_F_DEPRECATED;
- cfg->preferred_lft = timeout;
- }
-
ifp = ipv6_add_addr(idev, cfg, true, extack);
if (!IS_ERR(ifp)) {
if (!(cfg->ifa_flags & IFA_F_NOPREFIXROUTE)) {
@@ -3104,7 +3071,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
return 0;
} else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk, false,
- cfg->pfx, ifindex);
+ cfg->pfx, dev->ifindex);
}
return PTR_ERR(ifp);
@@ -3129,7 +3096,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
return -ENODEV;
}
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (!idev) {
NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
return -ENXIO;
@@ -3170,6 +3137,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
.preferred_lft = INFINITY_LIFE_TIME,
.valid_lft = INFINITY_LIFE_TIME,
};
+ struct net_device *dev;
struct in6_ifreq ireq;
int err;
@@ -3182,9 +3150,13 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
cfg.pfx = &ireq.ifr6_addr;
cfg.plen = ireq.ifr6_prefixlen;
- rtnl_lock();
- err = inet6_addr_add(net, ireq.ifr6_ifindex, &cfg, NULL);
- rtnl_unlock();
+ rtnl_net_lock(net);
+ dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
+ if (dev)
+ err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL);
+ else
+ err = -ENODEV;
+ rtnl_net_unlock(net);
return err;
}
@@ -3199,10 +3171,10 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
return -EFAULT;
- rtnl_lock();
+ rtnl_net_lock(net);
err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
ireq.ifr6_prefixlen, NULL);
- rtnl_unlock();
+ rtnl_net_unlock(net);
return err;
}
@@ -4205,6 +4177,7 @@ static void addrconf_dad_work(struct work_struct *w)
struct inet6_dev *idev = ifp->idev;
bool bump_id, disable_ipv6 = false;
struct in6_addr mcaddr;
+ struct net *net;
enum {
DAD_PROCESS,
@@ -4212,7 +4185,9 @@ static void addrconf_dad_work(struct work_struct *w)
DAD_ABORT,
} action = DAD_PROCESS;
- rtnl_lock();
+ net = dev_net(idev->dev);
+
+ rtnl_net_lock(net);
spin_lock_bh(&ifp->lock);
if (ifp->state == INET6_IFADDR_STATE_PREDAD) {
@@ -4222,7 +4197,7 @@ static void addrconf_dad_work(struct work_struct *w)
action = DAD_ABORT;
ifp->state = INET6_IFADDR_STATE_POSTDAD;
- if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->accept_dad) > 1 ||
+ if ((READ_ONCE(net->ipv6.devconf_all->accept_dad) > 1 ||
READ_ONCE(idev->cnf.accept_dad) > 1) &&
!idev->cnf.disable_ipv6 &&
!(ifp->flags & IFA_F_STABLE_PRIVACY)) {
@@ -4304,7 +4279,7 @@ static void addrconf_dad_work(struct work_struct *w)
ifp->dad_nonce);
out:
in6_ifa_put(ifp);
- rtnl_unlock();
+ rtnl_net_unlock(net);
}
/* ifp->idev must be at least read locked */
@@ -4752,9 +4727,9 @@ static void addrconf_verify_work(struct work_struct *w)
struct net *net = container_of(to_delayed_work(w), struct net,
ipv6.addr_chk_work);
- rtnl_lock();
+ rtnl_net_lock(net);
addrconf_verify_rtnl(net);
- rtnl_unlock();
+ rtnl_net_unlock(net);
}
static void addrconf_verify(struct net *net)
@@ -4817,8 +4792,12 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
/* We ignore other flags so far. */
ifa_flags &= IFA_F_MANAGETEMPADDR;
- return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
- ifm->ifa_prefixlen, extack);
+ rtnl_net_lock(net);
+ err = inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
+ ifm->ifa_prefixlen, extack);
+ rtnl_net_unlock(net);
+
+ return err;
}
static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp,
@@ -4867,19 +4846,14 @@ static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp,
}
static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
- struct ifa6_config *cfg)
+ struct ifa6_config *cfg, clock_t expires,
+ u32 flags)
{
- u32 flags;
- clock_t expires;
- unsigned long timeout;
bool was_managetempaddr;
- bool had_prefixroute;
bool new_peer = false;
+ bool had_prefixroute;
- ASSERT_RTNL();
-
- if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft)
- return -EINVAL;
+ ASSERT_RTNL_NET(net);
if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR &&
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
@@ -4888,24 +4862,6 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
cfg->ifa_flags &= ~IFA_F_OPTIMISTIC;
- timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ);
- if (addrconf_finite_timeout(timeout)) {
- expires = jiffies_to_clock_t(timeout * HZ);
- cfg->valid_lft = timeout;
- flags = RTF_EXPIRES;
- } else {
- expires = 0;
- flags = 0;
- cfg->ifa_flags |= IFA_F_PERMANENT;
- }
-
- timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ);
- if (addrconf_finite_timeout(timeout)) {
- if (timeout == 0)
- cfg->ifa_flags |= IFA_F_DEPRECATED;
- cfg->preferred_lft = timeout;
- }
-
if (cfg->peer_pfx &&
memcmp(&ifp->peer_addr, cfg->peer_pfx, sizeof(struct in6_addr))) {
if (!ipv6_addr_any(&ifp->peer_addr))
@@ -4990,13 +4946,16 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
struct inet6_dev *idev;
struct ifa6_config cfg;
+ struct ifaddrmsg *ifm;
+ unsigned long timeout;
+ clock_t expires;
+ u32 flags;
int err;
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
@@ -5019,8 +4978,18 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[IFA_PROTO])
cfg.ifa_proto = nla_get_u8(tb[IFA_PROTO]);
+ cfg.ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
+
+ /* We ignore other flags so far. */
+ cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS |
+ IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE |
+ IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+ cfg.ifa_flags |= IFA_F_PERMANENT;
cfg.valid_lft = INFINITY_LIFE_TIME;
cfg.preferred_lft = INFINITY_LIFE_TIME;
+ expires = 0;
+ flags = 0;
if (tb[IFA_CACHEINFO]) {
struct ifa_cacheinfo *ci;
@@ -5028,24 +4997,43 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
ci = nla_data(tb[IFA_CACHEINFO]);
cfg.valid_lft = ci->ifa_valid;
cfg.preferred_lft = ci->ifa_prefered;
+
+ if (!cfg.valid_lft || cfg.preferred_lft > cfg.valid_lft) {
+ NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid");
+ return -EINVAL;
+ }
+
+ timeout = addrconf_timeout_fixup(cfg.valid_lft, HZ);
+ if (addrconf_finite_timeout(timeout)) {
+ cfg.ifa_flags &= ~IFA_F_PERMANENT;
+ cfg.valid_lft = timeout;
+ expires = jiffies_to_clock_t(timeout * HZ);
+ flags = RTF_EXPIRES;
+ }
+
+ timeout = addrconf_timeout_fixup(cfg.preferred_lft, HZ);
+ if (addrconf_finite_timeout(timeout)) {
+ if (timeout == 0)
+ cfg.ifa_flags |= IFA_F_DEPRECATED;
+
+ cfg.preferred_lft = timeout;
+ }
}
+ rtnl_net_lock(net);
+
dev = __dev_get_by_index(net, ifm->ifa_index);
if (!dev) {
NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
- return -ENODEV;
+ err = -ENODEV;
+ goto unlock;
}
- cfg.ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
-
- /* We ignore other flags so far. */
- cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS |
- IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE |
- IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
-
idev = ipv6_find_idev(dev);
- if (IS_ERR(idev))
- return PTR_ERR(idev);
+ if (IS_ERR(idev)) {
+ err = PTR_ERR(idev);
+ goto unlock;
+ }
if (!ipv6_allow_optimistic_dad(net, idev))
cfg.ifa_flags &= ~IFA_F_OPTIMISTIC;
@@ -5053,7 +5041,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (cfg.ifa_flags & IFA_F_NODAD &&
cfg.ifa_flags & IFA_F_OPTIMISTIC) {
NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
- return -EINVAL;
+ err = -EINVAL;
+ goto unlock;
}
ifa = ipv6_get_ifaddr(net, cfg.pfx, dev, 1);
@@ -5062,7 +5051,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
* It would be best to check for !NLM_F_CREATE here but
* userspace already relies on not having to provide this.
*/
- return inet6_addr_add(net, ifm->ifa_index, &cfg, extack);
+ err = inet6_addr_add(net, dev, &cfg, expires, flags, extack);
+ goto unlock;
}
if (nlh->nlmsg_flags & NLM_F_EXCL ||
@@ -5070,10 +5060,12 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
NL_SET_ERR_MSG_MOD(extack, "address already assigned");
err = -EEXIST;
} else {
- err = inet6_addr_modify(net, ifa, &cfg);
+ err = inet6_addr_modify(net, ifa, &cfg, expires, flags);
}
in6_ifa_put(ifa);
+unlock:
+ rtnl_net_unlock(net);
return err;
}
@@ -5127,22 +5119,6 @@ static inline int inet6_ifaddr_msgsize(void)
+ nla_total_size(4) /* IFA_RT_PRIORITY */;
}
-enum addr_type_t {
- UNICAST_ADDR,
- MULTICAST_ADDR,
- ANYCAST_ADDR,
-};
-
-struct inet6_fill_args {
- u32 portid;
- u32 seq;
- int event;
- unsigned int flags;
- int netnsid;
- int ifindex;
- enum addr_type_t type;
-};
-
static int inet6_fill_ifaddr(struct sk_buff *skb,
const struct inet6_ifaddr *ifa,
struct inet6_fill_args *args)
@@ -5221,15 +5197,16 @@ error:
return -EMSGSIZE;
}
-static int inet6_fill_ifmcaddr(struct sk_buff *skb,
- const struct ifmcaddr6 *ifmca,
- struct inet6_fill_args *args)
+int inet6_fill_ifmcaddr(struct sk_buff *skb,
+ const struct ifmcaddr6 *ifmca,
+ struct inet6_fill_args *args)
{
int ifindex = ifmca->idev->dev->ifindex;
u8 scope = RT_SCOPE_UNIVERSE;
struct nlmsghdr *nlh;
- if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
+ if (!args->force_rt_scope_universe &&
+ ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
@@ -5255,9 +5232,9 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb,
return 0;
}
-static int inet6_fill_ifacaddr(struct sk_buff *skb,
- const struct ifacaddr6 *ifaca,
- struct inet6_fill_args *args)
+int inet6_fill_ifacaddr(struct sk_buff *skb,
+ const struct ifacaddr6 *ifaca,
+ struct inet6_fill_args *args)
{
struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
int ifindex = dev ? dev->ifindex : 1;
@@ -5418,6 +5395,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
.flags = NLM_F_MULTI,
.netnsid = -1,
.type = type,
+ .force_rt_scope_universe = false,
};
struct {
unsigned long ifindex;
@@ -5546,6 +5524,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
.event = RTM_NEWADDR,
.flags = 0,
.netnsid = -1,
+ .force_rt_scope_universe = false,
};
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
@@ -5617,6 +5596,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
.event = event,
.flags = 0,
.netnsid = -1,
+ .force_rt_scope_universe = false,
};
int err = -ENOBUFS;
@@ -6382,7 +6362,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
struct inet6_dev *idev;
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
@@ -6403,7 +6383,7 @@ static int addrconf_disable_ipv6(const struct ctl_table *table, int *p, int newf
return 0;
}
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
old = *p;
@@ -6412,10 +6392,11 @@ static int addrconf_disable_ipv6(const struct ctl_table *table, int *p, int newf
if (p == &net->ipv6.devconf_all->disable_ipv6) {
WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf);
addrconf_disable_change(net, newf);
- } else if ((!newf) ^ (!old))
+ } else if ((!newf) ^ (!old)) {
dev_disable_change((struct inet6_dev *)table->extra1);
+ }
- rtnl_unlock();
+ rtnl_net_unlock(net);
return 0;
}
@@ -6458,20 +6439,20 @@ static int addrconf_sysctl_proxy_ndp(const struct ctl_table *ctl, int write,
if (write && old != new) {
struct net *net = ctl->extra2;
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
- if (valp == &net->ipv6.devconf_dflt->proxy_ndp)
+ if (valp == &net->ipv6.devconf_dflt->proxy_ndp) {
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_PROXY_NEIGH,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
- else if (valp == &net->ipv6.devconf_all->proxy_ndp)
+ } else if (valp == &net->ipv6.devconf_all->proxy_ndp) {
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_PROXY_NEIGH,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
- else {
+ } else {
struct inet6_dev *idev = ctl->extra1;
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -6479,7 +6460,7 @@ static int addrconf_sysctl_proxy_ndp(const struct ctl_table *ctl, int write,
idev->dev->ifindex,
&idev->cnf);
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
}
return ret;
@@ -6499,7 +6480,7 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
.mode = ctl->mode,
};
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
new_val = *((u32 *)ctl->data);
@@ -6529,7 +6510,7 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
WRITE_ONCE(net->ipv6.devconf_dflt->addr_gen_mode, new_val);
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev &&
idev->cnf.addr_gen_mode != new_val) {
WRITE_ONCE(idev->cnf.addr_gen_mode,
@@ -6543,7 +6524,7 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
}
out:
- rtnl_unlock();
+ rtnl_net_unlock(net);
return ret;
}
@@ -6565,7 +6546,7 @@ static int addrconf_sysctl_stable_secret(const struct ctl_table *ctl, int write,
lctl.maxlen = IPV6_MAX_STRLEN;
lctl.data = str;
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
if (!write && !secret->initialized) {
@@ -6595,7 +6576,7 @@ static int addrconf_sysctl_stable_secret(const struct ctl_table *ctl, int write,
struct net_device *dev;
for_each_netdev(net, dev) {
- struct inet6_dev *idev = __in6_dev_get(dev);
+ struct inet6_dev *idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
WRITE_ONCE(idev->cnf.addr_gen_mode,
@@ -6610,7 +6591,7 @@ static int addrconf_sysctl_stable_secret(const struct ctl_table *ctl, int write,
}
out:
- rtnl_unlock();
+ rtnl_net_unlock(net);
return err;
}
@@ -6694,7 +6675,7 @@ int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val)
return 0;
}
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
WRITE_ONCE(*valp, val);
@@ -6703,7 +6684,7 @@ int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val)
struct net_device *dev;
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev)
addrconf_disable_policy_idev(idev, val);
}
@@ -6712,7 +6693,7 @@ int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val)
addrconf_disable_policy_idev(idev, val);
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
return 0;
}
@@ -7425,9 +7406,9 @@ static const struct rtnl_msg_handler addrconf_rtnl_msg_handlers[] __initconst_or
{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETLINK,
.dumpit = inet6_dump_ifinfo, .flags = RTNL_FLAG_DUMP_UNLOCKED},
{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDR,
- .doit = inet6_rtm_newaddr},
+ .doit = inet6_rtm_newaddr, .flags = RTNL_FLAG_DOIT_PERNET},
{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDR,
- .doit = inet6_rtm_deladdr},
+ .doit = inet6_rtm_deladdr, .flags = RTNL_FLAG_DOIT_PERNET},
{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDR,
.doit = inet6_rtm_getaddr, .dumpit = inet6_dump_ifaddr,
.flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
@@ -7469,9 +7450,9 @@ int __init addrconf_init(void)
goto out_nowq;
}
- rtnl_lock();
+ rtnl_net_lock(&init_net);
idev = ipv6_add_dev(blackhole_netdev);
- rtnl_unlock();
+ rtnl_net_unlock(&init_net);
if (IS_ERR(idev)) {
err = PTR_ERR(idev);
goto errlo;
@@ -7521,17 +7502,17 @@ void addrconf_cleanup(void)
rtnl_af_unregister(&inet6_ops);
- rtnl_lock();
+ rtnl_net_lock(&init_net);
/* clean dev list */
for_each_netdev(&init_net, dev) {
- if (__in6_dev_get(dev) == NULL)
+ if (!__in6_dev_get_rtnl_net(dev))
continue;
addrconf_ifdown(dev, true);
}
addrconf_ifdown(init_net.loopback_dev, true);
- rtnl_unlock();
+ rtnl_net_unlock(&init_net);
destroy_workqueue(addrconf_wq);
}
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 562cace50ca9..21e01695b48c 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -278,6 +278,37 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
return aca;
}
+static void inet6_ifacaddr_notify(struct net_device *dev,
+ const struct ifacaddr6 *ifaca, int event)
+{
+ struct inet6_fill_args fillargs = {
+ .event = event,
+ .netnsid = -1,
+ };
+ struct net *net = dev_net(dev);
+ struct sk_buff *skb;
+ int err = -ENOMEM;
+
+ skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+ nla_total_size(sizeof(struct in6_addr)) +
+ nla_total_size(sizeof(struct ifa_cacheinfo)),
+ GFP_KERNEL);
+ if (!skb)
+ goto error;
+
+ err = inet6_fill_ifacaddr(skb, ifaca, &fillargs);
+ if (err < 0) {
+ pr_err("Failed to fill in anycast addresses (err %d)\n", err);
+ nlmsg_free(skb);
+ goto error;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ACADDR, NULL, GFP_KERNEL);
+ return;
+error:
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ACADDR, err);
+}
+
/*
* device anycast group inc (add if not found)
*/
@@ -333,6 +364,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
addrconf_join_solict(idev->dev, &aca->aca_addr);
+ inet6_ifacaddr_notify(idev->dev, aca, RTM_NEWANYCAST);
+
aca_put(aca);
return 0;
out:
@@ -375,6 +408,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
+ inet6_ifacaddr_notify(idev->dev, aca, RTM_DELANYCAST);
+
aca_put(aca);
return 0;
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index b2400c226a32..9e73944e3b53 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -315,7 +315,7 @@ static void esp_output_done(void *data, int err)
x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
esp_output_tail_tcp(x, skb);
else
- xfrm_output_resume(skb->sk, skb, err);
+ xfrm_output_resume(skb_to_full_sk(skb), skb, err);
}
}
@@ -859,7 +859,8 @@ int esp6_input_done2(struct sk_buff *skb, int err)
skb_postpull_rcsum(skb, skb_network_header(skb),
skb_network_header_len(skb));
skb_pull_rcsum(skb, hlen);
- if (x->props.mode == XFRM_MODE_TUNNEL)
+ if (x->props.mode == XFRM_MODE_TUNNEL ||
+ x->props.mode == XFRM_MODE_IPTFS)
skb_reset_transport_header(skb);
else
skb_set_transport_header(skb, -hdr_len);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index c85c1627cb16..67d39114d9a6 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -26,6 +26,8 @@ struct fib6_rule {
struct fib_rule common;
struct rt6key src;
struct rt6key dst;
+ __be32 flowlabel;
+ __be32 flowlabel_mask;
dscp_t dscp;
u8 dscp_full:1; /* DSCP or TOS selector */
};
@@ -34,7 +36,7 @@ static bool fib6_rule_matchall(const struct fib_rule *rule)
{
struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
- if (r->dst.plen || r->src.plen || r->dscp)
+ if (r->dst.plen || r->src.plen || r->dscp || r->flowlabel_mask)
return false;
return fib_rule_matchall(rule);
}
@@ -332,6 +334,9 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
return 0;
+ if ((r->flowlabel ^ flowi6_get_flowlabel(fl6)) & r->flowlabel_mask)
+ return 0;
+
if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
return 0;
@@ -360,6 +365,35 @@ static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
return 0;
}
+static int fib6_nl2rule_flowlabel(struct nlattr **tb, struct fib6_rule *rule6,
+ struct netlink_ext_ack *extack)
+{
+ __be32 flowlabel, flowlabel_mask;
+
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL) ||
+ NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL_MASK))
+ return -EINVAL;
+
+ flowlabel = nla_get_be32(tb[FRA_FLOWLABEL]);
+ flowlabel_mask = nla_get_be32(tb[FRA_FLOWLABEL_MASK]);
+
+ if (flowlabel_mask & ~IPV6_FLOWLABEL_MASK) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[FRA_FLOWLABEL_MASK],
+ "Invalid flow label mask");
+ return -EINVAL;
+ }
+
+ if (flowlabel & ~flowlabel_mask) {
+ NL_SET_ERR_MSG(extack, "Flow label and mask do not match");
+ return -EINVAL;
+ }
+
+ rule6->flowlabel = flowlabel;
+ rule6->flowlabel_mask = flowlabel_mask;
+
+ return 0;
+}
+
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
@@ -379,6 +413,10 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
goto errout;
+ if ((tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) &&
+ fib6_nl2rule_flowlabel(tb, rule6, extack) < 0)
+ goto errout;
+
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid table");
@@ -444,6 +482,14 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
return 0;
}
+ if (tb[FRA_FLOWLABEL] &&
+ nla_get_be32(tb[FRA_FLOWLABEL]) != rule6->flowlabel)
+ return 0;
+
+ if (tb[FRA_FLOWLABEL_MASK] &&
+ nla_get_be32(tb[FRA_FLOWLABEL_MASK]) != rule6->flowlabel_mask)
+ return 0;
+
if (frh->src_len &&
nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
return 0;
@@ -472,6 +518,11 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
}
+ if (rule6->flowlabel_mask &&
+ (nla_put_be32(skb, FRA_FLOWLABEL, rule6->flowlabel) ||
+ nla_put_be32(skb, FRA_FLOWLABEL_MASK, rule6->flowlabel_mask)))
+ goto nla_put_failure;
+
if ((rule6->dst.plen &&
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
(rule6->src.plen &&
@@ -487,7 +538,9 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(16) /* dst */
+ nla_total_size(16) /* src */
- + nla_total_size(1); /* dscp */
+ + nla_total_size(1) /* dscp */
+ + nla_total_size(4) /* flowlabel */
+ + nla_total_size(4); /* flowlabel mask */
}
static void fib6_rule_flush_cache(struct fib_rules_ops *ops)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 071b0bc1179d..4d14ab7f7e99 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -76,7 +76,7 @@ static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
{
/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
if (type == ICMPV6_PKT_TOOBIG)
ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
@@ -222,10 +222,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
- peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
+ rcu_read_lock();
+ peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
res = inet_peer_xrlim_allow(peer, tmo);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
}
if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
@@ -473,7 +473,10 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (!skb->dev)
return;
- net = dev_net(skb->dev);
+
+ rcu_read_lock();
+
+ net = dev_net_rcu(skb->dev);
mark = IP6_REPLY_MARK(net, skb->mark);
/*
* Make sure we respect the rules
@@ -496,7 +499,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
!(type == ICMPV6_PARAMPROB &&
code == ICMPV6_UNK_OPTION &&
(opt_unrec(skb, info))))
- return;
+ goto out;
saddr = NULL;
}
@@ -526,7 +529,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
&hdr->saddr, &hdr->daddr);
- return;
+ goto out;
}
/*
@@ -535,7 +538,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (is_ineligible(skb)) {
net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
&hdr->saddr, &hdr->daddr);
- return;
+ goto out;
}
/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
@@ -582,7 +585,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
np = inet6_sk(sk);
if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
- goto out;
+ goto out_unlock;
tmp_hdr.icmp6_type = type;
tmp_hdr.icmp6_code = code;
@@ -600,7 +603,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
if (IS_ERR(dst))
- goto out;
+ goto out_unlock;
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
@@ -616,7 +619,6 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
goto out_dst_release;
}
- rcu_read_lock();
idev = __in6_dev_get(skb->dev);
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
@@ -630,13 +632,15 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
len + sizeof(struct icmp6hdr));
}
- rcu_read_unlock();
+
out_dst_release:
dst_release(dst);
-out:
+out_unlock:
icmpv6_xmit_unlock(sk);
out_bh_enable:
local_bh_enable();
+out:
+ rcu_read_unlock();
}
EXPORT_SYMBOL(icmp6_send);
@@ -679,8 +683,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
- skb, 0);
+ rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr,
+ NULL, 0, skb, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
@@ -717,7 +721,7 @@ EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
struct sock *sk;
struct inet6_dev *idev;
struct ipv6_pinfo *np;
@@ -832,7 +836,7 @@ enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
u8 code, __be32 info)
{
struct inet6_skb_parm *opt = IP6CB(skb);
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
const struct inet6_protocol *ipprot;
enum skb_drop_reason reason;
int inner_offset;
@@ -889,7 +893,7 @@ out:
static int icmpv6_rcv(struct sk_buff *skb)
{
enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
struct net_device *dev = icmp6_dev(skb);
struct inet6_dev *idev = __in6_dev_get(dev);
const struct in6_addr *saddr, *daddr;
@@ -921,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
- __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
+ __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS);
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
@@ -939,7 +943,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
type = hdr->icmp6_type;
- ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
+ ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type);
switch (type) {
case ICMPV6_ECHO_REQUEST:
@@ -1034,9 +1038,9 @@ static int icmpv6_rcv(struct sk_buff *skb)
csum_error:
reason = SKB_DROP_REASON_ICMP_CSUM;
- __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
+ __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
- __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
kfree_skb_reason(skb, reason);
return 0;
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 7646e401c630..1d41b2ab4884 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -195,6 +195,8 @@ static const struct nf_hook_ops ila_nf_hook_ops[] = {
},
};
+static DEFINE_MUTEX(ila_mutex);
+
static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
@@ -202,16 +204,20 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
int err = 0, order;
- if (!ilan->xlat.hooks_registered) {
+ if (!READ_ONCE(ilan->xlat.hooks_registered)) {
/* We defer registering net hooks in the namespace until the
* first mapping is added.
*/
- err = nf_register_net_hooks(net, ila_nf_hook_ops,
- ARRAY_SIZE(ila_nf_hook_ops));
+ mutex_lock(&ila_mutex);
+ if (!ilan->xlat.hooks_registered) {
+ err = nf_register_net_hooks(net, ila_nf_hook_ops,
+ ARRAY_SIZE(ila_nf_hook_ops));
+ if (!err)
+ WRITE_ONCE(ilan->xlat.hooks_registered, true);
+ }
+ mutex_unlock(&ila_mutex);
if (err)
return err;
-
- ilan->xlat.hooks_registered = true;
}
ila = kzalloc(sizeof(*ila), GFP_KERNEL);
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index 9d8422e350f8..2c383c12a431 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -253,14 +253,15 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
}
static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
- struct ioam6_lwt_encap *tuninfo)
+ struct ioam6_lwt_encap *tuninfo,
+ struct dst_entry *cache_dst)
{
struct ipv6hdr *oldhdr, *hdr;
int hdrlen, err;
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -291,7 +292,8 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
struct ioam6_lwt_encap *tuninfo,
bool has_tunsrc,
struct in6_addr *tunsrc,
- struct in6_addr *tundst)
+ struct in6_addr *tundst,
+ struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr, *inner_hdr;
@@ -300,7 +302,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
len = sizeof(*hdr) + hdrlen;
- err = skb_cow_head(skb, len + skb->mac_len);
+ err = skb_cow_head(skb, len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -334,7 +336,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
+ struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL;
struct in6_addr orig_daddr;
struct ioam6_lwt *ilwt;
int err = -EINVAL;
@@ -352,6 +354,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
orig_daddr = ipv6_hdr(skb)->daddr;
+ local_bh_disable();
+ cache_dst = dst_cache_get(&ilwt->cache);
+ local_bh_enable();
+
switch (ilwt->mode) {
case IOAM6_IPTUNNEL_MODE_INLINE:
do_inline:
@@ -359,7 +365,7 @@ do_inline:
if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
goto out;
- err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
+ err = ioam6_do_inline(net, skb, &ilwt->tuninfo, cache_dst);
if (unlikely(err))
goto drop;
@@ -369,7 +375,7 @@ do_encap:
/* Encapsulation (ip6ip6) */
err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
ilwt->has_tunsrc, &ilwt->tunsrc,
- &ilwt->tundst);
+ &ilwt->tundst, cache_dst);
if (unlikely(err))
goto drop;
@@ -387,46 +393,45 @@ do_encap:
goto drop;
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
+ if (unlikely(!cache_dst)) {
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct flowi6 fl6;
- if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
- local_bh_disable();
- dst = dst_cache_get(&ilwt->cache);
- local_bh_enable();
-
- if (unlikely(!dst)) {
- struct ipv6hdr *hdr = ipv6_hdr(skb);
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.daddr = hdr->daddr;
- fl6.saddr = hdr->saddr;
- fl6.flowlabel = ip6_flowinfo(hdr);
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = hdr->nexthdr;
-
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst->error) {
- err = dst->error;
- dst_release(dst);
- goto drop;
- }
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = hdr->daddr;
+ fl6.saddr = hdr->saddr;
+ fl6.flowlabel = ip6_flowinfo(hdr);
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = hdr->nexthdr;
+
+ cache_dst = ip6_route_output(net, NULL, &fl6);
+ if (cache_dst->error) {
+ err = cache_dst->error;
+ goto drop;
+ }
+ /* cache only if we don't create a dst reference loop */
+ if (dst->lwtstate != cache_dst->lwtstate) {
local_bh_disable();
- dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
+ dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr);
local_bh_enable();
}
- skb_dst_drop(skb);
- skb_dst_set(skb, dst);
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev));
+ if (unlikely(err))
+ goto drop;
+ }
+ if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
+ skb_dst_drop(skb);
+ skb_dst_set(skb, cache_dst);
return dst_output(net, sk, skb);
}
out:
+ dst_release(cache_dst);
return dst->lwtstate->orig_output(net, sk, skb);
drop:
+ dst_release(cache_dst);
kfree_skb(skb);
return err;
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 70c0e16c0ae6..39da6a7ce5f1 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -477,9 +477,7 @@ discard:
static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb_clear_delivery_time(skb);
- rcu_read_lock();
ip6_protocol_deliver_rcu(net, skb, 0, false);
- rcu_read_unlock();
return 0;
}
@@ -487,9 +485,15 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk
int ip6_input(struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
- dev_net(skb->dev), NULL, skb, skb->dev, NULL,
- ip6_input_finish);
+ int res;
+
+ rcu_read_lock();
+ res = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
+ dev_net_rcu(skb->dev), NULL, skb, skb->dev, NULL,
+ ip6_input_finish);
+ rcu_read_unlock();
+
+ return res;
}
EXPORT_SYMBOL_GPL(ip6_input);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f7b4608bb316..d577bf2f3053 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -613,15 +613,15 @@ int ip6_forward(struct sk_buff *skb)
else
target = &hdr->daddr;
- peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
+ rcu_read_lock();
+ peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
/* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect)
*/
if (inet_peer_xrlim_allow(peer, 1*HZ))
ndisc_send_redirect(skb, target);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
} else {
int addrtype = ipv6_addr_type(&hdr->saddr);
@@ -1401,6 +1401,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
cork->base.mark = ipc6->sockc.mark;
+ cork->base.priority = ipc6->sockc.priority;
sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags);
if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
cork->base.flags |= IPCORK_TS_OPT_ID;
@@ -1697,8 +1698,9 @@ alloc_new_skb:
pskb_trim_unique(skb_prev, maxfraglen);
}
if (copy > 0 &&
- getfrag(from, data + transhdrlen, offset,
- copy, fraggap, skb) < 0) {
+ INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from, data + transhdrlen, offset,
+ copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
@@ -1742,8 +1744,9 @@ alloc_new_skb:
unsigned int off;
off = skb->len;
- if (getfrag(from, skb_put(skb, copy),
- offset, copy, off, skb) < 0) {
+ if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from, skb_put(skb, copy),
+ offset, copy, off, skb) < 0) {
__skb_trim(skb, off);
err = -EFAULT;
goto error;
@@ -1781,7 +1784,8 @@ alloc_new_skb:
get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
- if (getfrag(from,
+ if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from,
page_address(pfrag->page) + pfrag->offset,
offset, copy, skb->len, skb) < 0)
goto error_efault;
@@ -1939,7 +1943,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
hdr->saddr = fl6->saddr;
hdr->daddr = *final_dst;
- skb->priority = READ_ONCE(sk->sk_priority);
+ skb->priority = cork->base.priority;
skb->mark = cork->base.mark;
if (sk_is_tcp(sk))
skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 578ff1336afe..535e9f72514c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -520,9 +520,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->_c.mfc_un.res.pkt,
- mfc->_c.mfc_un.res.bytes,
- mfc->_c.mfc_un.res.wrong_if);
+ atomic_long_read(&mfc->_c.mfc_un.res.pkt),
+ atomic_long_read(&mfc->_c.mfc_un.res.bytes),
+ atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
for (n = mfc->_c.mfc_un.res.minvif;
n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
@@ -884,7 +884,7 @@ static void ip6mr_update_thresholds(struct mr_table *mrt,
cache->mfc_un.res.maxvif = vifi + 1;
}
}
- cache->mfc_un.res.lastuse = jiffies;
+ WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
}
static int mif6_add(struct net *net, struct mr_table *mrt,
@@ -1945,9 +1945,9 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
&sr->grp.sin6_addr);
if (c) {
- sr->pktcnt = c->_c.mfc_un.res.pkt;
- sr->bytecnt = c->_c.mfc_un.res.bytes;
- sr->wrong_if = c->_c.mfc_un.res.wrong_if;
+ sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
+ sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
+ sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
rcu_read_unlock();
return 0;
}
@@ -2017,9 +2017,9 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->_c.mfc_un.res.pkt;
- sr.bytecnt = c->_c.mfc_un.res.bytes;
- sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
+ sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
+ sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -2142,9 +2142,9 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
int true_vifi = ip6mr_find_vif(mrt, dev);
vif = c->_c.mfc_parent;
- c->_c.mfc_un.res.pkt++;
- c->_c.mfc_un.res.bytes += skb->len;
- c->_c.mfc_un.res.lastuse = jiffies;
+ atomic_long_inc(&c->_c.mfc_un.res.pkt);
+ atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
+ WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
@@ -2162,7 +2162,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
- c->_c.mfc_un.res.wrong_if++;
+ atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index b244dbf61d5f..65831b4fee1f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -33,8 +33,10 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
+#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/route.h>
+#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -47,6 +49,7 @@
#include <linux/netfilter_ipv6.h>
#include <net/net_namespace.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -901,6 +904,41 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
return mc;
}
+static void inet6_ifmcaddr_notify(struct net_device *dev,
+ const struct ifmcaddr6 *ifmca, int event)
+{
+ struct inet6_fill_args fillargs = {
+ .portid = 0,
+ .seq = 0,
+ .event = event,
+ .flags = 0,
+ .netnsid = -1,
+ .force_rt_scope_universe = true,
+ };
+ struct net *net = dev_net(dev);
+ struct sk_buff *skb;
+ int err = -ENOMEM;
+
+ skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+ nla_total_size(sizeof(struct in6_addr)) +
+ nla_total_size(sizeof(struct ifa_cacheinfo)),
+ GFP_KERNEL);
+ if (!skb)
+ goto error;
+
+ err = inet6_fill_ifmcaddr(skb, ifmca, &fillargs);
+ if (err < 0) {
+ WARN_ON_ONCE(err == -EMSGSIZE);
+ nlmsg_free(skb);
+ goto error;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MCADDR, NULL, GFP_KERNEL);
+ return;
+error:
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_MCADDR, err);
+}
+
/*
* device multicast group inc (add if not found)
*/
@@ -948,6 +986,7 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
mld_del_delrec(idev, mc);
igmp6_group_added(mc);
+ inet6_ifmcaddr_notify(dev, mc, RTM_NEWMULTICAST);
mutex_unlock(&idev->mc_lock);
ma_put(mc);
return 0;
@@ -977,6 +1016,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
*map = ma->next;
igmp6_group_dropped(ma);
+ inet6_ifmcaddr_notify(idev->dev, ma,
+ RTM_DELMULTICAST);
ip6_mc_clear_src(ma);
mutex_unlock(&idev->mc_lock);
@@ -1021,29 +1062,31 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
rcu_read_lock();
idev = __in6_dev_get(dev);
- if (idev) {
- for_each_mc_rcu(idev, mc) {
- if (ipv6_addr_equal(&mc->mca_addr, group))
- break;
- }
- if (mc) {
- if (src_addr && !ipv6_addr_any(src_addr)) {
- struct ip6_sf_list *psf;
+ if (!idev)
+ goto unlock;
+ for_each_mc_rcu(idev, mc) {
+ if (ipv6_addr_equal(&mc->mca_addr, group))
+ break;
+ }
+ if (!mc)
+ goto unlock;
+ if (src_addr && !ipv6_addr_any(src_addr)) {
+ struct ip6_sf_list *psf;
- for_each_psf_rcu(mc, psf) {
- if (ipv6_addr_equal(&psf->sf_addr, src_addr))
- break;
- }
- if (psf)
- rv = psf->sf_count[MCAST_INCLUDE] ||
- psf->sf_count[MCAST_EXCLUDE] !=
- mc->mca_sfcount[MCAST_EXCLUDE];
- else
- rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0;
- } else
- rv = true; /* don't filter unspecified source */
+ for_each_psf_rcu(mc, psf) {
+ if (ipv6_addr_equal(&psf->sf_addr, src_addr))
+ break;
}
+ if (psf)
+ rv = READ_ONCE(psf->sf_count[MCAST_INCLUDE]) ||
+ READ_ONCE(psf->sf_count[MCAST_EXCLUDE]) !=
+ READ_ONCE(mc->mca_sfcount[MCAST_EXCLUDE]);
+ else
+ rv = READ_ONCE(mc->mca_sfcount[MCAST_EXCLUDE]) != 0;
+ } else {
+ rv = true; /* don't filter unspecified source */
}
+unlock:
rcu_read_unlock();
return rv;
}
@@ -1730,21 +1773,19 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
struct net_device *dev = idev->dev;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- struct net *net = dev_net(dev);
const struct in6_addr *saddr;
struct in6_addr addr_buf;
struct mld2_report *pmr;
struct sk_buff *skb;
unsigned int size;
struct sock *sk;
- int err;
+ struct net *net;
- sk = net->ipv6.igmp_sk;
/* we assume size > sizeof(ra) here
* Also try to not allocate high-order pages for big MTU
*/
size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen;
- skb = sock_alloc_send_skb(sk, size, 1, &err);
+ skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return NULL;
@@ -1752,6 +1793,12 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
skb_reserve(skb, hlen);
skb_tailroom_reserve(skb, mtu, tlen);
+ rcu_read_lock();
+
+ net = dev_net_rcu(dev);
+ sk = net->ipv6.igmp_sk;
+ skb_set_owner_w(skb, sk);
+
if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
@@ -1763,6 +1810,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
+ rcu_read_unlock();
+
skb_put_data(skb, ra, sizeof(ra));
skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
@@ -2122,21 +2171,21 @@ static void mld_send_cr(struct inet6_dev *idev)
static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
{
- struct net *net = dev_net(dev);
- struct sock *sk = net->ipv6.igmp_sk;
+ const struct in6_addr *snd_addr, *saddr;
+ int err, len, payload_len, full_len;
+ struct in6_addr addr_buf;
struct inet6_dev *idev;
struct sk_buff *skb;
struct mld_msg *hdr;
- const struct in6_addr *snd_addr, *saddr;
- struct in6_addr addr_buf;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- int err, len, payload_len, full_len;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
IPV6_TLV_PADN, 0 };
- struct flowi6 fl6;
struct dst_entry *dst;
+ struct flowi6 fl6;
+ struct net *net;
+ struct sock *sk;
if (type == ICMPV6_MGM_REDUCTION)
snd_addr = &in6addr_linklocal_allrouters;
@@ -2147,19 +2196,21 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
payload_len = len + sizeof(ra);
full_len = sizeof(struct ipv6hdr) + payload_len;
- rcu_read_lock();
- IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS);
- rcu_read_unlock();
+ skb = alloc_skb(hlen + tlen + full_len, GFP_KERNEL);
- skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err);
+ rcu_read_lock();
+ net = dev_net_rcu(dev);
+ idev = __in6_dev_get(dev);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
if (!skb) {
- rcu_read_lock();
- IP6_INC_STATS(net, __in6_dev_get(dev),
- IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
rcu_read_unlock();
return;
}
+ sk = net->ipv6.igmp_sk;
+ skb_set_owner_w(skb, sk);
+
skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, hlen);
@@ -2184,9 +2235,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
IPPROTO_ICMPV6,
csum_partial(hdr, len, 0));
- rcu_read_lock();
- idev = __in6_dev_get(skb->dev);
-
icmpv6_flow_init(sk, &fl6, type,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->dev->ifindex);
@@ -2285,7 +2333,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
/* source filter not found, or count wrong => bug */
return -ESRCH;
}
- psf->sf_count[sfmode]--;
+ WRITE_ONCE(psf->sf_count[sfmode], psf->sf_count[sfmode] - 1);
if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) {
struct inet6_dev *idev = pmc->idev;
@@ -2391,7 +2439,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
rcu_assign_pointer(pmc->mca_sources, psf);
}
}
- psf->sf_count[sfmode]++;
+ WRITE_ONCE(psf->sf_count[sfmode], psf->sf_count[sfmode] + 1);
return 0;
}
@@ -2503,7 +2551,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
sf_markstate(pmc);
isexclude = pmc->mca_sfmode == MCAST_EXCLUDE;
if (!delta)
- pmc->mca_sfcount[sfmode]++;
+ WRITE_ONCE(pmc->mca_sfcount[sfmode],
+ pmc->mca_sfcount[sfmode] + 1);
err = 0;
for (i = 0; i < sfcount; i++) {
err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]);
@@ -2514,7 +2563,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
int j;
if (!delta)
- pmc->mca_sfcount[sfmode]--;
+ WRITE_ONCE(pmc->mca_sfcount[sfmode],
+ pmc->mca_sfcount[sfmode] - 1);
for (j = 0; j < i; j++)
ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]);
} else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) {
@@ -2559,7 +2609,8 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
RCU_INIT_POINTER(pmc->mca_sources, NULL);
pmc->mca_sfmode = MCAST_EXCLUDE;
pmc->mca_sfcount[MCAST_INCLUDE] = 0;
- pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
+ /* Paired with the READ_ONCE() from ipv6_chk_mcast_addr() */
+ WRITE_ONCE(pmc->mca_sfcount[MCAST_EXCLUDE], 1);
}
/* called with mc_lock */
@@ -3074,8 +3125,8 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
state->dev->ifindex, state->dev->name,
&state->im->mca_addr,
&psf->sf_addr,
- psf->sf_count[MCAST_INCLUDE],
- psf->sf_count[MCAST_EXCLUDE]);
+ READ_ONCE(psf->sf_count[MCAST_INCLUDE]),
+ READ_ONCE(psf->sf_count[MCAST_EXCLUDE]));
}
return 0;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index aba94a348673..8699d1a188dc 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -418,15 +418,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
{
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
struct sk_buff *skb;
skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
- if (!skb) {
- ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
- __func__);
+ if (!skb)
return NULL;
- }
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
@@ -437,7 +433,9 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
/* Manually assign socket ownership as we avoid calling
* sock_alloc_send_pskb() to bypass wmem buffer limits
*/
- skb_set_owner_w(skb, sk);
+ rcu_read_lock();
+ skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk);
+ rcu_read_unlock();
return skb;
}
@@ -473,16 +471,20 @@ static void ip6_nd_hdr(struct sk_buff *skb,
void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
+ struct icmp6hdr *icmp6h = icmp6_hdr(skb);
struct dst_entry *dst = skb_dst(skb);
- struct net *net = dev_net(skb->dev);
- struct sock *sk = net->ipv6.ndisc_sk;
struct inet6_dev *idev;
+ struct net *net;
+ struct sock *sk;
int err;
- struct icmp6hdr *icmp6h = icmp6_hdr(skb);
u8 type;
type = icmp6h->icmp6_type;
+ rcu_read_lock();
+
+ net = dev_net_rcu(skb->dev);
+ sk = net->ipv6.ndisc_sk;
if (!dst) {
struct flowi6 fl6;
int oif = skb->dev->ifindex;
@@ -490,6 +492,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
dst = icmp6_dst_alloc(skb->dev, &fl6);
if (IS_ERR(dst)) {
+ rcu_read_unlock();
kfree_skb(skb);
return;
}
@@ -504,7 +507,6 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
- rcu_read_lock();
idev = __in6_dev_get(dst->dev);
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
@@ -1694,7 +1696,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
bool ret;
if (netif_is_l3_master(skb->dev)) {
- dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), IPCB(skb)->iif);
if (!dev)
return;
}
@@ -1731,10 +1733,12 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
"Redirect: destination is not a neighbour\n");
goto release;
}
- peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1);
+
+ rcu_read_lock();
+ peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
ret = inet_peer_xrlim_allow(peer, 1*HZ);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
+
if (!ret)
goto release;
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 88b3fcacd4f9..46b8adf6e7f8 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -119,6 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EINVAL;
ipcm6_init_sk(&ipc6, sk);
+ ipc6.sockc.priority = READ_ONCE(sk->sk_priority);
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8476a3944a88..a45aba090aa4 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -619,7 +619,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb_reserve(skb, hlen);
skb->protocol = htons(ETH_P_IPV6);
- skb->priority = READ_ONCE(sk->sk_priority);
+ skb->priority = sockc->priority;
skb->mark = sockc->mark;
skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
@@ -780,6 +780,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = fl6.flowi6_mark;
+ ipc6.sockc.priority = READ_ONCE(sk->sk_priority);
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 67ff16c04718..ef2d23a1e3d5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3196,13 +3196,18 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
struct net_device *dev = dst->dev;
unsigned int mtu = dst_mtu(dst);
- struct net *net = dev_net(dev);
+ struct net *net;
mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+ rcu_read_lock();
+
+ net = dev_net_rcu(dev);
if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
+ rcu_read_unlock();
+
/*
* Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
* corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
@@ -5005,6 +5010,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
[RTA_NH_ID] = { .type = NLA_U32 },
+ [RTA_FLOWLABEL] = { .type = NLA_BE32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -5030,6 +5036,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
+ if (tb[RTA_FLOWLABEL]) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
+ "Flow label cannot be specified for this operation");
+ goto errout;
+ }
+
*cfg = (struct fib6_config){
.fc_table = rtm->rtm_table,
.fc_dst_len = rtm->rtm_dst_len,
@@ -6013,6 +6025,13 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
return -EINVAL;
}
+ if (tb[RTA_FLOWLABEL] &&
+ (nla_get_be32(tb[RTA_FLOWLABEL]) & ~IPV6_FLOWLABEL_MASK)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
+ "Invalid flow label");
+ return -EINVAL;
+ }
+
for (i = 0; i <= RTA_MAX; i++) {
if (!tb[i])
continue;
@@ -6027,6 +6046,7 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
case RTA_SPORT:
case RTA_DPORT:
case RTA_IP_PROTO:
+ case RTA_FLOWLABEL:
break;
default:
NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
@@ -6049,6 +6069,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct sk_buff *skb;
struct rtmsg *rtm;
struct flowi6 fl6 = {};
+ __be32 flowlabel;
bool fibmatch;
err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
@@ -6057,7 +6078,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = -EINVAL;
rtm = nlmsg_data(nlh);
- fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
if (tb[RTA_SRC]) {
@@ -6103,6 +6123,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
}
+ flowlabel = nla_get_be32_default(tb[RTA_FLOWLABEL], 0);
+ fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, flowlabel);
+
if (iif) {
struct net_device *dev;
int flags = 0;
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index db3c19a42e1c..0ac4283acdf2 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -125,7 +125,8 @@ static void rpl_destroy_state(struct lwtunnel_state *lwt)
}
static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
- const struct ipv6_rpl_sr_hdr *srh)
+ const struct ipv6_rpl_sr_hdr *srh,
+ struct dst_entry *cache_dst)
{
struct ipv6_rpl_sr_hdr *isrh, *csrh;
const struct ipv6hdr *oldhdr;
@@ -153,7 +154,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
hdrlen = ((csrh->hdrlen + 1) << 3);
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err)) {
kfree(buf);
return err;
@@ -186,7 +187,8 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
return 0;
}
-static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
+static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt,
+ struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct rpl_iptunnel_encap *tinfo;
@@ -196,7 +198,7 @@ static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
tinfo = rpl_encap_lwtunnel(dst->lwtstate);
- return rpl_do_srh_inline(skb, rlwt, tinfo->srh);
+ return rpl_do_srh_inline(skb, rlwt, tinfo->srh, cache_dst);
}
static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -208,14 +210,14 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
- err = rpl_do_srh(skb, rlwt);
- if (unlikely(err))
- goto drop;
-
local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
local_bh_enable();
+ err = rpl_do_srh(skb, rlwt, dst);
+ if (unlikely(err))
+ goto drop;
+
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -230,25 +232,28 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
err = dst->error;
- dst_release(dst);
goto drop;
}
- local_bh_disable();
- dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
- local_bh_enable();
+ /* cache only if we don't create a dst reference loop */
+ if (orig_dst->lwtstate != dst->lwtstate) {
+ local_bh_disable();
+ dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
+ local_bh_enable();
+ }
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
-
return dst_output(net, sk, skb);
drop:
+ dst_release(dst);
kfree_skb(skb);
return err;
}
@@ -262,29 +267,33 @@ static int rpl_input(struct sk_buff *skb)
rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
- err = rpl_do_srh(skb, rlwt);
- if (unlikely(err))
- goto drop;
-
local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
+ local_bh_enable();
+
+ err = rpl_do_srh(skb, rlwt, dst);
+ if (unlikely(err)) {
+ dst_release(dst);
+ goto drop;
+ }
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
if (!dst->error) {
+ local_bh_disable();
dst_cache_set_ip6(&rlwt->cache, dst,
&ipv6_hdr(skb)->saddr);
+ local_bh_enable();
}
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
- local_bh_enable();
-
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
return dst_input(skb);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 098632adc9b5..33833b2064c0 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -124,8 +124,8 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
return flowlabel;
}
-/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
-int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ int proto, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct net *net = dev_net(dst->dev);
@@ -137,7 +137,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
- err = skb_cow_head(skb, tot_len + skb->mac_len);
+ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -197,11 +197,18 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
return 0;
}
+
+/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
+int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+{
+ return __seg6_do_srh_encap(skb, osrh, proto, NULL);
+}
EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */
static int seg6_do_srh_encap_red(struct sk_buff *skb,
- struct ipv6_sr_hdr *osrh, int proto)
+ struct ipv6_sr_hdr *osrh, int proto,
+ struct dst_entry *cache_dst)
{
__u8 first_seg = osrh->first_segment;
struct dst_entry *dst = skb_dst(skb);
@@ -230,7 +237,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb,
tot_len = red_hdrlen + sizeof(struct ipv6hdr);
- err = skb_cow_head(skb, tot_len + skb->mac_len);
+ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -317,8 +324,8 @@ out:
return 0;
}
-/* insert an SRH within an IPv6 packet, just after the IPv6 header */
-int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ struct dst_entry *cache_dst)
{
struct ipv6hdr *hdr, *oldhdr;
struct ipv6_sr_hdr *isrh;
@@ -326,7 +333,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -369,9 +376,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
return 0;
}
-EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
-static int seg6_do_srh(struct sk_buff *skb)
+static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct seg6_iptunnel_encap *tinfo;
@@ -384,7 +390,7 @@ static int seg6_do_srh(struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
return -EINVAL;
- err = seg6_do_srh_inline(skb, tinfo->srh);
+ err = __seg6_do_srh_inline(skb, tinfo->srh, cache_dst);
if (err)
return err;
break;
@@ -402,9 +408,11 @@ static int seg6_do_srh(struct sk_buff *skb)
return -EINVAL;
if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP)
- err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+ err = __seg6_do_srh_encap(skb, tinfo->srh,
+ proto, cache_dst);
else
- err = seg6_do_srh_encap_red(skb, tinfo->srh, proto);
+ err = seg6_do_srh_encap_red(skb, tinfo->srh,
+ proto, cache_dst);
if (err)
return err;
@@ -425,11 +433,13 @@ static int seg6_do_srh(struct sk_buff *skb)
skb_push(skb, skb->mac_len);
if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP)
- err = seg6_do_srh_encap(skb, tinfo->srh,
- IPPROTO_ETHERNET);
+ err = __seg6_do_srh_encap(skb, tinfo->srh,
+ IPPROTO_ETHERNET,
+ cache_dst);
else
err = seg6_do_srh_encap_red(skb, tinfo->srh,
- IPPROTO_ETHERNET);
+ IPPROTO_ETHERNET,
+ cache_dst);
if (err)
return err;
@@ -444,6 +454,13 @@ static int seg6_do_srh(struct sk_buff *skb)
return 0;
}
+/* insert an SRH within an IPv6 packet, just after the IPv6 header */
+int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+{
+ return __seg6_do_srh_inline(skb, osrh, NULL);
+}
+EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
+
static int seg6_input_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
@@ -458,31 +475,35 @@ static int seg6_input_core(struct net *net, struct sock *sk,
struct seg6_lwt *slwt;
int err;
- err = seg6_do_srh(skb);
- if (unlikely(err))
- goto drop;
-
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
local_bh_disable();
dst = dst_cache_get(&slwt->cache);
+ local_bh_enable();
+
+ err = seg6_do_srh(skb, dst);
+ if (unlikely(err)) {
+ dst_release(dst);
+ goto drop;
+ }
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
if (!dst->error) {
+ local_bh_disable();
dst_cache_set_ip6(&slwt->cache, dst,
&ipv6_hdr(skb)->saddr);
+ local_bh_enable();
}
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
- local_bh_enable();
-
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
@@ -528,16 +549,16 @@ static int seg6_output_core(struct net *net, struct sock *sk,
struct seg6_lwt *slwt;
int err;
- err = seg6_do_srh(skb);
- if (unlikely(err))
- goto drop;
-
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
local_bh_disable();
dst = dst_cache_get(&slwt->cache);
local_bh_enable();
+ err = seg6_do_srh(skb, dst);
+ if (unlikely(err))
+ goto drop;
+
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -552,28 +573,31 @@ static int seg6_output_core(struct net *net, struct sock *sk,
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
err = dst->error;
- dst_release(dst);
goto drop;
}
- local_bh_disable();
- dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
- local_bh_enable();
+ /* cache only if we don't create a dst reference loop */
+ if (orig_dst->lwtstate != dst->lwtstate) {
+ local_bh_disable();
+ dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+ local_bh_enable();
+ }
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
-
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, skb_dst(skb)->dev, dst_output);
return dst_output(net, sk, skb);
drop:
+ dst_release(dst);
kfree_skb(skb);
return err;
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d766fd798ecf..c6ea438b5c75 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -170,6 +170,49 @@ static int compute_score(struct sock *sk, const struct net *net,
return score;
}
+/**
+ * udp6_lib_lookup1() - Simplified lookup using primary hash (destination port)
+ * @net: Network namespace
+ * @saddr: Source address, network order
+ * @sport: Source port, network order
+ * @daddr: Destination address, network order
+ * @hnum: Destination port, host order
+ * @dif: Destination interface index
+ * @sdif: Destination bridge port index, if relevant
+ * @udptable: Set of UDP hash tables
+ *
+ * Simplified lookup to be used as fallback if no sockets are found due to a
+ * potential race between (receive) address change, and lookup happening before
+ * the rehash operation. This function ignores SO_REUSEPORT groups while scoring
+ * result sockets, because if we have one, we don't need the fallback at all.
+ *
+ * Called under rcu_read_lock().
+ *
+ * Return: socket with highest matching score if any, NULL if none
+ */
+static struct sock *udp6_lib_lookup1(const struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum, int dif, int sdif,
+ const struct udp_table *udptable)
+{
+ unsigned int slot = udp_hashfn(net, hnum, udptable->mask);
+ struct udp_hslot *hslot = &udptable->hash[slot];
+ struct sock *sk, *result = NULL;
+ int score, badness = 0;
+
+ sk_for_each_rcu(sk, &hslot->head) {
+ score = compute_score(sk, net,
+ saddr, sport, daddr, hnum, dif, sdif);
+ if (score > badness) {
+ result = sk;
+ badness = score;
+ }
+ }
+
+ return result;
+}
+
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -347,6 +390,13 @@ struct sock *__udp6_lib_lookup(const struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif, sdif,
hslot2, skb);
+ if (!IS_ERR_OR_NULL(result))
+ goto done;
+
+ /* Cover address change/lookup/rehash race: see __udp4_lib_lookup() */
+ result = udp6_lib_lookup1(net, saddr, sport, daddr, hnum, dif, sdif,
+ udptable);
+
done:
if (IS_ERR(result))
return NULL;
@@ -1339,9 +1389,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
const int hlen = skb_network_header_len(skb) +
sizeof(struct udphdr);
- if (hlen + cork->gso_size > cork->fragsize) {
+ if (hlen + min(datalen, cork->gso_size) > cork->fragsize) {
kfree_skb(skb);
- return -EINVAL;
+ return -EMSGSIZE;
}
if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
kfree_skb(skb);
@@ -1448,6 +1498,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.gso_size = READ_ONCE(up->gso_size);
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
+ ipc6.sockc.priority = READ_ONCE(sk->sk_priority);
/* destination address check */
if (sin6) {
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 5f7b1fdbffe6..b3d5d1f266ee 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -82,14 +82,14 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
toobig = skb->len > mtu && !skb_is_gso(skb);
- if (toobig && xfrm6_local_dontfrag(skb->sk)) {
+ if (toobig && xfrm6_local_dontfrag(sk)) {
xfrm6_local_rxpmtu(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
} else if (toobig && xfrm6_noneed_fragment(skb)) {
skb->ignore_df = 1;
goto skip_frag;
- } else if (!skb->ignore_df && toobig && skb->sk) {
+ } else if (!skb->ignore_df && toobig && sk) {
xfrm_local_error(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;