Diffstat (limited to 'net')
-rw-r--r--  net/core/dev.c                   |  12
-rw-r--r--  net/core/dst.c                   |  24
-rw-r--r--  net/core/ethtool.c               |  69
-rw-r--r--  net/core/rtnetlink.c             |  25
-rw-r--r--  net/core/skbuff.c                |  35
-rw-r--r--  net/ipv4/ip_sockglue.c           |  24
-rw-r--r--  net/ipv4/proc.c                  |   4
-rw-r--r--  net/ipv4/tcp.c                   |  60
-rw-r--r--  net/ipv4/tcp_cubic.c             |  31
-rw-r--r--  net/ipv4/tcp_output.c            |  59
-rw-r--r--  net/ipv4/udp.c                   | 111
-rw-r--r--  net/ipv6/datagram.c              |  21
-rw-r--r--  net/ipv6/ip6_vti.c               |  17
-rw-r--r--  net/ipv6/ipv6_sockglue.c         |   6
-rw-r--r--  net/ipv6/udp.c                   | 113
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c  |   4
-rw-r--r--  net/openvswitch/vport.c          |   2
-rw-r--r--  net/sched/cls_basic.c            |  26
-rw-r--r--  net/sched/cls_bpf.c              |  26
-rw-r--r--  net/sched/cls_cgroup.c           |  11
-rw-r--r--  net/sched/cls_flow.c             |  12
-rw-r--r--  net/sched/cls_fw.c               |   5
-rw-r--r--  net/sched/cls_route.c            |   5
-rw-r--r--  net/sched/cls_rsvp.h             |   5
-rw-r--r--  net/sched/cls_tcindex.c          |   8
-rw-r--r--  net/sched/cls_u32.c              |   5
-rw-r--r--  net/socket.c                     |  22
-rw-r--r--  net/tipc/bcast.c                 |   9
-rw-r--r--  net/tipc/name_distr.c            |  83
-rw-r--r--  net/tipc/name_table.c            | 202
-rw-r--r--  net/tipc/name_table.h            |  20
-rw-r--r--  net/tipc/socket.c                |  38
-rw-r--r--  net/tipc/subscr.c                |   1
-rw-r--r--  net/xfrm/xfrm_policy.c           |  52
-rw-r--r--  net/xfrm/xfrm_user.c             |  12
35 files changed, 579 insertions(+), 580 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 0814a560e5f3..dd3bf582e6f0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5925,6 +5925,8 @@ static void rollback_registered_many(struct list_head *head)
synchronize_net();
list_for_each_entry(dev, head, unreg_list) {
+ struct sk_buff *skb = NULL;
+
/* Shutdown queueing discipline. */
dev_shutdown(dev);
@@ -5934,6 +5936,11 @@ static void rollback_registered_many(struct list_head *head)
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+ if (!dev->rtnl_link_ops ||
+ dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U,
+ GFP_KERNEL);
+
/*
* Flush the unicast and multicast chains
*/
@@ -5943,9 +5950,8 @@ static void rollback_registered_many(struct list_head *head)
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+ if (skb)
+ rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
/* Notifier chain MUST detach us all upper devices. */
WARN_ON(netdev_has_any_upper_dev(dev));
diff --git a/net/core/dst.c b/net/core/dst.c
index a028409ee438..e956ce6d1378 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -327,30 +327,6 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
}
EXPORT_SYMBOL(__dst_destroy_metrics_generic);
-/**
- * __skb_dst_set_noref - sets skb dst, without a reference
- * @skb: buffer
- * @dst: dst entry
- * @force: if force is set, use noref version even for DST_NOCACHE entries
- *
- * Sets skb dst, assuming a reference was not taken on dst
- * skb_dst_drop() should not dst_release() this dst
- */
-void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force)
-{
- WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
- /* If dst not in cache, we must take a reference, because
- * dst_release() will destroy dst as soon as its refcount becomes zero
- */
- if (unlikely((dst->flags & DST_NOCACHE) && !force)) {
- dst_hold(dst);
- skb_dst_set(skb, dst);
- } else {
- skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
- }
-}
-EXPORT_SYMBOL(__skb_dst_set_noref);
-
/* Dirty hack. We did it in 2.2 (in __dst_free),
* we have _very_ good reasons not to repeat
* this mistake in 2.3, but we have no choice
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 715f51f321e9..550892cd6b3f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -100,6 +100,12 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_BUSY_POLL_BIT] = "busy-poll",
};
+static const char
+rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
+ [ETH_RSS_HASH_TOP_BIT] = "toeplitz",
+ [ETH_RSS_HASH_XOR_BIT] = "xor",
+};
+
static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
{
struct ethtool_gfeatures cmd = {
@@ -185,6 +191,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
if (sset == ETH_SS_FEATURES)
return ARRAY_SIZE(netdev_features_strings);
+ if (sset == ETH_SS_RSS_HASH_FUNCS)
+ return ARRAY_SIZE(rss_hash_func_strings);
+
if (ops->get_sset_count && ops->get_strings)
return ops->get_sset_count(dev, sset);
else
@@ -199,6 +208,9 @@ static void __ethtool_get_strings(struct net_device *dev,
if (stringset == ETH_SS_FEATURES)
memcpy(data, netdev_features_strings,
sizeof(netdev_features_strings));
+ else if (stringset == ETH_SS_RSS_HASH_FUNCS)
+ memcpy(data, rss_hash_func_strings,
+ sizeof(rss_hash_func_strings));
else
/* ops->get_strings is valid because checked earlier */
ops->get_strings(dev, stringset, data);
@@ -618,7 +630,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
if (!indir)
return -ENOMEM;
- ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL);
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL);
if (ret)
goto out;
@@ -679,7 +691,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
goto out;
}
- ret = ops->set_rxfh(dev, indir, NULL);
+ ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE);
out:
kfree(indir);
@@ -697,12 +709,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
u32 total_size;
u32 indir_bytes;
u32 *indir = NULL;
+ u8 dev_hfunc = 0;
u8 *hkey = NULL;
u8 *rss_config;
- if (!(dev->ethtool_ops->get_rxfh_indir_size ||
- dev->ethtool_ops->get_rxfh_key_size) ||
- !dev->ethtool_ops->get_rxfh)
+ if (!ops->get_rxfh)
return -EOPNOTSUPP;
if (ops->get_rxfh_indir_size)
@@ -710,16 +721,14 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (ops->get_rxfh_key_size)
dev_key_size = ops->get_rxfh_key_size(dev);
- if ((dev_key_size + dev_indir_size) == 0)
- return -EOPNOTSUPP;
-
if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
return -EFAULT;
user_indir_size = rxfh.indir_size;
user_key_size = rxfh.key_size;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
+ if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
+ rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
rxfh.indir_size = dev_indir_size;
@@ -727,13 +736,6 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (copy_to_user(useraddr, &rxfh, sizeof(rxfh)))
return -EFAULT;
- /* If the user buffer size is 0, this is just a query for the
- * device table size and key size. Otherwise, if the User size is
- * not equal to device table size or key size it's an error.
- */
- if (!user_indir_size && !user_key_size)
- return 0;
-
if ((user_indir_size && (user_indir_size != dev_indir_size)) ||
(user_key_size && (user_key_size != dev_key_size)))
return -EINVAL;
@@ -750,14 +752,19 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (user_key_size)
hkey = rss_config + indir_bytes;
- ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey);
- if (!ret) {
- if (copy_to_user(useraddr +
- offsetof(struct ethtool_rxfh, rss_config[0]),
- rss_config, total_size))
- ret = -EFAULT;
- }
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
+ if (ret)
+ goto out;
+ if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc),
+ &dev_hfunc, sizeof(rxfh.hfunc))) {
+ ret = -EFAULT;
+ } else if (copy_to_user(useraddr +
+ offsetof(struct ethtool_rxfh, rss_config[0]),
+ rss_config, total_size)) {
+ ret = -EFAULT;
+ }
+out:
kfree(rss_config);
return ret;
@@ -776,33 +783,31 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
u8 *rss_config;
u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
- if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) ||
- !ops->get_rxnfc || !ops->set_rxfh)
+ if (!ops->get_rxnfc || !ops->set_rxfh)
return -EOPNOTSUPP;
if (ops->get_rxfh_indir_size)
dev_indir_size = ops->get_rxfh_indir_size(dev);
if (ops->get_rxfh_key_size)
dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev);
- if ((dev_key_size + dev_indir_size) == 0)
- return -EOPNOTSUPP;
if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
return -EFAULT;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
+ if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
+ rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
- /* If either indir or hash key is valid, proceed further.
- * It is not valid to request that both be unchanged.
+ /* If either indir, hash key or function is valid, proceed further.
+ * Must request at least one change: indir size, hash key or function.
*/
if ((rxfh.indir_size &&
rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
rxfh.indir_size != dev_indir_size) ||
(rxfh.key_size && (rxfh.key_size != dev_key_size)) ||
(rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
- rxfh.key_size == 0))
+ rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE))
return -EINVAL;
if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
@@ -845,7 +850,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
}
}
- ret = ops->set_rxfh(dev, indir, hkey);
+ ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
out:
kfree(rss_config);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 61cb7e7cc3c7..a9be2c161702 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2245,8 +2245,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
- gfp_t flags)
+struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
+ unsigned int change, gfp_t flags)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -2264,11 +2264,28 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
- return;
+ return skb;
errout:
if (err < 0)
rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+ return NULL;
+}
+
+void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
+{
+ struct net *net = dev_net(dev);
+
+ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
+}
+
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
+ gfp_t flags)
+{
+ struct sk_buff *skb;
+
+ skb = rtmsg_ifinfo_build_skb(type, dev, change, flags);
+ if (skb)
+ rtmsg_ifinfo_send(skb, dev, flags);
}
EXPORT_SYMBOL(rtmsg_ifinfo);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 92116dfe827c..7a338fb55cc4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -265,7 +265,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
skb->fclone = SKB_FCLONE_ORIG;
atomic_set(&fclones->fclone_ref, 1);
- fclones->skb2.fclone = SKB_FCLONE_FREE;
+ fclones->skb2.fclone = SKB_FCLONE_CLONE;
fclones->skb2.pfmemalloc = pfmemalloc;
}
out:
@@ -541,26 +541,27 @@ static void kfree_skbmem(struct sk_buff *skb)
switch (skb->fclone) {
case SKB_FCLONE_UNAVAILABLE:
kmem_cache_free(skbuff_head_cache, skb);
- break;
+ return;
case SKB_FCLONE_ORIG:
fclones = container_of(skb, struct sk_buff_fclones, skb1);
- if (atomic_dec_and_test(&fclones->fclone_ref))
- kmem_cache_free(skbuff_fclone_cache, fclones);
- break;
-
- case SKB_FCLONE_CLONE:
- fclones = container_of(skb, struct sk_buff_fclones, skb2);
- /* The clone portion is available for
- * fast-cloning again.
+ /* We usually free the clone (TX completion) before original skb
+ * This test would have no chance to be true for the clone,
+ * while here, branch prediction will be good.
*/
- skb->fclone = SKB_FCLONE_FREE;
+ if (atomic_read(&fclones->fclone_ref) == 1)
+ goto fastpath;
+ break;
- if (atomic_dec_and_test(&fclones->fclone_ref))
- kmem_cache_free(skbuff_fclone_cache, fclones);
+ default: /* SKB_FCLONE_CLONE */
+ fclones = container_of(skb, struct sk_buff_fclones, skb2);
break;
}
+ if (!atomic_dec_and_test(&fclones->fclone_ref))
+ return;
+fastpath:
+ kmem_cache_free(skbuff_fclone_cache, fclones);
}
static void skb_release_head_state(struct sk_buff *skb)
@@ -872,15 +873,15 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
struct sk_buff_fclones *fclones = container_of(skb,
struct sk_buff_fclones,
skb1);
- struct sk_buff *n = &fclones->skb2;
+ struct sk_buff *n;
if (skb_orphan_frags(skb, gfp_mask))
return NULL;
if (skb->fclone == SKB_FCLONE_ORIG &&
- n->fclone == SKB_FCLONE_FREE) {
- n->fclone = SKB_FCLONE_CLONE;
- atomic_inc(&fclones->fclone_ref);
+ atomic_read(&fclones->fclone_ref) == 1) {
+ n = &fclones->skb2;
+ atomic_set(&fclones->fclone_ref, 2);
} else {
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b7826575d215..640f26c6a9fe 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -399,6 +399,22 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
kfree_skb(skb);
}
+static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk,
+ const struct sk_buff *skb,
+ int ee_origin)
+{
+ struct in_pktinfo *info = PKTINFO_SKB_CB(skb);
+
+ if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) ||
+ (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
+ (!skb->dev))
+ return false;
+
+ info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
+ info->ipi_ifindex = skb->dev->ifindex;
+ return true;
+}
+
/*
* Handle MSG_ERRQUEUE
*/
@@ -414,6 +430,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
int err;
int copied;
+ WARN_ON_ONCE(sk->sk_family == AF_INET6);
+
err = -EAGAIN;
skb = sock_dequeue_err_skb(sk);
if (skb == NULL)
@@ -444,7 +462,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
sin = &errhdr.offender;
sin->sin_family = AF_UNSPEC;
- if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) {
+
+ if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+ ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) {
struct inet_sock *inet = inet_sk(sk);
sin->sin_family = AF_INET;
@@ -1049,7 +1069,7 @@ e_inval:
}
/**
- * ipv4_pktinfo_prepare - transfert some info from rtable to skb
+ * ipv4_pktinfo_prepare - transfer some info from rtable to skb
* @sk: socket
* @skb: buffer
*
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 6513ade8d6dc..8f9cd200ce20 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -288,6 +288,10 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPWantZeroWindowAdv", LINUX_MIB_TCPWANTZEROWINDOWADV),
SNMP_MIB_ITEM("TCPSynRetrans", LINUX_MIB_TCPSYNRETRANS),
SNMP_MIB_ITEM("TCPOrigDataSent", LINUX_MIB_TCPORIGDATASENT),
+ SNMP_MIB_ITEM("TCPHystartTrainDetect", LINUX_MIB_TCPHYSTARTTRAINDETECT),
+ SNMP_MIB_ITEM("TCPHystartTrainCwnd", LINUX_MIB_TCPHYSTARTTRAINCWND),
+ SNMP_MIB_ITEM("TCPHystartDelayDetect", LINUX_MIB_TCPHYSTARTDELAYDETECT),
+ SNMP_MIB_ITEM("TCPHystartDelayCwnd", LINUX_MIB_TCPHYSTARTDELAYCWND),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dc13a3657e8e..427aee33ffc0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -835,47 +835,29 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
int large_allowed)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 xmit_size_goal, old_size_goal;
-
- xmit_size_goal = mss_now;
-
- if (large_allowed && sk_can_gso(sk)) {
- u32 gso_size, hlen;
-
- /* Maybe we should/could use sk->sk_prot->max_header here ? */
- hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
- inet_csk(sk)->icsk_ext_hdr_len +
- tp->tcp_header_len;
-
- /* Goal is to send at least one packet per ms,
- * not one big TSO packet every 100 ms.
- * This preserves ACK clocking and is consistent
- * with tcp_tso_should_defer() heuristic.
- */
- gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
- gso_size = max_t(u32, gso_size,
- sysctl_tcp_min_tso_segs * mss_now);
-
- xmit_size_goal = min_t(u32, gso_size,
- sk->sk_gso_max_size - 1 - hlen);
-
- xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
-
- /* We try hard to avoid divides here */
- old_size_goal = tp->xmit_size_goal_segs * mss_now;
-
- if (likely(old_size_goal <= xmit_size_goal &&
- old_size_goal + mss_now > xmit_size_goal)) {
- xmit_size_goal = old_size_goal;
- } else {
- tp->xmit_size_goal_segs =
- min_t(u16, xmit_size_goal / mss_now,
- sk->sk_gso_max_segs);
- xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
- }
+ u32 new_size_goal, size_goal, hlen;
+
+ if (!large_allowed || !sk_can_gso(sk))
+ return mss_now;
+
+ /* Maybe we should/could use sk->sk_prot->max_header here ? */
+ hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+ inet_csk(sk)->icsk_ext_hdr_len +
+ tp->tcp_header_len;
+
+ new_size_goal = sk->sk_gso_max_size - 1 - hlen;
+ new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
+
+ /* We try hard to avoid divides here */
+ size_goal = tp->gso_segs * mss_now;
+ if (unlikely(new_size_goal < size_goal ||
+ new_size_goal >= size_goal + mss_now)) {
+ tp->gso_segs = min_t(u16, new_size_goal / mss_now,
+ sk->sk_gso_max_segs);
+ size_goal = tp->gso_segs * mss_now;
}
- return max(xmit_size_goal, mss_now);
+ return max(size_goal, mss_now);
}
static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 20de0118c98e..6b6002416a73 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -363,16 +363,28 @@ static void hystart_update(struct sock *sk, u32 delay)
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
- if (!(ca->found & hystart_detect)) {
+ if (ca->found & hystart_detect)
+ return;
+
+ if (hystart_detect & HYSTART_ACK_TRAIN) {
u32 now = bictcp_clock();
/* first detection parameter - ack-train detection */
if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
ca->last_ack = now;
- if ((s32)(now - ca->round_start) > ca->delay_min >> 4)
+ if ((s32)(now - ca->round_start) > ca->delay_min >> 4) {
ca->found |= HYSTART_ACK_TRAIN;
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINDETECT);
+ NET_ADD_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINCWND,
+ tp->snd_cwnd);
+ tp->snd_ssthresh = tp->snd_cwnd;
+ }
}
+ }
+ if (hystart_detect & HYSTART_DELAY) {
/* obtain the minimum delay of more than sampling packets */
if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
@@ -381,15 +393,16 @@ static void hystart_update(struct sock *sk, u32 delay)
ca->sample_cnt++;
} else {
if (ca->curr_rtt > ca->delay_min +
- HYSTART_DELAY_THRESH(ca->delay_min>>4))
+ HYSTART_DELAY_THRESH(ca->delay_min >> 3)) {
ca->found |= HYSTART_DELAY;
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYDETECT);
+ NET_ADD_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYCWND,
+ tp->snd_cwnd);
+ tp->snd_ssthresh = tp->snd_cwnd;
+ }
}
- /*
- * Either one of two conditions are met,
- * we exit from slow start immediately.
- */
- if (ca->found & hystart_detect)
- tp->snd_ssthresh = tp->snd_cwnd;
}
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f5bd4bd3f7e6..f37ecf53ee8a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1524,6 +1524,27 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
((nonagle & TCP_NAGLE_CORK) ||
(!nonagle && tp->packets_out && tcp_minshall_check(tp)));
}
+
+/* Return how many segs we'd like on a TSO packet,
+ * to send one TSO packet per ms
+ */
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
+{
+ u32 bytes, segs;
+
+ bytes = min(sk->sk_pacing_rate >> 10,
+ sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
+
+ /* Goal is to send at least one packet per ms,
+ * not one big TSO packet every 100 ms.
+ * This preserves ACK clocking and is consistent
+ * with tcp_tso_should_defer() heuristic.
+ */
+ segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
+
+ return min_t(u32, segs, sk->sk_gso_max_segs);
+}
+
/* Returns the portion of skb which can be sent right away */
static unsigned int tcp_mss_split_point(const struct sock *sk,
const struct sk_buff *skb,
@@ -1731,7 +1752,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
* This algorithm is from John Heffner.
*/
static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
- bool *is_cwnd_limited)
+ bool *is_cwnd_limited, u32 max_segs)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1761,8 +1782,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
limit = min(send_win, cong_win);
/* If a full-sized TSO skb can be sent, do it. */
- if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
- tp->xmit_size_goal_segs * tp->mss_cache))
+ if (limit >= max_segs * tp->mss_cache)
goto send_now;
/* Middle in queue won't get any more data, full sendable already? */
@@ -1959,6 +1979,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int cwnd_quota;
int result;
bool is_cwnd_limited = false;
+ u32 max_segs;
sent_pkts = 0;
@@ -1972,6 +1993,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
}
}
+ max_segs = tcp_tso_autosize(sk, mss_now);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
@@ -2004,10 +2026,23 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
} else {
if (!push_one &&
- tcp_tso_should_defer(sk, skb, &is_cwnd_limited))
+ tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
+ max_segs))
break;
}
+ limit = mss_now;
+ if (tso_segs > 1 && !tcp_urg_mode(tp))
+ limit = tcp_mss_split_point(sk, skb, mss_now,
+ min_t(unsigned int,
+ cwnd_quota,
+ max_segs),
+ nonagle);
+
+ if (skb->len > limit &&
+ unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+ break;
+
/* TCP Small Queues :
* Control number of packets in qdisc/devices to two packets / or ~1 ms.
* This allows for :
@@ -2018,8 +2053,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
* of queued bytes to ensure line rate.
* One example is wifi aggregation (802.11 AMPDU)
*/
- limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes,
- sk->sk_pacing_rate >> 10);
+ limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+ limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
if (atomic_read(&sk->sk_wmem_alloc) > limit) {
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
@@ -2032,18 +2067,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
}
- limit = mss_now;
- if (tso_segs > 1 && !tcp_urg_mode(tp))
- limit = tcp_mss_split_point(sk, skb, mss_now,
- min_t(unsigned int,
- cwnd_quota,
- sk->sk_gso_max_segs),
- nonagle);
-
- if (skb->len > limit &&
- unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
- break;
-
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
break;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b2d606833ce4..dd8e00634563 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -336,38 +336,45 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
}
-static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
- unsigned short hnum,
- __be16 sport, __be32 daddr, __be16 dport, int dif)
+static inline int compute_score(struct sock *sk, struct net *net,
+ __be32 saddr, unsigned short hnum, __be16 sport,
+ __be32 daddr, __be16 dport, int dif)
{
- int score = -1;
+ int score;
+ struct inet_sock *inet;
- if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
- !ipv6_only_sock(sk)) {
- struct inet_sock *inet = inet_sk(sk);
+ if (!net_eq(sock_net(sk), net) ||
+ udp_sk(sk)->udp_port_hash != hnum ||
+ ipv6_only_sock(sk))
+ return -1;
- score = (sk->sk_family == PF_INET ? 2 : 1);
- if (inet->inet_rcv_saddr) {
- if (inet->inet_rcv_saddr != daddr)
- return -1;
- score += 4;
- }
- if (inet->inet_daddr) {
- if (inet->inet_daddr != saddr)
- return -1;
- score += 4;
- }
- if (inet->inet_dport) {
- if (inet->inet_dport != sport)
- return -1;
- score += 4;
- }
- if (sk->sk_bound_dev_if) {
- if (sk->sk_bound_dev_if != dif)
- return -1;
- score += 4;
- }
+ score = (sk->sk_family == PF_INET) ? 2 : 1;
+ inet = inet_sk(sk);
+
+ if (inet->inet_rcv_saddr) {
+ if (inet->inet_rcv_saddr != daddr)
+ return -1;
+ score += 4;
+ }
+
+ if (inet->inet_daddr) {
+ if (inet->inet_daddr != saddr)
+ return -1;
+ score += 4;
}
+
+ if (inet->inet_dport) {
+ if (inet->inet_dport != sport)
+ return -1;
+ score += 4;
+ }
+
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ return -1;
+ score += 4;
+ }
+
return score;
}
@@ -378,33 +385,39 @@ static inline int compute_score2(struct sock *sk, struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum, int dif)
{
- int score = -1;
+ int score;
+ struct inet_sock *inet;
+
+ if (!net_eq(sock_net(sk), net) ||
+ ipv6_only_sock(sk))
+ return -1;
- if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) {
- struct inet_sock *inet = inet_sk(sk);
+ inet = inet_sk(sk);
- if (inet->inet_rcv_saddr != daddr)
+ if (inet->inet_rcv_saddr != daddr ||
+ inet->inet_num != hnum)
+ return -1;
+
+ score = (sk->sk_family == PF_INET) ? 2 : 1;
+
+ if (inet->inet_daddr) {
+ if (inet->inet_daddr != saddr)
return -1;
- if (inet->inet_num != hnum)
+ score += 4;
+ }
+
+ if (inet->inet_dport) {
+ if (inet->inet_dport != sport)
return -1;
+ score += 4;
+ }
- score = (sk->sk_family == PF_INET ? 2 : 1);
- if (inet->inet_daddr) {
- if (inet->inet_daddr != saddr)
- return -1;
- score += 4;
- }
- if (inet->inet_dport) {
- if (inet->inet_dport != sport)
- return -1;
- score += 4;
- }
- if (sk->sk_bound_dev_if) {
- if (sk->sk_bound_dev_if != dif)
- return -1;
- score += 4;
- }
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ return -1;
+ score += 4;
}
+
return score;
}
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index cc1139687fd7..2464a00e36ab 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
kfree_skb(skb);
}
+static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb)
+{
+ int ifindex = skb->dev ? skb->dev->ifindex : -1;
+
+ if (skb->protocol == htons(ETH_P_IPV6))
+ IP6CB(skb)->iif = ifindex;
+ else
+ PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex;
+}
+
/*
* Handle MSG_ERRQUEUE
*/
@@ -388,8 +398,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
sin->sin6_port = 0;
- if (np->rxopt.all)
+ if (np->rxopt.all) {
+ if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP &&
+ serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6)
+ ip6_datagram_prepare_pktinfo_errqueue(skb);
ip6_datagram_recv_common_ctl(sk, msg, skb);
+ }
if (skb->protocol == htons(ETH_P_IPV6)) {
sin->sin6_addr = ipv6_hdr(skb)->saddr;
if (np->rxopt.all)
@@ -491,7 +505,10 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg,
ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
&src_info.ipi6_addr);
}
- put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
+
+ if (src_info.ipi6_ifindex >= 0)
+ put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO,
+ sizeof(src_info), &src_info);
}
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 16a7e81e3f99..ace10d0b3aac 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -95,6 +95,7 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote,
unsigned int hash = HASH(remote, local);
struct ip6_tnl *t;
struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ struct in6_addr any;
for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
@@ -102,6 +103,22 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote,
(t->dev->flags & IFF_UP))
return t;
}
+
+ memset(&any, 0, sizeof(any));
+ hash = HASH(&any, local);
+ for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ (t->dev->flags & IFF_UP))
+ return t;
+ }
+
+ hash = HASH(remote, &any);
+ for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
+ if (ipv6_addr_equal(remote, &t->parms.raddr) &&
+ (t->dev->flags & IFF_UP))
+ return t;
+ }
+
t = rcu_dereference(ip6n->tnls_wc[0]);
if (t && (t->dev->flags & IFF_UP))
return t;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index e1a9583bb419..66980d8d98d1 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -110,12 +110,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
- opt = xchg(&inet6_sk(sk)->opt, opt);
- } else {
- spin_lock(&sk->sk_dst_lock);
- opt = xchg(&inet6_sk(sk)->opt, opt);
- spin_unlock(&sk->sk_dst_lock);
}
+ opt = xchg(&inet6_sk(sk)->opt, opt);
sk_dst_reset(sk);
return opt;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7cfb5d745a2d..7f96432292ce 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -148,72 +148,85 @@ static inline int compute_score(struct sock *sk, struct net *net,
const struct in6_addr *daddr, __be16 dport,
int dif)
{
- int score = -1;
+ int score;
+ struct inet_sock *inet;
- if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
- sk->sk_family == PF_INET6) {
- struct inet_sock *inet = inet_sk(sk);
+ if (!net_eq(sock_net(sk), net) ||
+ udp_sk(sk)->udp_port_hash != hnum ||
+ sk->sk_family != PF_INET6)
+ return -1;
- score = 0;
- if (inet->inet_dport) {
- if (inet->inet_dport != sport)
- return -1;
- score++;
- }
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
- return -1;
- score++;
- }
- if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
- return -1;
- score++;
- }
- if (sk->sk_bound_dev_if) {
- if (sk->sk_bound_dev_if != dif)
- return -1;
- score++;
- }
+ score = 0;
+ inet = inet_sk(sk);
+
+ if (inet->inet_dport) {
+ if (inet->inet_dport != sport)
+ return -1;
+ score++;
}
+
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ return -1;
+ score++;
+ }
+
+ if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
+ return -1;
+ score++;
+ }
+
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ return -1;
+ score++;
+ }
+
return score;
}
#define SCORE2_MAX (1 + 1 + 1)
static inline int compute_score2(struct sock *sk, struct net *net,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, unsigned short hnum,
- int dif)
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned short hnum, int dif)
{
- int score = -1;
+ int score;
+ struct inet_sock *inet;
- if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
- sk->sk_family == PF_INET6) {
- struct inet_sock *inet = inet_sk(sk);
+ if (!net_eq(sock_net(sk), net) ||
+ udp_sk(sk)->udp_port_hash != hnum ||
+ sk->sk_family != PF_INET6)
+ return -1;
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ return -1;
+
+ score = 0;
+ inet = inet_sk(sk);
+
+ if (inet->inet_dport) {
+ if (inet->inet_dport != sport)
return -1;
- score = 0;
- if (inet->inet_dport) {
- if (inet->inet_dport != sport)
- return -1;
- score++;
- }
- if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
- return -1;
- score++;
- }
- if (sk->sk_bound_dev_if) {
- if (sk->sk_bound_dev_if != dif)
- return -1;
- score++;
- }
+ score++;
}
+
+ if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
+ return -1;
+ score++;
+ }
+
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ return -1;
+ score++;
+ }
+
return score;
}
-
/* called with read_rcu_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 1f933136155a..3aedbda7658a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -343,7 +343,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
skb_dst_drop(skb);
if (noref) {
if (!local)
- skb_dst_set_noref_force(skb, &rt->dst);
+ skb_dst_set_noref(skb, &rt->dst);
else
skb_dst_set(skb, dst_clone(&rt->dst));
} else
@@ -487,7 +487,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
skb_dst_drop(skb);
if (noref) {
if (!local)
- skb_dst_set_noref_force(skb, &rt->dst);
+ skb_dst_set_noref(skb, &rt->dst);
else
skb_dst_set(skb, dst_clone(&rt->dst));
} else
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index e771a46933e5..9584526c0778 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -600,7 +600,7 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
fl.saddr = tun_key->ipv4_src;
fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
fl.flowi4_mark = skb_mark;
- fl.flowi4_proto = IPPROTO_GRE;
+ fl.flowi4_proto = ipproto;
rt = ip_route_output_key(net, &fl);
if (IS_ERR(rt))
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index cd61280941e5..5aed341406c2 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -72,10 +72,6 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
return l;
}
-static void basic_put(struct tcf_proto *tp, unsigned long f)
-{
-}
-
static int basic_init(struct tcf_proto *tp)
{
struct basic_head *head;
@@ -113,18 +109,12 @@ static void basic_destroy(struct tcf_proto *tp)
static int basic_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct basic_head *head = rtnl_dereference(tp->root);
- struct basic_filter *t, *f = (struct basic_filter *) arg;
-
- list_for_each_entry(t, &head->flist, link)
- if (t == f) {
- list_del_rcu(&t->link);
- tcf_unbind_filter(tp, &t->res);
- call_rcu(&t->rcu, basic_delete_filter);
- return 0;
- }
+ struct basic_filter *f = (struct basic_filter *) arg;
- return -ENOENT;
+ list_del_rcu(&f->link);
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, basic_delete_filter);
+ return 0;
}
static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
@@ -188,10 +178,9 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
- err = -ENOBUFS;
fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
- if (fnew == NULL)
- goto errout;
+ if (!fnew)
+ return -ENOBUFS;
tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
err = -EINVAL;
@@ -293,7 +282,6 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = {
.init = basic_init,
.destroy = basic_destroy,
.get = basic_get,
- .put = basic_put,
.change = basic_change,
.delete = basic_delete,
.walk = basic_walk,
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index eed49d1d0878..84c8219c3e1c 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -109,19 +109,12 @@ static void __cls_bpf_delete_prog(struct rcu_head *rcu)
static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct cls_bpf_head *head = rtnl_dereference(tp->root);
- struct cls_bpf_prog *prog, *todel = (struct cls_bpf_prog *) arg;
+ struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;
- list_for_each_entry(prog, &head->plist, link) {
- if (prog == todel) {
- list_del_rcu(&prog->link);
- tcf_unbind_filter(tp, &prog->res);
- call_rcu(&prog->rcu, __cls_bpf_delete_prog);
- return 0;
- }
- }
-
- return -ENOENT;
+ list_del_rcu(&prog->link);
+ tcf_unbind_filter(tp, &prog->res);
+ call_rcu(&prog->rcu, __cls_bpf_delete_prog);
+ return 0;
}
static void cls_bpf_destroy(struct tcf_proto *tp)
@@ -148,7 +141,7 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
if (head == NULL)
return 0UL;
- list_for_each_entry_rcu(prog, &head->plist, link) {
+ list_for_each_entry(prog, &head->plist, link) {
if (prog->handle == handle) {
ret = (unsigned long) prog;
break;
@@ -158,10 +151,6 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
return ret;
}
-static void cls_bpf_put(struct tcf_proto *tp, unsigned long f)
-{
-}
-
static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
struct cls_bpf_prog *prog,
unsigned long base, struct nlattr **tb,
@@ -344,7 +333,7 @@ static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog;
- list_for_each_entry_rcu(prog, &head->plist, link) {
+ list_for_each_entry(prog, &head->plist, link) {
if (arg->count < arg->skip)
goto skip;
if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
@@ -363,7 +352,6 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
.init = cls_bpf_init,
.destroy = cls_bpf_destroy,
.get = cls_bpf_get,
- .put = cls_bpf_put,
.change = cls_bpf_change,
.delete = cls_bpf_delete,
.walk = cls_bpf_walk,
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index d61a801222c1..741bfa7debb2 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -67,10 +67,6 @@ static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
return 0UL;
}
-static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f)
-{
-}
-
static int cls_cgroup_init(struct tcf_proto *tp)
{
return 0;
@@ -117,11 +113,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;
tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
- if (head)
- new->handle = head->handle;
- else
- new->handle = handle;
-
+ new->handle = handle;
new->tp = tp;
err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
cgroup_policy);
@@ -217,7 +209,6 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
.classify = cls_cgroup_classify,
.destroy = cls_cgroup_destroy,
.get = cls_cgroup_get,
- .put = cls_cgroup_put,
.delete = cls_cgroup_delete,
.walk = cls_cgroup_walk,
.dump = cls_cgroup_dump,
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 4ac515f2a6ce..8e227180cabb 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -426,10 +426,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
goto err2;
/* Copy fold into fnew */
- fnew->handle = fold->handle;
- fnew->keymask = fold->keymask;
fnew->tp = fold->tp;
-
fnew->handle = fold->handle;
fnew->nkeys = fold->nkeys;
fnew->keymask = fold->keymask;
@@ -578,16 +575,12 @@ static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
- list_for_each_entry_rcu(f, &head->filters, list)
+ list_for_each_entry(f, &head->filters, list)
if (f->handle == handle)
return (unsigned long)f;
return 0;
}
-static void flow_put(struct tcf_proto *tp, unsigned long f)
-{
-}
-
static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
@@ -654,7 +647,7 @@ static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
- list_for_each_entry_rcu(f, &head->filters, list) {
+ list_for_each_entry(f, &head->filters, list) {
if (arg->count < arg->skip)
goto skip;
if (arg->fn(tp, (unsigned long)f, arg) < 0) {
@@ -674,7 +667,6 @@ static struct tcf_proto_ops cls_flow_ops __read_mostly = {
.change = flow_change,
.delete = flow_delete,
.get = flow_get,
- .put = flow_put,
.dump = flow_dump,
.walk = flow_walk,
.owner = THIS_MODULE,
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index dbfdfd1f1a9f..23fda2ac0d19 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -111,10 +111,6 @@ static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
return 0;
}
-static void fw_put(struct tcf_proto *tp, unsigned long f)
-{
-}
-
static int fw_init(struct tcf_proto *tp)
{
return 0;
@@ -411,7 +407,6 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = {
.init = fw_init,
.destroy = fw_destroy,
.get = fw_get,
- .put = fw_put,
.change = fw_change,
.delete = fw_delete,
.walk = fw_walk,
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 109a329b7198..098a27360b91 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -256,10 +256,6 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
return 0;
}
-static void route4_put(struct tcf_proto *tp, unsigned long f)
-{
-}
-
static int route4_init(struct tcf_proto *tp)
{
return 0;
@@ -649,7 +645,6 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = {
.init = route4_init,
.destroy = route4_destroy,
.get = route4_get,
- .put = route4_put,
.change = route4_change,
.delete = route4_delete,
.walk = route4_walk,
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 6bb55f277a5a..b7af3623a26a 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -271,10 +271,6 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
return 0;
}
-static void rsvp_put(struct tcf_proto *tp, unsigned long f)
-{
-}
-
static int rsvp_init(struct tcf_proto *tp)
{
struct rsvp_head *data;
@@ -708,7 +704,6 @@ static struct tcf_proto_ops RSVP_OPS __read_mostly = {
.init = rsvp_init,
.destroy = rsvp_destroy,
.get = rsvp_get,
- .put = rsvp_put,
.change = rsvp_change,
.delete = rsvp_delete,
.walk = rsvp_walk,
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 30f10fb07f4a..0d9d8911a621 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -116,13 +116,6 @@ static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL;
}
-
-static void tcindex_put(struct tcf_proto *tp, unsigned long f)
-{
- pr_debug("tcindex_put(tp %p,f 0x%lx)\n", tp, f);
-}
-
-
static int tcindex_init(struct tcf_proto *tp)
{
struct tcindex_data *p;
@@ -560,7 +553,6 @@ static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
.init = tcindex_init,
.destroy = tcindex_destroy,
.get = tcindex_get,
- .put = tcindex_put,
.change = tcindex_change,
.delete = tcindex_delete,
.walk = tcindex_walk,
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 0472909bb014..09487afbfd51 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -299,10 +299,6 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
return (unsigned long)u32_lookup_key(ht, handle);
}
-static void u32_put(struct tcf_proto *tp, unsigned long f)
-{
-}
-
static u32 gen_new_htid(struct tc_u_common *tp_c)
{
int i = 0x800;
@@ -1021,7 +1017,6 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
.init = u32_init,
.destroy = u32_destroy,
.get = u32_get,
- .put = u32_put,
.change = u32_change,
.delete = u32_delete,
.walk = u32_walk,
diff --git a/net/socket.c b/net/socket.c
index ee3ee39eefa5..f676ac4a3701 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -651,7 +651,8 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
}
-int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t size, bool nosec)
{
struct kiocb iocb;
struct sock_iocb siocb;
@@ -659,25 +660,22 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;
- ret = __sock_sendmsg(&iocb, sock, msg, size);
+ ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
+ __sock_sendmsg(&iocb, sock, msg, size);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}
+
+int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+{
+ return do_sock_sendmsg(sock, msg, size, false);
+}
EXPORT_SYMBOL(sock_sendmsg);
static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
{
- struct kiocb iocb;
- struct sock_iocb siocb;
- int ret;
-
- init_sync_kiocb(&iocb, NULL);
- iocb.private = &siocb;
- ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
- return ret;
+ return do_sock_sendmsg(sock, msg, size, true);
}
int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index f0761c771734..96ceefeb9daf 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -233,8 +233,11 @@ static void bclink_retransmit_pkt(u32 after, u32 to)
*/
void tipc_bclink_wakeup_users(void)
{
- while (skb_queue_len(&bclink->link.waiting_sks))
- tipc_sk_rcv(skb_dequeue(&bclink->link.waiting_sks));
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&bclink->link.waiting_sks)))
+ tipc_sk_rcv(skb);
+
}
/**
@@ -950,7 +953,7 @@ int tipc_bclink_init(void)
spin_lock_init(&bclink->lock);
__skb_queue_head_init(&bcl->outqueue);
__skb_queue_head_init(&bcl->deferred_queue);
- __skb_queue_head_init(&bcl->waiting_sks);
+ skb_queue_head_init(&bcl->waiting_sks);
bcl->next_out_no = 1;
spin_lock_init(&bclink->node.lock);
__skb_queue_head_init(&bclink->node.waiting_sks);
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 56248db75274..ba6083dca95b 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -38,39 +38,6 @@
#include "link.h"
#include "name_distr.h"
-/**
- * struct publ_list - list of publications made by this node
- * @list: circular list of publications
- * @list_size: number of entries in list
- */
-struct publ_list {
- struct list_head list;
- u32 size;
-};
-
-static struct publ_list publ_zone = {
- .list = LIST_HEAD_INIT(publ_zone.list),
- .size = 0,
-};
-
-static struct publ_list publ_cluster = {
- .list = LIST_HEAD_INIT(publ_cluster.list),
- .size = 0,
-};
-
-static struct publ_list publ_node = {
- .list = LIST_HEAD_INIT(publ_node.list),
- .size = 0,
-};
-
-static struct publ_list *publ_lists[] = {
- NULL,
- &publ_zone, /* publ_lists[TIPC_ZONE_SCOPE] */
- &publ_cluster, /* publ_lists[TIPC_CLUSTER_SCOPE] */
- &publ_node /* publ_lists[TIPC_NODE_SCOPE] */
-};
-
-
int sysctl_tipc_named_timeout __read_mostly = 2000;
/**
@@ -146,8 +113,8 @@ struct sk_buff *tipc_named_publish(struct publication *publ)
struct sk_buff *buf;
struct distr_item *item;
- list_add_tail(&publ->local_list, &publ_lists[publ->scope]->list);
- publ_lists[publ->scope]->size++;
+ list_add_tail_rcu(&publ->local_list,
+ &tipc_nametbl->publ_list[publ->scope]);
if (publ->scope == TIPC_NODE_SCOPE)
return NULL;
@@ -172,7 +139,6 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ)
struct distr_item *item;
list_del(&publ->local_list);
- publ_lists[publ->scope]->size--;
if (publ->scope == TIPC_NODE_SCOPE)
return NULL;
@@ -195,21 +161,17 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ)
* @pls: linked list of publication items to be packed into buffer chain
*/
static void named_distribute(struct sk_buff_head *list, u32 dnode,
- struct publ_list *pls)
+ struct list_head *pls)
{
struct publication *publ;
struct sk_buff *skb = NULL;
struct distr_item *item = NULL;
- uint dsz = pls->size * ITEM_SIZE;
uint msg_dsz = (tipc_node_get_mtu(dnode, 0) / ITEM_SIZE) * ITEM_SIZE;
- uint rem = dsz;
- uint msg_rem = 0;
+ uint msg_rem = msg_dsz;
- list_for_each_entry(publ, &pls->list, local_list) {
+ list_for_each_entry(publ, pls, local_list) {
/* Prepare next buffer: */
if (!skb) {
- msg_rem = min_t(uint, rem, msg_dsz);
- rem -= msg_rem;
skb = named_prepare_buf(PUBLICATION, msg_rem, dnode);
if (!skb) {
pr_warn("Bulk publication failure\n");
@@ -227,8 +189,14 @@ static void named_distribute(struct sk_buff_head *list, u32 dnode,
if (!msg_rem) {
__skb_queue_tail(list, skb);
skb = NULL;
+ msg_rem = msg_dsz;
}
}
+ if (skb) {
+ msg_set_size(buf_msg(skb), INT_H_SIZE + (msg_dsz - msg_rem));
+ skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem));
+ __skb_queue_tail(list, skb);
+ }
}
/**
@@ -240,10 +208,12 @@ void tipc_named_node_up(u32 dnode)
__skb_queue_head_init(&head);
- read_lock_bh(&tipc_nametbl_lock);
- named_distribute(&head, dnode, &publ_cluster);
- named_distribute(&head, dnode, &publ_zone);
- read_unlock_bh(&tipc_nametbl_lock);
+ rcu_read_lock();
+ named_distribute(&head, dnode,
+ &tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
+ named_distribute(&head, dnode,
+ &tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]);
+ rcu_read_unlock();
tipc_link_xmit(&head, dnode, dnode);
}
@@ -290,12 +260,12 @@ static void tipc_publ_purge(struct publication *publ, u32 addr)
{
struct publication *p;
- write_lock_bh(&tipc_nametbl_lock);
+ spin_lock_bh(&tipc_nametbl_lock);
p = tipc_nametbl_remove_publ(publ->type, publ->lower,
publ->node, publ->ref, publ->key);
if (p)
tipc_publ_unsubscribe(p, addr);
- write_unlock_bh(&tipc_nametbl_lock);
+ spin_unlock_bh(&tipc_nametbl_lock);
if (p != publ) {
pr_err("Unable to remove publication from failed node\n"
@@ -304,7 +274,7 @@ static void tipc_publ_purge(struct publication *publ, u32 addr)
publ->key);
}
- kfree(p);
+ kfree_rcu(p, rcu);
}
void tipc_publ_notify(struct list_head *nsub_list, u32 addr)
@@ -341,7 +311,7 @@ static bool tipc_update_nametbl(struct distr_item *i, u32 node, u32 dtype)
ntohl(i->key));
if (publ) {
tipc_publ_unsubscribe(publ, node);
- kfree(publ);
+ kfree_rcu(publ, rcu);
return true;
}
} else {
@@ -406,14 +376,14 @@ void tipc_named_rcv(struct sk_buff *buf)
u32 count = msg_data_sz(msg) / ITEM_SIZE;
u32 node = msg_orignode(msg);
- write_lock_bh(&tipc_nametbl_lock);
+ spin_lock_bh(&tipc_nametbl_lock);
while (count--) {
if (!tipc_update_nametbl(item, node, msg_type(msg)))
tipc_named_add_backlog(item, msg_type(msg), node);
item++;
}
tipc_named_process_backlog();
- write_unlock_bh(&tipc_nametbl_lock);
+ spin_unlock_bh(&tipc_nametbl_lock);
kfree_skb(buf);
}
@@ -429,11 +399,12 @@ void tipc_named_reinit(void)
struct publication *publ;
int scope;
- write_lock_bh(&tipc_nametbl_lock);
+ spin_lock_bh(&tipc_nametbl_lock);
for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++)
- list_for_each_entry(publ, &publ_lists[scope]->list, local_list)
+ list_for_each_entry_rcu(publ, &tipc_nametbl->publ_list[scope],
+ local_list)
publ->node = tipc_own_addr;
- write_unlock_bh(&tipc_nametbl_lock);
+ spin_unlock_bh(&tipc_nametbl_lock);
}
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 772be1cd8bf6..aafa684c4db9 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -2,7 +2,7 @@
* net/tipc/name_table.c: TIPC name table code
*
* Copyright (c) 2000-2006, 2014, Ericsson AB
- * Copyright (c) 2004-2008, 2010-2011, Wind River Systems
+ * Copyright (c) 2004-2008, 2010-2014, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -92,6 +92,7 @@ struct sub_seq {
* @ns_list: links to adjacent name sequences in hash chain
* @subscriptions: list of subscriptions for this 'type'
* @lock: spinlock controlling access to publication lists of all sub-sequences
+ * @rcu: RCU callback head used for deferred freeing
*/
struct name_seq {
u32 type;
@@ -101,21 +102,11 @@ struct name_seq {
struct hlist_node ns_list;
struct list_head subscriptions;
spinlock_t lock;
+ struct rcu_head rcu;
};
-/**
- * struct name_table - table containing all existing port name publications
- * @types: pointer to fixed-sized array of name sequence lists,
- * accessed via hashing on 'type'; name sequence lists are *not* sorted
- * @local_publ_count: number of publications issued by this node
- */
-struct name_table {
- struct hlist_head *types;
- u32 local_publ_count;
-};
-
-static struct name_table table;
-DEFINE_RWLOCK(tipc_nametbl_lock);
+struct name_table *tipc_nametbl;
+DEFINE_SPINLOCK(tipc_nametbl_lock);
static int hash(int x)
{
@@ -142,9 +133,7 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
publ->node = node;
publ->ref = port_ref;
publ->key = key;
- INIT_LIST_HEAD(&publ->local_list);
INIT_LIST_HEAD(&publ->pport_list);
- INIT_LIST_HEAD(&publ->nodesub_list);
return publ;
}
@@ -179,22 +168,10 @@ static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_hea
nseq->alloc = 1;
INIT_HLIST_NODE(&nseq->ns_list);
INIT_LIST_HEAD(&nseq->subscriptions);
- hlist_add_head(&nseq->ns_list, seq_head);
+ hlist_add_head_rcu(&nseq->ns_list, seq_head);
return nseq;
}
-/*
- * nameseq_delete_empty - deletes a name sequence structure if now unused
- */
-static void nameseq_delete_empty(struct name_seq *seq)
-{
- if (!seq->first_free && list_empty(&seq->subscriptions)) {
- hlist_del_init(&seq->ns_list);
- kfree(seq->sseqs);
- kfree(seq);
- }
-}
-
/**
* nameseq_find_subseq - find sub-sequence (if any) matching a name instance
*
@@ -475,8 +452,8 @@ static struct name_seq *nametbl_find_seq(u32 type)
struct hlist_head *seq_head;
struct name_seq *ns;
- seq_head = &table.types[hash(type)];
- hlist_for_each_entry(ns, seq_head, ns_list) {
+ seq_head = &tipc_nametbl->seq_hlist[hash(type)];
+ hlist_for_each_entry_rcu(ns, seq_head, ns_list) {
if (ns->type == type)
return ns;
}
@@ -487,7 +464,9 @@ static struct name_seq *nametbl_find_seq(u32 type)
struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
u32 scope, u32 node, u32 port, u32 key)
{
+ struct publication *publ;
struct name_seq *seq = nametbl_find_seq(type);
+ int index = hash(type);
if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
(lower > upper)) {
@@ -497,12 +476,16 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
}
if (!seq)
- seq = tipc_nameseq_create(type, &table.types[hash(type)]);
+ seq = tipc_nameseq_create(type,
+ &tipc_nametbl->seq_hlist[index]);
if (!seq)
return NULL;
- return tipc_nameseq_insert_publ(seq, type, lower, upper,
+ spin_lock_bh(&seq->lock);
+ publ = tipc_nameseq_insert_publ(seq, type, lower, upper,
scope, node, port, key);
+ spin_unlock_bh(&seq->lock);
+ return publ;
}
struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
@@ -514,8 +497,16 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
if (!seq)
return NULL;
+ spin_lock_bh(&seq->lock);
publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key);
- nameseq_delete_empty(seq);
+ if (!seq->first_free && list_empty(&seq->subscriptions)) {
+ hlist_del_init_rcu(&seq->ns_list);
+ kfree(seq->sseqs);
+ spin_unlock_bh(&seq->lock);
+ kfree_rcu(seq, rcu);
+ return publ;
+ }
+ spin_unlock_bh(&seq->lock);
return publ;
}
@@ -544,14 +535,14 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode)
if (!tipc_in_scope(*destnode, tipc_own_addr))
return 0;
- read_lock_bh(&tipc_nametbl_lock);
+ rcu_read_lock();
seq = nametbl_find_seq(type);
if (unlikely(!seq))
goto not_found;
+ spin_lock_bh(&seq->lock);
sseq = nameseq_find_subseq(seq, instance);
if (unlikely(!sseq))
- goto not_found;
- spin_lock_bh(&seq->lock);
+ goto no_match;
info = sseq->info;
/* Closest-First Algorithm */
@@ -601,7 +592,7 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode)
no_match:
spin_unlock_bh(&seq->lock);
not_found:
- read_unlock_bh(&tipc_nametbl_lock);
+ rcu_read_unlock();
*destnode = node;
return ref;
}
@@ -627,13 +618,12 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
struct name_info *info;
int res = 0;
- read_lock_bh(&tipc_nametbl_lock);
+ rcu_read_lock();
seq = nametbl_find_seq(type);
if (!seq)
goto exit;
spin_lock_bh(&seq->lock);
-
sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
sseq_stop = seq->sseqs + seq->first_free;
for (; sseq != sseq_stop; sseq++) {
@@ -651,10 +641,9 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
if (info->cluster_list_size != info->node_list_size)
res = 1;
}
-
spin_unlock_bh(&seq->lock);
exit:
- read_unlock_bh(&tipc_nametbl_lock);
+ rcu_read_unlock();
return res;
}
@@ -667,22 +656,23 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
struct publication *publ;
struct sk_buff *buf = NULL;
- if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) {
+ spin_lock_bh(&tipc_nametbl_lock);
+ if (tipc_nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) {
pr_warn("Publication failed, local publication limit reached (%u)\n",
TIPC_MAX_PUBLICATIONS);
+ spin_unlock_bh(&tipc_nametbl_lock);
return NULL;
}
- write_lock_bh(&tipc_nametbl_lock);
publ = tipc_nametbl_insert_publ(type, lower, upper, scope,
tipc_own_addr, port_ref, key);
if (likely(publ)) {
- table.local_publ_count++;
+ tipc_nametbl->local_publ_count++;
buf = tipc_named_publish(publ);
/* Any pending external events? */
tipc_named_process_backlog();
}
- write_unlock_bh(&tipc_nametbl_lock);
+ spin_unlock_bh(&tipc_nametbl_lock);
if (buf)
named_cluster_distribute(buf);
@@ -695,27 +685,28 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
{
struct publication *publ;
- struct sk_buff *buf;
+ struct sk_buff *skb = NULL;
- write_lock_bh(&tipc_nametbl_lock);
+ spin_lock_bh(&tipc_nametbl_lock);
publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key);
if (likely(publ)) {
- table.local_publ_count--;
- buf = tipc_named_withdraw(publ);
+ tipc_nametbl->local_publ_count--;
+ skb = tipc_named_withdraw(publ);
/* Any pending external events? */
tipc_named_process_backlog();
- write_unlock_bh(&tipc_nametbl_lock);
list_del_init(&publ->pport_list);
- kfree(publ);
+ kfree_rcu(publ, rcu);
+ } else {
+ pr_err("Unable to remove local publication\n"
+ "(type=%u, lower=%u, ref=%u, key=%u)\n",
+ type, lower, ref, key);
+ }
+ spin_unlock_bh(&tipc_nametbl_lock);
- if (buf)
- named_cluster_distribute(buf);
+ if (skb) {
+ named_cluster_distribute(skb);
return 1;
}
- write_unlock_bh(&tipc_nametbl_lock);
- pr_err("Unable to remove local publication\n"
- "(type=%u, lower=%u, ref=%u, key=%u)\n",
- type, lower, ref, key);
return 0;
}
@@ -725,12 +716,14 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
void tipc_nametbl_subscribe(struct tipc_subscription *s)
{
u32 type = s->seq.type;
+ int index = hash(type);
struct name_seq *seq;
- write_lock_bh(&tipc_nametbl_lock);
+ spin_lock_bh(&tipc_nametbl_lock);
seq = nametbl_find_seq(type);
if (!seq)
- seq = tipc_nameseq_create(type, &table.types[hash(type)]);
+ seq = tipc_nameseq_create(type,
+ &tipc_nametbl->seq_hlist[index]);
if (seq) {
spin_lock_bh(&seq->lock);
tipc_nameseq_subscribe(seq, s);
@@ -739,7 +732,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s)
pr_warn("Failed to create subscription for {%u,%u,%u}\n",
s->seq.type, s->seq.lower, s->seq.upper);
}
- write_unlock_bh(&tipc_nametbl_lock);
+ spin_unlock_bh(&tipc_nametbl_lock);
}
/**
@@ -749,18 +742,23 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
{
struct name_seq *seq;
- write_lock_bh(&tipc_nametbl_lock);
+ spin_lock_bh(&tipc_nametbl_lock);
seq = nametbl_find_seq(s->seq.type);
if (seq != NULL) {
spin_lock_bh(&seq->lock);
list_del_init(&s->nameseq_list);
- spin_unlock_bh(&seq->lock);
- nameseq_delete_empty(seq);
+ if (!seq->first_free && list_empty(&seq->subscriptions)) {
+ hlist_del_init_rcu(&seq->ns_list);
+ kfree(seq->sseqs);
+ spin_unlock_bh(&seq->lock);
+ kfree_rcu(seq, rcu);
+ } else {
+ spin_unlock_bh(&seq->lock);
+ }
}
- write_unlock_bh(&tipc_nametbl_lock);
+ spin_unlock_bh(&tipc_nametbl_lock);
}
-
/**
* subseq_list - print specified sub-sequence contents into the given buffer
*/
@@ -882,8 +880,8 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
lowbound = 0;
upbound = ~0;
for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
- seq_head = &table.types[i];
- hlist_for_each_entry(seq, seq_head, ns_list) {
+ seq_head = &tipc_nametbl->seq_hlist[i];
+ hlist_for_each_entry_rcu(seq, seq_head, ns_list) {
ret += nameseq_list(seq, buf + ret, len - ret,
depth, seq->type,
lowbound, upbound, i);
@@ -898,8 +896,8 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
}
ret += nametbl_header(buf + ret, len - ret, depth);
i = hash(type);
- seq_head = &table.types[i];
- hlist_for_each_entry(seq, seq_head, ns_list) {
+ seq_head = &tipc_nametbl->seq_hlist[i];
+ hlist_for_each_entry_rcu(seq, seq_head, ns_list) {
if (seq->type == type) {
ret += nameseq_list(seq, buf + ret, len - ret,
depth, type,
@@ -931,11 +929,11 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
pb = TLV_DATA(rep_tlv);
pb_len = ULTRA_STRING_MAX_LEN;
argv = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area);
- read_lock_bh(&tipc_nametbl_lock);
+ rcu_read_lock();
str_len = nametbl_list(pb, pb_len, ntohl(argv->depth),
ntohl(argv->type),
ntohl(argv->lowbound), ntohl(argv->upbound));
- read_unlock_bh(&tipc_nametbl_lock);
+ rcu_read_unlock();
str_len += 1; /* for "\0" */
skb_put(buf, TLV_SPACE(str_len));
TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
@@ -945,12 +943,18 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
int tipc_nametbl_init(void)
{
- table.types = kcalloc(TIPC_NAMETBL_SIZE, sizeof(struct hlist_head),
- GFP_ATOMIC);
- if (!table.types)
+ int i;
+
+ tipc_nametbl = kzalloc(sizeof(*tipc_nametbl), GFP_ATOMIC);
+ if (!tipc_nametbl)
return -ENOMEM;
- table.local_publ_count = 0;
+ for (i = 0; i < TIPC_NAMETBL_SIZE; i++)
+ INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]);
+
+ INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]);
+ INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
+ INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]);
return 0;
}
@@ -965,17 +969,19 @@ static void tipc_purge_publications(struct name_seq *seq)
struct sub_seq *sseq;
struct name_info *info;
- if (!seq->sseqs) {
- nameseq_delete_empty(seq);
- return;
- }
+ spin_lock_bh(&seq->lock);
sseq = seq->sseqs;
info = sseq->info;
list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node,
publ->ref, publ->key);
- kfree(publ);
+ kfree_rcu(publ, rcu);
}
+ hlist_del_init_rcu(&seq->ns_list);
+ kfree(seq->sseqs);
+ spin_unlock_bh(&seq->lock);
+
+ kfree_rcu(seq, rcu);
}
void tipc_nametbl_stop(void)
@@ -983,23 +989,24 @@ void tipc_nametbl_stop(void)
u32 i;
struct name_seq *seq;
struct hlist_head *seq_head;
- struct hlist_node *safe;
/* Verify name table is empty and purge any lingering
* publications, then release the name table
*/
- write_lock_bh(&tipc_nametbl_lock);
+ spin_lock_bh(&tipc_nametbl_lock);
for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
- if (hlist_empty(&table.types[i]))
+ if (hlist_empty(&tipc_nametbl->seq_hlist[i]))
continue;
- seq_head = &table.types[i];
- hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) {
+ seq_head = &tipc_nametbl->seq_hlist[i];
+ hlist_for_each_entry_rcu(seq, seq_head, ns_list) {
tipc_purge_publications(seq);
}
}
- kfree(table.types);
- table.types = NULL;
- write_unlock_bh(&tipc_nametbl_lock);
+ spin_unlock_bh(&tipc_nametbl_lock);
+
+ synchronize_net();
+ kfree(tipc_nametbl);
}
static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
@@ -1103,7 +1110,7 @@ static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type,
u32 *last_lower, u32 *last_publ)
{
struct hlist_head *seq_head;
- struct name_seq *seq;
+ struct name_seq *seq = NULL;
int err;
int i;
@@ -1113,22 +1120,21 @@ static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type,
i = 0;
for (; i < TIPC_NAMETBL_SIZE; i++) {
- seq_head = &table.types[i];
+ seq_head = &tipc_nametbl->seq_hlist[i];
if (*last_type) {
seq = nametbl_find_seq(*last_type);
if (!seq)
return -EPIPE;
} else {
- seq = hlist_entry_safe((seq_head)->first,
- struct name_seq, ns_list);
+ hlist_for_each_entry_rcu(seq, seq_head, ns_list)
+ break;
if (!seq)
continue;
}
- hlist_for_each_entry_from(seq, ns_list) {
+ hlist_for_each_entry_from_rcu(seq, ns_list) {
spin_lock_bh(&seq->lock);
-
err = __tipc_nl_subseq_list(msg, seq, last_lower,
last_publ);
@@ -1160,8 +1166,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
msg.portid = NETLINK_CB(cb->skb).portid;
msg.seq = cb->nlh->nlmsg_seq;
- read_lock_bh(&tipc_nametbl_lock);
-
+ rcu_read_lock();
err = __tipc_nl_seq_list(&msg, &last_type, &last_lower, &last_publ);
if (!err) {
done = 1;
@@ -1174,8 +1179,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
*/
cb->prev_seq = 1;
}
-
- read_unlock_bh(&tipc_nametbl_lock);
+ rcu_read_unlock();
cb->args[0] = last_type;
cb->args[1] = last_lower;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index c62877826655..5f0dee92010d 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -43,7 +43,9 @@ struct tipc_port_list;
/*
* TIPC name types reserved for internal TIPC use (both current and planned)
*/
-#define TIPC_ZM_SRV 3 /* zone master service name type */
+#define TIPC_ZM_SRV 3 /* zone master service name type */
+#define TIPC_PUBL_SCOPE_NUM (TIPC_NODE_SCOPE + 1)
+#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
/**
* struct publication - info about a published (name or) name sequence
@@ -60,6 +62,7 @@ struct tipc_port_list;
* @node_list: adjacent matching name seq publications with >= node scope
* @cluster_list: adjacent matching name seq publications with >= cluster scope
* @zone_list: adjacent matching name seq publications with >= zone scope
+ * @rcu: RCU callback head used for deferred freeing
*
* Note that the node list, cluster list, and zone list are circular lists.
*/
@@ -77,10 +80,23 @@ struct publication {
struct list_head node_list;
struct list_head cluster_list;
struct list_head zone_list;
+ struct rcu_head rcu;
};
+/**
+ * struct name_table - table containing all existing port name publications
+ * @seq_hlist: name sequence hash lists
+ * @publ_list: publication lists
+ * @local_publ_count: number of publications issued by this node
+ */
+struct name_table {
+ struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE];
+ struct list_head publ_list[TIPC_PUBL_SCOPE_NUM];
+ u32 local_publ_count;
+};
-extern rwlock_t tipc_nametbl_lock;
+extern spinlock_t tipc_nametbl_lock;
+extern struct name_table *tipc_nametbl;
int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 9658d9b63876..4731cad99d1c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -827,39 +827,6 @@ exit:
return TIPC_OK;
}
-/**
- * dest_name_check - verify user is permitted to send to specified port name
- * @dest: destination address
- * @m: descriptor for message to be sent
- *
- * Prevents restricted configuration commands from being issued by
- * unauthorized users.
- *
- * Returns 0 if permission is granted, otherwise errno
- */
-static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
-{
- struct tipc_cfg_msg_hdr hdr;
-
- if (unlikely(dest->addrtype == TIPC_ADDR_ID))
- return 0;
- if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES))
- return 0;
- if (likely(dest->addr.name.name.type == TIPC_TOP_SRV))
- return 0;
- if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
- return -EACCES;
-
- if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr)))
- return -EMSGSIZE;
- if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
- return -EFAULT;
- if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
- return -EACCES;
-
- return 0;
-}
-
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
{
struct sock *sk = sock->sk;
@@ -912,7 +879,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
struct tipc_name_seq *seq = &dest->addr.nameseq;
u32 mtu;
long timeo;
- int rc = -EINVAL;
+ int rc;
if (unlikely(!dest))
return -EDESTADDRREQ;
@@ -945,9 +912,6 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
tsk->conn_instance = dest->addr.name.name.instance;
}
}
- rc = dest_name_check(dest, m);
- if (rc)
- goto exit;
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 31b5cb232a43..0344206b984f 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -305,7 +305,6 @@ static int subscr_subscribe(struct tipc_subscr *s,
kfree(sub);
return -EINVAL;
}
- INIT_LIST_HEAD(&sub->nameseq_list);
list_add(&sub->subscription_list, &subscriber->subscription_list);
sub->subscriber = subscriber;
sub->swap = swap;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 88bf289abdc9..cee479bc655c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -55,6 +55,7 @@ static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
static void xfrm_policy_queue_process(unsigned long arg);
+static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
int dir);
@@ -561,7 +562,7 @@ static void xfrm_hash_resize(struct work_struct *work)
mutex_lock(&hash_resize_mutex);
total = 0;
- for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
if (xfrm_bydst_should_resize(net, dir, &total))
xfrm_bydst_resize(net, dir);
}
@@ -601,7 +602,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
write_lock_bh(&net->xfrm.xfrm_policy_lock);
/* reset the bydst and inexact table in all directions */
- for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
hmask = net->xfrm.policy_bydst[dir].hmask;
odst = net->xfrm.policy_bydst[dir].table;
@@ -779,8 +780,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
hlist_add_behind(&policy->bydst, newpos);
else
hlist_add_head(&policy->bydst, chain);
- xfrm_pol_hold(policy);
- net->xfrm.policy_count[dir]++;
+ __xfrm_policy_link(policy, dir);
atomic_inc(&net->xfrm.flow_cache_genid);
/* After previous checking, family can either be AF_INET or AF_INET6 */
@@ -799,7 +799,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
policy->curlft.use_time = 0;
if (!mod_timer(&policy->timer, jiffies + HZ))
xfrm_pol_hold(policy);
- list_add(&policy->walk.all, &net->xfrm.policy_all);
write_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (delpol)
@@ -1247,17 +1246,10 @@ out:
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
struct net *net = xp_net(pol);
- struct hlist_head *chain = policy_hash_bysel(net, &pol->selector,
- pol->family, dir);
list_add(&pol->walk.all, &net->xfrm.policy_all);
- hlist_add_head(&pol->bydst, chain);
- hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index));
net->xfrm.policy_count[dir]++;
xfrm_pol_hold(pol);
-
- if (xfrm_bydst_should_resize(net, dir, NULL))
- schedule_work(&net->xfrm.policy_hash_work);
}
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
@@ -1265,17 +1257,31 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
{
struct net *net = xp_net(pol);
- if (hlist_unhashed(&pol->bydst))
+ if (list_empty(&pol->walk.all))
return NULL;
- hlist_del_init(&pol->bydst);
- hlist_del(&pol->byidx);
- list_del(&pol->walk.all);
+ /* Socket policies are not hashed. */
+ if (!hlist_unhashed(&pol->bydst)) {
+ hlist_del(&pol->bydst);
+ hlist_del(&pol->byidx);
+ }
+
+ list_del_init(&pol->walk.all);
net->xfrm.policy_count[dir]--;
return pol;
}
+static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
+{
+ __xfrm_policy_link(pol, XFRM_POLICY_MAX + dir);
+}
+
+static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
+{
+ __xfrm_policy_unlink(pol, XFRM_POLICY_MAX + dir);
+}
+
int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
struct net *net = xp_net(pol);
@@ -1307,7 +1313,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
if (pol) {
pol->curlft.add_time = get_seconds();
pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
- __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
+ xfrm_sk_policy_link(pol, dir);
}
if (old_pol) {
if (pol)
@@ -1316,7 +1322,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
/* Unlinking succeeds always. This is the only function
* allowed to delete or replace socket policy.
*/
- __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
+ xfrm_sk_policy_unlink(old_pol, dir);
}
write_unlock_bh(&net->xfrm.xfrm_policy_lock);
@@ -1349,7 +1355,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
memcpy(newp->xfrm_vec, old->xfrm_vec,
newp->xfrm_nr*sizeof(struct xfrm_tmpl));
write_lock_bh(&net->xfrm.xfrm_policy_lock);
- __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
+ xfrm_sk_policy_link(newp, dir);
write_unlock_bh(&net->xfrm.xfrm_policy_lock);
xfrm_pol_put(newp);
}
@@ -1878,7 +1884,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
static void xfrm_policy_queue_process(unsigned long arg)
{
- int err = 0;
struct sk_buff *skb;
struct sock *sk;
struct dst_entry *dst;
@@ -1941,7 +1946,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- err = dst_output(skb);
+ dst_output(skb);
}
out:
@@ -2966,10 +2971,11 @@ static int __net_init xfrm_policy_init(struct net *net)
goto out_byidx;
net->xfrm.policy_idx_hmask = hmask;
- for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
struct xfrm_policy_hash *htab;
net->xfrm.policy_count[dir] = 0;
+ net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
htab = &net->xfrm.policy_bydst[dir];
@@ -3021,7 +3027,7 @@ static void xfrm_policy_fini(struct net *net)
WARN_ON(!list_empty(&net->xfrm.policy_all));
- for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
struct xfrm_policy_hash *htab;
WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e812e988c111..8128594ab379 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -824,13 +824,15 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
ret = xfrm_mark_put(skb, &x->mark);
if (ret)
goto out;
- if (x->replay_esn) {
+ if (x->replay_esn)
ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL,
xfrm_replay_state_esn_len(x->replay_esn),
x->replay_esn);
- if (ret)
- goto out;
- }
+ else
+ ret = nla_put(skb, XFRMA_REPLAY_VAL, sizeof(x->replay),
+ &x->replay);
+ if (ret)
+ goto out;
if (x->security)
ret = copy_sec_ctx(x->security, skb);
out:
@@ -2569,6 +2571,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
l += nla_total_size(sizeof(x->tfcpad));
if (x->replay_esn)
l += nla_total_size(xfrm_replay_state_esn_len(x->replay_esn));
+ else
+ l += nla_total_size(sizeof(struct xfrm_replay_state));
if (x->security)
l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) +
x->security->ctx_len);