summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-02-07 21:07:56 +0300
committerDavid S. Miller <davem@davemloft.net>2017-02-07 21:07:56 +0300
commit29ba6e7400a317725bdfb86a725d1824447dbcd7 (patch)
treeb009850c5a2e7c633a94eeacb71a25f91b4b64f0 /include
parentb08d46b01e995dd7b653b22d35bd1d958d6ee9b4 (diff)
parent51ce8bd4d17a761e1a90a34a1b5c9b762cce7553 (diff)
downloadlinux-29ba6e7400a317725bdfb86a725d1824447dbcd7.tar.xz
Merge branch 'replace-dst_confirm'
Julian Anastasov says: ==================== net: dst_confirm replacement This patchset addresses the problem of neighbour confirmation where received replies from one nexthop can cause confirmation of different nexthop when using the same dst. Thanks to YueHaibing <yuehaibing@huawei.com> for tracking the dst->pending_confirm problem. Sockets can obtain cached output route. Such routes can be to known nexthop (rt_gateway=IP) or to be used simultaneously for different nexthop IPs by different subnet prefixes (nh->nh_scope = RT_SCOPE_HOST, rt_gateway=0). At first look, there are more problems: - dst_confirm() sets flag on dst and not on dst->path, as result, indication is lost when XFRM is used - DNAT can change the nexthop, so the really used nexthop is not confirmed So, the following solution is to avoid using dst->pending_confirm. The current dst_confirm() usage is as follows: Protocols confirming dst on received packets: - TCP (1 dst per socket) - SCTP (1 dst per transport) - CXGB* Protocols supporting sendmsg with MSG_CONFIRM [ | MSG_PROBE ] to confirm neighbour: - UDP IPv4/IPv6 - ICMPv4 PING - RAW IPv4/IPv6 - L2TP/IPv6 MSG_CONFIRM for other purposes (fix not needed): - CAN Sending without locking the socket: - UDP (when no cork) - RAW (when hdrincl=1) Redirects from old to new GW: - rt6_do_redirect The patchset includes the following changes: 1. sock: add sk_dst_pending_confirm flag - used only by TCP with patch 4 to remember the received indication in sk->sk_dst_pending_confirm 2. net: add dst_pending_confirm flag to skbuff - skb->dst_pending_confirm will be used by all protocols in following patches, via skb_{set,get}_dst_pending_confirm 3. sctp: add dst_pending_confirm flag - SCTP uses per-transport dsts and can not use sk->sk_dst_pending_confirm like TCP 4. tcp: replace dst_confirm with sk_dst_confirm 5. net: add confirm_neigh method to dst_ops - IPv4 and IPv6 provision for slow neigh lookups for MSG_PROBE users. I decided to use neigh lookup only for this case because on MSG_PROBE the skb may pass MTU checks but it does not reach the neigh confirmation code. This patch will be used from patch 6. - xfrm_confirm_neigh: we use the last tunnel address, if present. When there are only transports, the original dest address is used. 6. net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP - dst_confirm conversion for UDP, RAW, ICMP and L2TP/IPv6 - these protocols use MSG_CONFIRM propagated by ip*_append_data to skb->dst_pending_confirm. sk->sk_dst_pending_confirm is not used because some sending paths do not lock the socket. For MSG_PROBE we use the slow lookup (dst_confirm_neigh). - there are also 2 cases that need the slow lookup: __ip6_rt_update_pmtu and rt6_do_redirect. I hope &ipv6_hdr(skb)->saddr is the correct nexthop address to use here. 7. net: pending_confirm is not used anymore - I failed to understand the CXGB* code, I see dst_confirm() calls but I'm not sure dst_neigh_output() was called. For now I just removed the dst->pending_confirm flag and left all dst_confirm() calls there. Any better idea? - Now may be old function neigh_output() should be restored instead of dst_neigh_output? ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/skbuff.h12
-rw-r--r--include/net/arp.h16
-rw-r--r--include/net/dst.h21
-rw-r--r--include/net/dst_ops.h2
-rw-r--r--include/net/ndisc.h17
-rw-r--r--include/net/sctp/sctp.h6
-rw-r--r--include/net/sctp/structs.h4
-rw-r--r--include/net/sock.h26
8 files changed, 88 insertions, 16 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c6a78e1892b6..f1adddc1c5ac 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -612,6 +612,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
* @wifi_acked_valid: wifi_acked was set
* @wifi_acked: whether frame was acked on wifi or not
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
+ * @dst_pending_confirm: need to confirm neighbour
* @napi_id: id of the NAPI struct this skb came from
* @secmark: security marking
* @mark: Generic packet mark
@@ -741,6 +742,7 @@ struct sk_buff {
__u8 csum_level:2;
__u8 csum_bad:1;
+ __u8 dst_pending_confirm:1;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
@@ -3698,6 +3700,16 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
return skb->queue_mapping != 0;
}
+static inline void skb_set_dst_pending_confirm(struct sk_buff *skb, u32 val)
+{
+ skb->dst_pending_confirm = val;
+}
+
+static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb)
+{
+ return skb->dst_pending_confirm != 0;
+}
+
static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
diff --git a/include/net/arp.h b/include/net/arp.h
index 5e0f891d476c..65619a2de6f4 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -35,6 +35,22 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32
return n;
}
+static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key)
+{
+ struct neighbour *n;
+
+ rcu_read_lock_bh();
+ n = __ipv4_neigh_lookup_noref(dev, key);
+ if (n) {
+ unsigned long now = jiffies;
+
+ /* avoid dirtying neighbour */
+ if (n->confirmed != now)
+ n->confirmed = now;
+ }
+ rcu_read_unlock_bh();
+}
+
void arp_init(void);
int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg);
void arp_send(int type, int ptype, __be32 dest_ip,
diff --git a/include/net/dst.h b/include/net/dst.h
index 6835d224d47b..84a1043dd6a1 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -59,8 +59,6 @@ struct dst_entry {
#define DST_XFRM_QUEUE 0x0100
#define DST_METADATA 0x0200
- unsigned short pending_confirm;
-
short error;
/* A non-zero value of dst->obsolete forces by-hand validation
@@ -78,6 +76,8 @@ struct dst_entry {
#define DST_OBSOLETE_KILL -2
unsigned short header_len; /* more space at head required */
unsigned short trailer_len; /* space to reserve at tail */
+ unsigned short __pad3;
+
#ifdef CONFIG_IP_ROUTE_CLASSID
__u32 tclassid;
#else
@@ -440,7 +440,6 @@ static inline void dst_rcu_free(struct rcu_head *head)
static inline void dst_confirm(struct dst_entry *dst)
{
- dst->pending_confirm = 1;
}
static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
@@ -448,15 +447,6 @@ static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
{
const struct hh_cache *hh;
- if (dst->pending_confirm) {
- unsigned long now = jiffies;
-
- dst->pending_confirm = 0;
- /* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
- }
-
hh = &n->hh;
if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
return neigh_hh_output(hh, skb);
@@ -477,6 +467,13 @@ static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst
return IS_ERR(n) ? NULL : n;
}
+static inline void dst_confirm_neigh(const struct dst_entry *dst,
+ const void *daddr)
+{
+ if (dst->ops->confirm_neigh)
+ dst->ops->confirm_neigh(dst, daddr);
+}
+
static inline void dst_link_failure(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 8a2b66d8d78d..c84b3287e38b 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -33,6 +33,8 @@ struct dst_ops {
struct neighbour * (*neigh_lookup)(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr);
+ void (*confirm_neigh)(const struct dst_entry *dst,
+ const void *daddr);
struct kmem_cache *kmem_cachep;
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index d562a2fe4860..8a0214654b6b 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -391,6 +391,23 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons
return n;
}
+static inline void __ipv6_confirm_neigh(struct net_device *dev,
+ const void *pkey)
+{
+ struct neighbour *n;
+
+ rcu_read_lock_bh();
+ n = __ipv6_neigh_lookup_noref(dev, pkey);
+ if (n) {
+ unsigned long now = jiffies;
+
+ /* avoid dirtying neighbour */
+ if (n->confirmed != now)
+ n->confirmed = now;
+ }
+ rcu_read_unlock_bh();
+}
+
int ndisc_init(void);
int ndisc_late_init(void);
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 3cfd365bcfbc..480b65a24aff 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -593,10 +593,8 @@ static inline void sctp_v4_map_v6(union sctp_addr *addr)
*/
static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t)
{
- if (t->dst && !dst_check(t->dst, t->dst_cookie)) {
- dst_release(t->dst);
- t->dst = NULL;
- }
+ if (t->dst && !dst_check(t->dst, t->dst_cookie))
+ sctp_transport_dst_release(t);
return t->dst;
}
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 231fa9ac50bd..6a685049f67f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -804,6 +804,8 @@ struct sctp_transport {
__u32 burst_limited; /* Holds old cwnd when max.burst is applied */
+ __u32 dst_pending_confirm; /* need to confirm neighbour */
+
/* Destination */
struct dst_entry *dst;
/* Source address. */
@@ -950,6 +952,8 @@ unsigned long sctp_transport_timeout(struct sctp_transport *);
void sctp_transport_reset(struct sctp_transport *);
void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32);
void sctp_transport_immediate_rtx(struct sctp_transport *);
+void sctp_transport_dst_release(struct sctp_transport *t);
+void sctp_transport_dst_confirm(struct sctp_transport *t);
/* This is the structure we use to queue packets as they come into
diff --git a/include/net/sock.h b/include/net/sock.h
index 94e65fd70354..6f83e78eaa5a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -240,6 +240,7 @@ struct sock_common {
* @sk_wq: sock wait queue and async head
* @sk_rx_dst: receive input route used by early demux
* @sk_dst_cache: destination cache
+ * @sk_dst_pending_confirm: need to confirm neighbour
* @sk_policy: flow policy
* @sk_receive_queue: incoming packets
* @sk_wmem_alloc: transmit queue bytes committed
@@ -393,6 +394,8 @@ struct sock {
struct sk_buff_head sk_write_queue;
__s32 sk_peek_off;
int sk_write_pending;
+ __u32 sk_dst_pending_confirm;
+ /* Note: 32bit hole on 64bit arches */
long sk_sndtimeo;
struct timer_list sk_timer;
__u32 sk_priority;
@@ -1764,6 +1767,7 @@ static inline void dst_negative_advice(struct sock *sk)
if (ndst != dst) {
rcu_assign_pointer(sk->sk_dst_cache, ndst);
sk_tx_queue_clear(sk);
+ sk->sk_dst_pending_confirm = 0;
}
}
}
@@ -1774,6 +1778,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst)
struct dst_entry *old_dst;
sk_tx_queue_clear(sk);
+ sk->sk_dst_pending_confirm = 0;
/*
* This can be called while sk is owned by the caller only,
* with no state that can be checked in a rcu_dereference_check() cond
@@ -1789,6 +1794,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst)
struct dst_entry *old_dst;
sk_tx_queue_clear(sk);
+ sk->sk_dst_pending_confirm = 0;
old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst);
dst_release(old_dst);
}
@@ -1809,6 +1815,26 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
+static inline void sk_dst_confirm(struct sock *sk)
+{
+ if (!sk->sk_dst_pending_confirm)
+ sk->sk_dst_pending_confirm = 1;
+}
+
+static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
+{
+ if (skb_get_dst_pending_confirm(skb)) {
+ struct sock *sk = skb->sk;
+ unsigned long now = jiffies;
+
+ /* avoid dirtying neighbour */
+ if (n->confirmed != now)
+ n->confirmed = now;
+ if (sk && sk->sk_dst_pending_confirm)
+ sk->sk_dst_pending_confirm = 0;
+ }
+}
+
bool sk_mc_loop(struct sock *sk);
static inline bool sk_can_gso(const struct sock *sk)