diff options
| author | David S. Miller <davem@davemloft.net> | 2017-02-07 21:07:56 +0300 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2017-02-07 21:07:56 +0300 |
| commit | 29ba6e7400a317725bdfb86a725d1824447dbcd7 (patch) | |
| tree | b009850c5a2e7c633a94eeacb71a25f91b4b64f0 /include | |
| parent | b08d46b01e995dd7b653b22d35bd1d958d6ee9b4 (diff) | |
| parent | 51ce8bd4d17a761e1a90a34a1b5c9b762cce7553 (diff) | |
| download | linux-29ba6e7400a317725bdfb86a725d1824447dbcd7.tar.xz | |
Merge branch 'replace-dst_confirm'
Julian Anastasov says:
====================
net: dst_confirm replacement
This patchset addresses the problem of neighbour
confirmation where received replies from one nexthop
can cause confirmation of different nexthop when using
the same dst. Thanks to YueHaibing <yuehaibing@huawei.com>
for tracking the dst->pending_confirm problem.
Sockets can obtain cached output route. Such
routes can be to known nexthop (rt_gateway=IP) or to be
used simultaneously for different nexthop IPs by different
subnet prefixes (nh->nh_scope = RT_SCOPE_HOST, rt_gateway=0).
At first look, there are more problems:
- dst_confirm() sets flag on dst and not on dst->path,
as result, indication is lost when XFRM is used
- DNAT can change the nexthop, so the really used nexthop is
not confirmed
So, the following solution is to avoid using
dst->pending_confirm.
The current dst_confirm() usage is as follows:
Protocols confirming dst on received packets:
- TCP (1 dst per socket)
- SCTP (1 dst per transport)
- CXGB*
Protocols supporting sendmsg with MSG_CONFIRM [ | MSG_PROBE ] to
confirm neighbour:
- UDP IPv4/IPv6
- ICMPv4 PING
- RAW IPv4/IPv6
- L2TP/IPv6
MSG_CONFIRM for other purposes (fix not needed):
- CAN
Sending without locking the socket:
- UDP (when no cork)
- RAW (when hdrincl=1)
Redirects from old to new GW:
- rt6_do_redirect
The patchset includes the following changes:
1. sock: add sk_dst_pending_confirm flag
- used only by TCP with patch 4 to remember the received
indication in sk->sk_dst_pending_confirm
2. net: add dst_pending_confirm flag to skbuff
- skb->dst_pending_confirm will be used by all protocols
in following patches, via skb_{set,get}_dst_pending_confirm
3. sctp: add dst_pending_confirm flag
- SCTP uses per-transport dsts and can not use
sk->sk_dst_pending_confirm like TCP
4. tcp: replace dst_confirm with sk_dst_confirm
5. net: add confirm_neigh method to dst_ops
- IPv4 and IPv6 provision for slow neigh lookups for MSG_PROBE users.
I decided to use neigh lookup only for this case because on
MSG_PROBE the skb may pass MTU checks but it does not reach
the neigh confirmation code. This patch will be used from patch 6.
- xfrm_confirm_neigh: we use the last tunnel address, if present.
When there are only transports, the original dest address is used.
6. net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP
- dst_confirm conversion for UDP, RAW, ICMP and L2TP/IPv6
- these protocols use MSG_CONFIRM propagated by ip*_append_data
to skb->dst_pending_confirm. sk->sk_dst_pending_confirm is not
used because some sending paths do not lock the socket. For
MSG_PROBE we use the slow lookup (dst_confirm_neigh).
- there are also 2 cases that need the slow lookup:
__ip6_rt_update_pmtu and rt6_do_redirect. I hope
&ipv6_hdr(skb)->saddr is the correct nexthop address to use here.
7. net: pending_confirm is not used anymore
- I failed to understand the CXGB* code, I see dst_confirm()
calls but I'm not sure dst_neigh_output() was called. For now
I just removed the dst->pending_confirm flag and left all
dst_confirm() calls there. Any better idea?
- Now may be old function neigh_output() should be restored
instead of dst_neigh_output?
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/skbuff.h | 12 | ||||
| -rw-r--r-- | include/net/arp.h | 16 | ||||
| -rw-r--r-- | include/net/dst.h | 21 | ||||
| -rw-r--r-- | include/net/dst_ops.h | 2 | ||||
| -rw-r--r-- | include/net/ndisc.h | 17 | ||||
| -rw-r--r-- | include/net/sctp/sctp.h | 6 | ||||
| -rw-r--r-- | include/net/sctp/structs.h | 4 | ||||
| -rw-r--r-- | include/net/sock.h | 26 |
8 files changed, 88 insertions, 16 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c6a78e1892b6..f1adddc1c5ac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -612,6 +612,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS + * @dst_pending_confirm: need to confirm neighbour * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking * @mark: Generic packet mark @@ -741,6 +742,7 @@ struct sk_buff { __u8 csum_level:2; __u8 csum_bad:1; + __u8 dst_pending_confirm:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif @@ -3698,6 +3700,16 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) return skb->queue_mapping != 0; } +static inline void skb_set_dst_pending_confirm(struct sk_buff *skb, u32 val) +{ + skb->dst_pending_confirm = val; +} + +static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb) +{ + return skb->dst_pending_confirm != 0; +} + static inline struct sec_path *skb_sec_path(struct sk_buff *skb) { #ifdef CONFIG_XFRM diff --git a/include/net/arp.h b/include/net/arp.h index 5e0f891d476c..65619a2de6f4 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -35,6 +35,22 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 return n; } +static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv4_neigh_lookup_noref(dev, key); + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + } + rcu_read_unlock_bh(); +} + void arp_init(void); int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg); void arp_send(int type, int ptype, __be32 dest_ip, diff --git a/include/net/dst.h b/include/net/dst.h index 6835d224d47b..84a1043dd6a1 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -59,8 +59,6 @@ struct dst_entry { #define DST_XFRM_QUEUE 0x0100 #define DST_METADATA 0x0200 - unsigned short pending_confirm; - short error; /* A non-zero value of dst->obsolete forces by-hand validation @@ -78,6 +76,8 @@ struct dst_entry { #define DST_OBSOLETE_KILL -2 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ + unsigned short __pad3; + #ifdef CONFIG_IP_ROUTE_CLASSID __u32 tclassid; #else @@ -440,7 +440,6 @@ static inline void dst_rcu_free(struct rcu_head *head) static inline void dst_confirm(struct dst_entry *dst) { - dst->pending_confirm = 1; } static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, @@ -448,15 +447,6 @@ static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, { const struct hh_cache *hh; - if (dst->pending_confirm) { - unsigned long now = jiffies; - - dst->pending_confirm = 0; - /* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; - } - hh = &n->hh; if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) return neigh_hh_output(hh, skb); @@ -477,6 +467,13 @@ static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst return IS_ERR(n) ? NULL : n; } +static inline void dst_confirm_neigh(const struct dst_entry *dst, + const void *daddr) +{ + if (dst->ops->confirm_neigh) + dst->ops->confirm_neigh(dst, daddr); +} + static inline void dst_link_failure(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 8a2b66d8d78d..c84b3287e38b 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -33,6 +33,8 @@ struct dst_ops { struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); + void (*confirm_neigh)(const struct dst_entry *dst, + const void *daddr); struct kmem_cache *kmem_cachep; diff --git a/include/net/ndisc.h b/include/net/ndisc.h index d562a2fe4860..8a0214654b6b 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -391,6 +391,23 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons return n; } +static inline void __ipv6_confirm_neigh(struct net_device *dev, + const void *pkey) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv6_neigh_lookup_noref(dev, pkey); + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + } + rcu_read_unlock_bh(); +} + int ndisc_init(void); int ndisc_late_init(void); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 3cfd365bcfbc..480b65a24aff 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -593,10 +593,8 @@ static inline void sctp_v4_map_v6(union sctp_addr *addr) */ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) { - if (t->dst && !dst_check(t->dst, t->dst_cookie)) { - dst_release(t->dst); - t->dst = NULL; - } + if (t->dst && !dst_check(t->dst, t->dst_cookie)) + sctp_transport_dst_release(t); return t->dst; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 231fa9ac50bd..6a685049f67f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -804,6 +804,8 @@ struct sctp_transport { __u32 burst_limited; /* Holds old cwnd when max.burst is applied */ + __u32 dst_pending_confirm; /* need to confirm neighbour */ + /* Destination */ struct dst_entry *dst; /* Source address. */ @@ -950,6 +952,8 @@ unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *); void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32); void sctp_transport_immediate_rtx(struct sctp_transport *); +void sctp_transport_dst_release(struct sctp_transport *t); +void sctp_transport_dst_confirm(struct sctp_transport *t); /* This is the structure we use to queue packets as they come into diff --git a/include/net/sock.h b/include/net/sock.h index 94e65fd70354..6f83e78eaa5a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -240,6 +240,7 @@ struct sock_common { * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux * @sk_dst_cache: destination cache + * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed @@ -393,6 +394,8 @@ struct sock { struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; + __u32 sk_dst_pending_confirm; + /* Note: 32bit hole on 64bit arches */ long sk_sndtimeo; struct timer_list sk_timer; __u32 sk_priority; @@ -1764,6 +1767,7 @@ static inline void dst_negative_advice(struct sock *sk) if (ndst != dst) { rcu_assign_pointer(sk->sk_dst_cache, ndst); sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; } } } @@ -1774,6 +1778,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; /* * This can be called while sk is owned by the caller only, * with no state that can be checked in a rcu_dereference_check() cond @@ -1789,6 +1794,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); dst_release(old_dst); } @@ -1809,6 +1815,26 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); +static inline void sk_dst_confirm(struct sock *sk) +{ + if (!sk->sk_dst_pending_confirm) + sk->sk_dst_pending_confirm = 1; +} + +static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) +{ + if (skb_get_dst_pending_confirm(skb)) { + struct sock *sk = skb->sk; + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + if (sk && sk->sk_dst_pending_confirm) + sk->sk_dst_pending_confirm = 0; + } +} + bool sk_mc_loop(struct sock *sk); static inline bool sk_can_gso(const struct sock *sk) |
