diff options
author | Eric Dumazet <edumazet@google.com> | 2023-09-22 06:42:15 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2023-10-01 21:39:18 +0300 |
commit | ceaa714138a372ac63cc2c5c19ee0882d22827f9 (patch) | |
tree | 73b347539e9039e35d9ef9566cf38ec5e6cfb214 /net | |
parent | c9746e6a19c24b2d9a74d6657daee3b39fdc1bec (diff) | |
download | linux-ceaa714138a372ac63cc2c5c19ee0882d22827f9.tar.xz |
inet: implement lockless IP_MTU_DISCOVER
inet->pmtudisc can be read locklessly.
Implement proper lockless reads and writes to inet->pmtudisc
ip_sock_set_mtu_discover() can now be called from arbitrary
contexts.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/ip_output.c | 7 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 17 | ||||
-rw-r--r-- | net/ipv4/ping.c | 2 | ||||
-rw-r--r-- | net/ipv4/raw.c | 2 | ||||
-rw-r--r-- | net/ipv4/udp.c | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sync.c | 2 |
6 files changed, 14 insertions, 18 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f07ce051760d..9fc7be2c2033 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1387,8 +1387,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk, struct ip_options *opt = NULL; struct rtable *rt = (struct rtable *)cork->dst; struct iphdr *iph; + u8 pmtudisc, ttl; __be16 df = 0; - __u8 ttl; skb = __skb_dequeue(queue); if (!skb) @@ -1418,8 +1418,9 @@ struct sk_buff *__ip_make_skb(struct sock *sk, /* DF bit is set when we want to see DF on outgoing frames. * If ignore_df is set too, we still allow to fragment this frame * locally. */ - if (inet->pmtudisc == IP_PMTUDISC_DO || - inet->pmtudisc == IP_PMTUDISC_PROBE || + pmtudisc = READ_ONCE(inet->pmtudisc); + if (pmtudisc == IP_PMTUDISC_DO || + pmtudisc == IP_PMTUDISC_PROBE || (skb->len <= dst_mtu(&rt->dst) && ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 4ad3003378ae..6d874cc03c8b 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -622,9 +622,7 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val) { if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) return -EINVAL; - lock_sock(sk); - inet_sk(sk)->pmtudisc = val; - release_sock(sk); + WRITE_ONCE(inet_sk(sk)->pmtudisc, val); return 0; } EXPORT_SYMBOL(ip_sock_set_mtu_discover); @@ -1050,6 +1048,8 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; WRITE_ONCE(inet->mc_ttl, val); return 0; + case IP_MTU_DISCOVER: + return ip_sock_set_mtu_discover(sk, val); } err = 0; @@ -1107,11 +1107,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, case IP_TOS: /* This sets both TOS and Precedence */ __ip_sock_set_tos(sk, val); break; - case IP_MTU_DISCOVER: - if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) - goto e_inval; - inet->pmtudisc = val; - break; case IP_UNICAST_IF: { struct net_device *dev = NULL; @@ -1595,6 +1590,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_MULTICAST_TTL: val = READ_ONCE(inet->mc_ttl); goto copyval; + case IP_MTU_DISCOVER: + val = READ_ONCE(inet->pmtudisc); + goto copyval; } if (needs_rtnl) @@ -1634,9 +1632,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_TOS: val = inet->tos; break; - case IP_MTU_DISCOVER: - val = inet->pmtudisc; - break; case IP_MTU: { struct dst_entry *dst; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 4dd809b7b188..50d12b0c8d46 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -551,7 +551,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ ipv4_sk_update_pmtu(skb, sk, info); - if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { + if (READ_ONCE(inet_sock->pmtudisc) != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; break; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4b5db5d1edc2..ade1aecd7c71 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -239,7 +239,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) if (code > NR_ICMP_UNREACH) break; if (code == ICMP_FRAG_NEEDED) { - harderr = inet->pmtudisc != IP_PMTUDISC_DONT; + harderr = READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT; err = EMSGSIZE; } else { err = icmp_err_convert[code].errno; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c3ff984b6354..731a723dc808 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -750,7 +750,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ ipv4_sk_update_pmtu(skb, sk, info); - if (inet->pmtudisc != IP_PMTUDISC_DONT) { + if (READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; break; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3eed16702248..4f6c795588fb 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1335,7 +1335,7 @@ static void set_mcast_pmtudisc(struct sock *sk, int val) /* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */ lock_sock(sk); - inet->pmtudisc = val; + WRITE_ONCE(inet->pmtudisc, val); #ifdef CONFIG_IP_VS_IPV6 if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); |