diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/arp.c | 6 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 3 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel_core.c | 4 | ||||
-rw-r--r-- | net/ipv4/netfilter.c | 8 | ||||
-rw-r--r-- | net/ipv4/netfilter/iptable_mangle.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_reject_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 9 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_bbr.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_bpf.c | 18 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 3 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 19 | ||||
-rw-r--r-- | net/ipv4/xfrm4_tunnel.c | 4 |
15 files changed, 60 insertions, 28 deletions
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 687971d83b4e..922dd73e5740 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -125,6 +125,7 @@ static int arp_constructor(struct neighbour *neigh); static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); static void parp_redo(struct sk_buff *skb); +static int arp_is_multicast(const void *pkey); static const struct neigh_ops arp_generic_ops = { .family = AF_INET, @@ -156,6 +157,7 @@ struct neigh_table arp_tbl = { .key_eq = arp_key_eq, .constructor = arp_constructor, .proxy_redo = parp_redo, + .is_multicast = arp_is_multicast, .id = "arp_cache", .parms = { .tbl = &arp_tbl, @@ -928,6 +930,10 @@ static void parp_redo(struct sk_buff *skb) arp_process(dev_net(skb->dev), NULL, skb); } +static int arp_is_multicast(const void *pkey) +{ + return ipv4_is_multicast(*((__be32 *)pkey)); +} /* * Receive an arp request from the device layer. diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 86a23e4a6a50..b87140a1fa28 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -696,7 +696,7 @@ int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, cfg->fc_gw4 = *((__be32 *)via->rtvia_addr); break; case AF_INET6: -#ifdef CONFIG_IPV6 +#if IS_ENABLED(CONFIG_IPV6) if (alen != sizeof(struct in6_addr)) { NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA"); return -EINVAL; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 366a4507b5a3..93474b1bea4e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -479,8 +479,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, r->idiag_inode = 0; if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, - inet_rsk(reqsk)->ir_mark)) + inet_rsk(reqsk)->ir_mark)) { + nlmsg_cancel(skb, nlh); return -EMSGSIZE; + } nlmsg_end(skb, nlh); return 0; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 8b04d1dcfec4..ee65c9225178 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -608,9 +608,6 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } - if (!df && skb->protocol == htons(ETH_P_IP)) - df = inner_iph->frag_off & htons(IP_DF); - headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; if (headroom > dev->needed_headroom) dev->needed_headroom = headroom; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 25f1caf5abf9..e25be2d01a7a 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -263,7 +263,7 @@ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) const struct icmphdr *icmph = icmp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); - if (mtu <= 576 || iph->frag_off != htons(IP_DF)) + if (mtu < 576 || iph->frag_off != htons(IP_DF)) return 0; if (ipv4_is_lbcast(iph->daddr) || ipv4_is_multicast(iph->daddr) || @@ -359,7 +359,7 @@ static int iptunnel_pmtud_check_icmpv6(struct sk_buff *skb, int mtu) __be16 frag_off; int offset; - if (mtu <= IPV6_MIN_MTU) + if (mtu < IPV6_MIN_MTU) return 0; if (stype == IPV6_ADDR_ANY || stype == IPV6_ADDR_MULTICAST || diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index a058213b77a7..7c841037c533 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,17 +17,19 @@ #include <net/netfilter/nf_queue.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type) +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; - const struct sock *sk = skb_to_full_sk(skb); - __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0; + __u8 flags; struct net_device *dev = skb_dst(skb)->dev; unsigned int hh_len; + sk = sk_to_full_sk(sk); + flags = sk ? inet_sk_flowi_flags(sk) : 0; + if (addr_type == RTN_UNSPEC) addr_type = inet_addr_type_dev_table(net, dev, saddr); if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index f703a717ab1d..833079589273 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -62,7 +62,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 9dcfa4e461b6..93b07739807b 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -145,7 +145,7 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) ip4_dst_hoplimit(skb_dst(nskb))); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); - if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC)) goto free_nskb; niph = ip_hdr(nskb); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 6ac473b47f30..00dc3f943c80 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -331,7 +331,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) __u32 cookie = ntohl(th->ack_seq) - 1; struct sock *ret = sk; struct request_sock *req; - int mss; + int full_space, mss; struct rtable *rt; __u8 rcv_wscale; struct flowi4 fl4; @@ -427,8 +427,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); + /* limit the window selection if the user enforce a smaller rx buffer */ + full_space = tcp_full_space(sk); + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) + req->rsk_window_clamp = full_space; - tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, + tcp_select_initial_window(sk, full_space, req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(&rt->dst, RTAX_INITRWND)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bae4284bf542..b2bc3d7fe9e8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -485,6 +485,8 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, return true; if (tcp_rmem_pressure(sk)) return true; + if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss) + return true; } if (sk->sk_prot->stream_memory_read) return sk->sk_prot->stream_memory_read(sk); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 6c4d79baff26..6ea3dc2e4219 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -945,7 +945,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) filter_expired = after(tcp_jiffies32, bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); if (rs->rtt_us >= 0 && - (rs->rtt_us <= bbr->min_rtt_us || + (rs->rtt_us < bbr->min_rtt_us || (filter_expired && !rs->is_ack_delayed))) { bbr->min_rtt_us = rs->rtt_us; bbr->min_rtt_stamp = tcp_jiffies32; diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 37f4cb2bba5c..bc7d2a586e18 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -15,8 +15,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, { struct iov_iter *iter = &msg->msg_iter; int peek = flags & MSG_PEEK; - int i, ret, copied = 0; struct sk_msg *msg_rx; + int i, copied = 0; msg_rx = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); @@ -37,17 +37,16 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, page = sg_page(sge); if (copied + copy > len) copy = len - copied; - ret = copy_page_to_iter(page, sge->offset, copy, iter); - if (ret != copy) { - msg_rx->sg.start = i; - return -EFAULT; - } + copy = copy_page_to_iter(page, sge->offset, copy, iter); + if (!copy) + return copied ? copied : -EFAULT; copied += copy; if (likely(!peek)) { sge->offset += copy; sge->length -= copy; - sk_mem_uncharge(sk, copy); + if (!msg_rx->skb) + sk_mem_uncharge(sk, copy); msg_rx->sg.size -= copy; if (!sge->length) { @@ -56,6 +55,11 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, put_page(page); } } else { + /* Lets not optimize peek case if copy_page_to_iter + * didn't copy the entire length lets just break. + */ + if (copy != sge->length) + return copied; sk_msg_iter_var_next(i); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fc445833b5e5..389d1b340248 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4908,7 +4908,8 @@ void tcp_data_ready(struct sock *sk) int avail = tp->rcv_nxt - tp->copied_seq; if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && - !sock_flag(sk, SOCK_DONE)) + !sock_flag(sk, SOCK_DONE) && + tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss) return; sk->sk_data_ready(sk); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index e67a66fbf27b..c62805cd3131 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -366,7 +366,7 @@ out: static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *skb) { - struct udphdr *uh = udp_hdr(skb); + struct udphdr *uh = udp_gro_udphdr(skb); struct sk_buff *pp = NULL; struct udphdr *uh2; struct sk_buff *p; @@ -500,12 +500,22 @@ out: } EXPORT_SYMBOL(udp_gro_receive); +static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, + __be16 dport) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + + return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + inet_sdif(skb), &udp_table, NULL); +} + INDIRECT_CALLABLE_SCOPE struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); + struct sock *sk = NULL; struct sk_buff *pp; - struct sock *sk; if (unlikely(!uh)) goto flush; @@ -523,7 +533,10 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) skip: NAPI_GRO_CB(skb)->is_ipv6 = 0; rcu_read_lock(); - sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL; + + if (static_branch_unlikely(&udp_encap_needed_key)) + sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest); + pp = udp_gro_receive(head, skb, uh, sk); rcu_read_unlock(); return pp; diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index dc19aff7c2e0..fb0648e7fb32 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -64,14 +64,14 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, - .priority = 3, + .priority = 4, }; #if IS_ENABLED(CONFIG_IPV6) static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, - .priority = 2, + .priority = 3, }; #endif |