diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/devinet.c | 2 | ||||
-rw-r--r-- | net/ipv4/fou.c | 18 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 17 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 20 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 10 |
6 files changed, 33 insertions, 41 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 59ebe16d06fc..f0b4a31d7bd6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2322,7 +2322,7 @@ static __net_initdata struct pernet_operations devinet_ops = { .exit = devinet_exit_net, }; -static struct rtnl_af_ops inet_af_ops = { +static struct rtnl_af_ops inet_af_ops __read_mostly = { .family = AF_INET, .fill_link_af = inet_fill_link_af, .get_link_af_size = inet_get_link_af_size, diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 3bc0cf07661c..92ddea1e6457 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -70,7 +70,6 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - __wsum delta; if (skb->remcsum_offload) { /* Already processed in GRO path */ @@ -82,14 +81,7 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, return NULL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; - if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) - __skb_checksum_complete(skb); - - delta = remcsum_adjust((void *)guehdr + hdrlen, - skb->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); + skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); return guehdr; } @@ -228,7 +220,6 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - __wsum delta; if (skb->remcsum_offload) return guehdr; @@ -243,12 +234,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, return NULL; } - delta = remcsum_adjust((void *)guehdr + hdrlen, - NAPI_GRO_CB(skb)->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); - NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); skb->remcsum_offload = 1; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 36f5584d93c5..5e564014a0b7 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -205,7 +205,7 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; */ static struct sock *icmp_sk(struct net *net) { - return net->ipv4.icmp_sk[smp_processor_id()]; + return *this_cpu_ptr(net->ipv4.icmp_sk); } static inline struct sock *icmp_xmit_lock(struct net *net) @@ -1140,8 +1140,8 @@ static void __net_exit icmp_sk_exit(struct net *net) int i; for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); net->ipv4.icmp_sk = NULL; } @@ -1149,9 +1149,8 @@ static int __net_init icmp_sk_init(struct net *net) { int i, err; - net->ipv4.icmp_sk = - kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); - if (net->ipv4.icmp_sk == NULL) + net->ipv4.icmp_sk = alloc_percpu(struct sock *); + if (!net->ipv4.icmp_sk) return -ENOMEM; for_each_possible_cpu(i) { @@ -1162,7 +1161,7 @@ static int __net_init icmp_sk_init(struct net *net) if (err < 0) goto fail; - net->ipv4.icmp_sk[i] = sk; + *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff/skb_shared_info struct overhead. @@ -1203,8 +1202,8 @@ static int __net_init icmp_sk_init(struct net *net) fail: for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); return err; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index db5e0f81ce0a..31d8c71986b4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -483,7 +483,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin) { + if (sin && skb->len) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); @@ -496,8 +496,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || - ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { + if (skb->len && + (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; if (inet_sk(sk)->cmsg_flags) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 71fb37c70581..d3dfff78fa19 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3183,8 +3183,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); - if (ca_ops->pkts_acked) - ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us); + if (ca_ops->pkts_acked) { + long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us); + ca_ops->pkts_acked(sk, pkts_acked, rtt_us); + } } else if (skb && rtt_update && sack_rtt_us >= 0 && sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { @@ -5870,10 +5872,9 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) * TCP ECN negotiation. * * Exception: tcp_ca wants ECN. This is required for DCTCP - * congestion control; it requires setting ECT on all packets, - * including SYN. We inverse the test in this case: If our - * local socket wants ECN, but peer only set ece/cwr (but not - * ECT in IP header) its probably a non-DCTCP aware sender. + * congestion control: Linux DCTCP asserts ECT on all packets, + * including SYN, which is most optimal solution; however, + * others, such as FreeBSD do not. */ static void tcp_ecn_create_request(struct request_sock *req, const struct sk_buff *skb, @@ -5883,18 +5884,15 @@ static void tcp_ecn_create_request(struct request_sock *req, const struct tcphdr *th = tcp_hdr(skb); const struct net *net = sock_net(listen_sk); bool th_ecn = th->ece && th->cwr; - bool ect, need_ecn, ecn_ok; + bool ect, ecn_ok; if (!th_ecn) return; ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); - need_ecn = tcp_ca_needs_ecn(listen_sk); ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); - if (!ect && !need_ecn && ecn_ok) - inet_rsk(req)->ecn_ok = 1; - else if (ect && need_ecn) + if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk)) inet_rsk(req)->ecn_ok = 1; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 722c8bceaf9a..4fcc9a768849 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -948,7 +948,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; - skb->destructor = tcp_wfree; + skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree; skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); @@ -3270,6 +3270,14 @@ void tcp_send_ack(struct sock *sk) skb_reserve(buff, MAX_TCP_HEADER); tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); + /* We do not want pure acks influencing TCP Small Queues or fq/pacing + * too much. + * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784 + * We also avoid tcp_wfree() overhead (cache line miss accessing + * tp->tsq_flags) by using regular sock_wfree() + */ + skb_set_tcp_pure_ack(buff); + /* Send it off, this clears delayed acks for us. */ skb_mstamp_get(&buff->skb_mstamp); tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); |