Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/devinet.c      |  2
-rw-r--r--  net/ipv4/fou.c          | 18
-rw-r--r--  net/ipv4/icmp.c         | 17
-rw-r--r--  net/ipv4/ip_sockglue.c  |  7
-rw-r--r--  net/ipv4/tcp_input.c    | 20
-rw-r--r--  net/ipv4/tcp_output.c   | 10
6 files changed, 33 insertions(+), 41 deletions(-)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 59ebe16d06fc..f0b4a31d7bd6 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2322,7 +2322,7 @@ static __net_initdata struct pernet_operations devinet_ops = {
.exit = devinet_exit_net,
};
-static struct rtnl_af_ops inet_af_ops = {
+static struct rtnl_af_ops inet_af_ops __read_mostly = {
.family = AF_INET,
.fill_link_af = inet_fill_link_af,
.get_link_af_size = inet_get_link_af_size,
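[The only change here is the __read_mostly annotation, which groups rarely-written objects into their own section so they do not share cache lines with hot, frequently-written data. A minimal sketch of the pattern; the section attribute shown is how x86 defines it, and other architectures may differ:]

    /* sketch: __read_mostly keeps rarely-written data off hot cache lines */
    #define __read_mostly __attribute__((__section__(".data..read_mostly")))

    static struct rtnl_af_ops inet_af_ops __read_mostly = {
            .family = AF_INET,
            /* ... */
    };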
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 3bc0cf07661c..92ddea1e6457 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -70,7 +70,6 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
size_t start = ntohs(pd[0]);
size_t offset = ntohs(pd[1]);
size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
- __wsum delta;
if (skb->remcsum_offload) {
/* Already processed in GRO path */
@@ -82,14 +81,7 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
return NULL;
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
- if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE))
- __skb_checksum_complete(skb);
-
- delta = remcsum_adjust((void *)guehdr + hdrlen,
- skb->csum, start, offset);
-
- /* Adjust skb->csum since we changed the packet */
- skb->csum = csum_add(skb->csum, delta);
+ skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset);
return guehdr;
}
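[The open-coded remote-checksum fix-up is replaced by the shared skb_remcsum_process() helper. A sketch of what the helper presumably does, reconstructed from the lines deleted above:]

    /* sketch, consistent with the code removed above */
    static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr,
                                           int start, int offset)
    {
            __wsum delta;

            if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE))
                    __skb_checksum_complete(skb);

            delta = remcsum_adjust(ptr, skb->csum, start, offset);

            /* adjust skb->csum since the packet contents changed */
            skb->csum = csum_add(skb->csum, delta);
    }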
@@ -228,7 +220,6 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
size_t start = ntohs(pd[0]);
size_t offset = ntohs(pd[1]);
size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
- __wsum delta;
if (skb->remcsum_offload)
return guehdr;
@@ -243,12 +234,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
return NULL;
}
- delta = remcsum_adjust((void *)guehdr + hdrlen,
- NAPI_GRO_CB(skb)->csum, start, offset);
-
- /* Adjust skb->csum since we changed the packet */
- skb->csum = csum_add(skb->csum, delta);
- NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
+ skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset);
skb->remcsum_offload = 1;
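[The GRO path gets the analogous helper: skb_gro_remcsum_process() must additionally keep NAPI_GRO_CB(skb)->csum in sync, since GRO maintains its own running checksum. Again a sketch reconstructed from the deleted lines:]

    /* sketch, consistent with the code removed above */
    static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
                                               int start, int offset)
    {
            __wsum delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum,
                                          start, offset);

            /* keep both running checksums in sync with the modified packet */
            skb->csum = csum_add(skb->csum, delta);
            NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
    }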
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 36f5584d93c5..5e564014a0b7 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -205,7 +205,7 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
*/
static struct sock *icmp_sk(struct net *net)
{
- return net->ipv4.icmp_sk[smp_processor_id()];
+ return *this_cpu_ptr(net->ipv4.icmp_sk);
}
static inline struct sock *icmp_xmit_lock(struct net *net)
@@ -1140,8 +1140,8 @@ static void __net_exit icmp_sk_exit(struct net *net)
int i;
for_each_possible_cpu(i)
- inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]);
- kfree(net->ipv4.icmp_sk);
+ inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
+ free_percpu(net->ipv4.icmp_sk);
net->ipv4.icmp_sk = NULL;
}
@@ -1149,9 +1149,8 @@ static int __net_init icmp_sk_init(struct net *net)
{
int i, err;
- net->ipv4.icmp_sk =
- kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
- if (net->ipv4.icmp_sk == NULL)
+ net->ipv4.icmp_sk = alloc_percpu(struct sock *);
+ if (!net->ipv4.icmp_sk)
return -ENOMEM;
for_each_possible_cpu(i) {
@@ -1162,7 +1161,7 @@ static int __net_init icmp_sk_init(struct net *net)
if (err < 0)
goto fail;
- net->ipv4.icmp_sk[i] = sk;
+ *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk;
/* Enough space for 2 64K ICMP packets, including
* sk_buff/skb_shared_info struct overhead.
@@ -1203,8 +1202,8 @@ static int __net_init icmp_sk_init(struct net *net)
fail:
for_each_possible_cpu(i)
- inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]);
- kfree(net->ipv4.icmp_sk);
+ inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
+ free_percpu(net->ipv4.icmp_sk);
return err;
}
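[The per-netns ICMP sockets move from a kzalloc'd array of nr_cpu_ids pointers, indexed by smp_processor_id(), to a true per-CPU variable, so each CPU finds its socket in its own per-CPU area. The general pattern as a self-contained sketch; icmp_sk_alloc() and icmp_sk_free() are hypothetical stand-ins for inet_ctl_sock_create()/inet_ctl_sock_destroy():]

    struct sock * __percpu *psk;
    struct sock *sk;
    int cpu;

    psk = alloc_percpu(struct sock *);      /* one NULL slot per possible CPU */
    if (!psk)
            return -ENOMEM;

    for_each_possible_cpu(cpu)
            *per_cpu_ptr(psk, cpu) = icmp_sk_alloc(cpu);    /* hypothetical */

    sk = *this_cpu_ptr(psk);        /* fast path: this CPU's socket,
                                     * caller has preemption disabled */

    for_each_possible_cpu(cpu)
            icmp_sk_free(*per_cpu_ptr(psk, cpu));           /* hypothetical */
    free_percpu(psk);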
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index db5e0f81ce0a..31d8c71986b4 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -483,7 +483,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
serr = SKB_EXT_ERR(skb);
- if (sin) {
+ if (sin && skb->len) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
serr->addr_offset);
@@ -496,8 +496,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
sin = &errhdr.offender;
memset(sin, 0, sizeof(*sin));
- if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
- ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) {
+ if (skb->len &&
+ (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+ ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
if (inet_sk(sk)->cmsg_flags)
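[Both hunks guard against zero-length skbs on the error queue: such skbs carry no packet headers, so dereferencing skb_network_header() or ip_hdr() through them would read garbage into the sockaddr returned to userspace. A hedged userspace sketch of draining the error queue; the socket setup is assumed, and the msg_namelen check reflects that an unfilled address is no longer populated from nonexistent headers:]

    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>

    /* sketch: read one message from the error queue of sockfd */
    static void drain_errqueue(int sockfd)
    {
            char control[512];
            struct sockaddr_in addr;
            struct msghdr msg;

            memset(&msg, 0, sizeof(msg));
            msg.msg_name = &addr;
            msg.msg_namelen = sizeof(addr);
            msg.msg_control = control;
            msg.msg_controllen = sizeof(control);

            if (recvmsg(sockfd, &msg, MSG_ERRQUEUE) < 0)
                    return;

            if (msg.msg_namelen != 0) {
                    /* addr was filled in; safe to inspect addr.sin_addr */
            }
    }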
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 71fb37c70581..d3dfff78fa19 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3183,8 +3183,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
- if (ca_ops->pkts_acked)
- ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us);
+ if (ca_ops->pkts_acked) {
+ long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
+ ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
+ }
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
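[Both ca_seq_rtt_us and sack_rtt_us use -1 to mean "no sample". Comparing them as unsigned via min_t(ulong, ...) turns -1 into ULONG_MAX, so the minimum naturally prefers whichever valid sample exists (or the smaller of two valid ones). A small kernel-context sketch of the trick:]

    /* sketch: -1 ("no sample") becomes ULONG_MAX when compared unsigned */
    long ca_seq_rtt_us = -1;        /* no timestamp-based sample */
    long sack_rtt_us = 4200;        /* SACK-based sample, in usec */
    long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);   /* -> 4200 */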
@@ -5870,10 +5872,9 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
* TCP ECN negotiation.
*
* Exception: tcp_ca wants ECN. This is required for DCTCP
- * congestion control; it requires setting ECT on all packets,
- * including SYN. We inverse the test in this case: If our
- * local socket wants ECN, but peer only set ece/cwr (but not
- * ECT in IP header) its probably a non-DCTCP aware sender.
+ * congestion control: Linux DCTCP asserts ECT on all packets,
+ * including SYN, which is the most optimal solution; however,
+ * others, such as FreeBSD, do not.
*/
static void tcp_ecn_create_request(struct request_sock *req,
const struct sk_buff *skb,
@@ -5883,18 +5884,15 @@ static void tcp_ecn_create_request(struct request_sock *req,
const struct tcphdr *th = tcp_hdr(skb);
const struct net *net = sock_net(listen_sk);
bool th_ecn = th->ece && th->cwr;
- bool ect, need_ecn, ecn_ok;
+ bool ect, ecn_ok;
if (!th_ecn)
return;
ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
- need_ecn = tcp_ca_needs_ecn(listen_sk);
ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN);
- if (!ect && !need_ecn && ecn_ok)
- inet_rsk(req)->ecn_ok = 1;
- else if (ect && need_ecn)
+ if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk))
inet_rsk(req)->ecn_ok = 1;
}
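[The old code set ecn_ok only for (!ect && !need_ecn && ecn_ok) or (ect && need_ecn); the new predicate additionally accepts the case where the local congestion control wants ECN but the peer did not set ECT on the SYN (e.g. a non-DCTCP-aware FreeBSD sender). With th_ecn already established above, the new behavior is:]

    ect   ecn_ok (sysctl/route)   tcp_ca_needs_ecn   ->  inet_rsk(req)->ecn_ok
     0            1                     any                      1
     0            0                      1                       1
     1           any                     1                       1
     1           any                     0                       0
     0            0                      0                       0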
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 722c8bceaf9a..4fcc9a768849 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -948,7 +948,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_orphan(skb);
skb->sk = sk;
- skb->destructor = tcp_wfree;
+ skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;
skb_set_hash_from_sk(skb, sk);
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
@@ -3270,6 +3270,14 @@ void tcp_send_ack(struct sock *sk)
skb_reserve(buff, MAX_TCP_HEADER);
tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
+ /* We do not want pure acks influencing TCP Small Queues or fq/pacing
+ * too much.
+ * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
+ * We also avoid tcp_wfree() overhead (cache line miss accessing
+ * tp->tsq_flags) by using regular sock_wfree()
+ */
+ skb_set_tcp_pure_ack(buff);
+
/* Send it off, this clears delayed acks for us. */
skb_mstamp_get(&buff->skb_mstamp);
tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
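[skb_set_tcp_pure_ack() / skb_is_tcp_pure_ack() are tiny helpers; since a pure ACK carries no payload, the marker is presumably a sentinel skb->truesize value that no real skb can have. A sketch under that assumption:]

    /* sketch, assuming pure ACKs are marked via a sentinel truesize
     * (a real truesize is always far larger than 2) */
    static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb)
    {
            return skb->truesize == 2;
    }

    static inline void skb_set_tcp_pure_ack(struct sk_buff *skb)
    {
            skb->truesize = 2;
    }

[With the mark set, tcp_transmit_skb() above installs plain sock_wfree() instead of tcp_wfree(), so pure ACKs neither count against TCP Small Queues limits nor take a cache line miss on tp->tsq_flags at free time.]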