summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/esp4.c53
-rw-r--r--net/ipv4/fib_frontend.c28
-rw-r--r--net/ipv4/fib_rules.c4
-rw-r--r--net/ipv4/fib_trie.c22
-rw-r--r--net/ipv4/inet_hashtables.c37
-rw-r--r--net/ipv4/ip_gre.c16
-rw-r--r--net/ipv4/ipmr.c12
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c11
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/syncookies.c1
-rw-r--r--net/ipv4/tcp_input.c57
-rw-r--r--net/ipv4/tcp_minisocks.c26
-rw-r--r--net/ipv4/tcp_offload.c2
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/udp_offload.c61
-rw-r--r--net/ipv4/xfrm4_input.c18
16 files changed, 198 insertions, 157 deletions
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 0e4076866c0a..f14a41ee4aa1 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -120,47 +120,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
}
#ifdef CONFIG_INET_ESPINTCP
-struct esp_tcp_sk {
- struct sock *sk;
- struct rcu_head rcu;
-};
-
-static void esp_free_tcp_sk(struct rcu_head *head)
-{
- struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
-
- sock_put(esk->sk);
- kfree(esk);
-}
-
static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
{
struct xfrm_encap_tmpl *encap = x->encap;
struct net *net = xs_net(x);
- struct esp_tcp_sk *esk;
__be16 sport, dport;
- struct sock *nsk;
struct sock *sk;
- sk = rcu_dereference(x->encap_sk);
- if (sk && sk->sk_state == TCP_ESTABLISHED)
- return sk;
-
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
dport = encap->encap_dport;
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (sk && sk == nsk) {
- esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
- if (!esk) {
- spin_unlock_bh(&x->lock);
- return ERR_PTR(-ENOMEM);
- }
- RCU_INIT_POINTER(x->encap_sk, NULL);
- esk->sk = sk;
- call_rcu(&esk->rcu, esp_free_tcp_sk);
- }
spin_unlock_bh(&x->lock);
sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4,
@@ -173,20 +142,6 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
return ERR_PTR(-EINVAL);
}
- spin_lock_bh(&x->lock);
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (encap->encap_sport != sport ||
- encap->encap_dport != dport) {
- sock_put(sk);
- sk = nsk ?: ERR_PTR(-EREMCHG);
- } else if (sk == nsk) {
- sock_put(sk);
- } else {
- rcu_assign_pointer(x->encap_sk, sk);
- }
- spin_unlock_bh(&x->lock);
-
return sk;
}
@@ -199,8 +154,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
sk = esp_find_tcp_sk(x);
err = PTR_ERR_OR_ZERO(sk);
- if (err)
+ if (err) {
+ kfree_skb(skb);
goto out;
+ }
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
@@ -209,6 +166,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
err = espintcp_push_skb(sk, skb);
bh_unlock_sock(sk);
+ sock_put(sk);
+
out:
rcu_read_unlock();
return err;
@@ -392,6 +351,8 @@ static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
if (IS_ERR(sk))
return ERR_CAST(sk);
+ sock_put(sk);
+
*lenp = htons(len);
esph = (struct ip_esp_hdr *)(lenp + 1);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 272e42d81323..8470e259d8fd 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -553,18 +553,16 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
const struct in_ifaddr *ifa;
struct in_device *in_dev;
- in_dev = __in_dev_get_rtnl(dev);
+ in_dev = __in_dev_get_rtnl_net(dev);
if (!in_dev)
return -ENODEV;
*colon = ':';
- rcu_read_lock();
- in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ in_dev_for_each_ifa_rtnl_net(net, ifa, in_dev) {
if (strcmp(ifa->ifa_label, devname) == 0)
break;
}
- rcu_read_unlock();
if (!ifa)
return -ENODEV;
@@ -635,7 +633,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- rtnl_lock();
+ rtnl_net_lock(net);
err = rtentry_to_fib_config(net, cmd, rt, &cfg);
if (err == 0) {
struct fib_table *tb;
@@ -659,7 +657,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
/* allocated by rtentry_to_fib_config() */
kfree(cfg.fc_mx);
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
return err;
}
return -EINVAL;
@@ -837,19 +835,33 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
}
}
+ if (cfg->fc_dst_len > 32) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length");
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (cfg->fc_dst_len < 32 && (ntohl(cfg->fc_dst) << cfg->fc_dst_len)) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix for given prefix length");
+ err = -EINVAL;
+ goto errout;
+ }
+
if (cfg->fc_nh_id) {
if (cfg->fc_oif || cfg->fc_gw_family ||
cfg->fc_encap || cfg->fc_mp) {
NL_SET_ERR_MSG(extack,
"Nexthop specification and nexthop id are mutually exclusive");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout;
}
}
if (has_gw && has_via) {
NL_SET_ERR_MSG(extack,
"Nexthop configuration can not contain both GATEWAY and VIA");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout;
}
if (!cfg->fc_table)
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 9517b8667e00..041c46787d94 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -245,9 +245,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
- struct net *net = sock_net(skb->sk);
+ struct fib4_rule *rule4 = (struct fib4_rule *)rule;
+ struct net *net = rule->fr_net;
int err = -EINVAL;
- struct fib4_rule *rule4 = (struct fib4_rule *) rule;
if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) {
NL_SET_ERR_MSG(extack,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d6411ac81096..59a6f0a9638f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1187,22 +1187,6 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp,
return 0;
}
-static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
-{
- if (plen > KEYLENGTH) {
- NL_SET_ERR_MSG(extack, "Invalid prefix length");
- return false;
- }
-
- if ((plen < KEYLENGTH) && (key << plen)) {
- NL_SET_ERR_MSG(extack,
- "Invalid prefix for given prefix length");
- return false;
- }
-
- return true;
-}
-
static void fib_remove_alias(struct trie *t, struct key_vector *tp,
struct key_vector *l, struct fib_alias *old);
@@ -1223,9 +1207,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
key = ntohl(cfg->fc_dst);
- if (!fib_valid_key_len(key, plen, extack))
- return -EINVAL;
-
pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
fi = fib_create_info(cfg, extack);
@@ -1717,9 +1698,6 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
key = ntohl(cfg->fc_dst);
- if (!fib_valid_key_len(key, plen, extack))
- return -EINVAL;
-
l = fib_find_node(t, &tp, key);
if (!l)
return -ESRCH;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 9bfcfd016e18..2b4a58824763 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -1230,22 +1230,37 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
unsigned int locksz = sizeof(spinlock_t);
unsigned int i, nblocks = 1;
+ spinlock_t *ptr = NULL;
- if (locksz != 0) {
- /* allocate 2 cache lines or at least one spinlock per cpu */
- nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U);
- nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());
+ if (locksz == 0)
+ goto set_mask;
- /* no more locks than number of hash buckets */
- nblocks = min(nblocks, hashinfo->ehash_mask + 1);
+ /* Allocate 2 cache lines or at least one spinlock per cpu. */
+ nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U) * num_possible_cpus();
- hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
- if (!hashinfo->ehash_locks)
- return -ENOMEM;
+ /* At least one page per NUMA node. */
+ nblocks = max(nblocks, num_online_nodes() * PAGE_SIZE / locksz);
+
+ nblocks = roundup_pow_of_two(nblocks);
+
+ /* No more locks than number of hash buckets. */
+ nblocks = min(nblocks, hashinfo->ehash_mask + 1);
- for (i = 0; i < nblocks; i++)
- spin_lock_init(&hashinfo->ehash_locks[i]);
+ if (num_online_nodes() > 1) {
+ /* Use vmalloc() to allow NUMA policy to spread pages
+ * on all available nodes if desired.
+ */
+ ptr = vmalloc_array(nblocks, locksz);
+ }
+ if (!ptr) {
+ ptr = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
}
+ for (i = 0; i < nblocks; i++)
+ spin_lock_init(&ptr[i]);
+ hashinfo->ehash_locks = ptr;
+set_mask:
hashinfo->ehash_locks_mask = nblocks - 1;
return 0;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ed1b6b44faf8..c9f11a046c26 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -141,7 +141,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
const struct iphdr *iph;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
- unsigned int data_len = 0;
struct ip_tunnel *t;
if (tpi->proto == htons(ETH_P_TEB))
@@ -182,7 +181,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
return 0;
- data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
break;
case ICMP_REDIRECT:
@@ -190,10 +188,16 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
}
#if IS_ENABLED(CONFIG_IPV6)
- if (tpi->proto == htons(ETH_P_IPV6) &&
- !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
- type, data_len))
- return 0;
+ if (tpi->proto == htons(ETH_P_IPV6)) {
+ unsigned int data_len = 0;
+
+ if (type == ICMP_TIME_EXCEEDED)
+ data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
+
+ if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
+ type, data_len))
+ return 0;
+ }
#endif
if (t->parms.iph.daddr == 0 ||
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 21ae7594a852..69df45c4a0aa 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -120,11 +120,6 @@ static void ipmr_expire_process(struct timer_list *t);
lockdep_rtnl_is_held() || \
list_empty(&net->ipv4.mr_tables))
-static bool ipmr_can_free_table(struct net *net)
-{
- return !check_net(net) || !net_initialized(net);
-}
-
static struct mr_table *ipmr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -317,11 +312,6 @@ EXPORT_SYMBOL(ipmr_rule_default);
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
-static bool ipmr_can_free_table(struct net *net)
-{
- return !check_net(net);
-}
-
static struct mr_table *ipmr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -437,7 +427,7 @@ static void ipmr_free_table(struct mr_table *mrt)
{
struct net *net = read_pnet(&mrt->net);
- WARN_ON_ONCE(!ipmr_can_free_table(net));
+ WARN_ON_ONCE(!mr_can_free_table(net));
timer_shutdown_sync(&mrt->ipmr_expire_timer);
mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 625adbc42037..9082ca17e845 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -71,6 +71,11 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct net_device *oif;
const struct net_device *found;
+ if (nft_fib_can_skip(pkt)) {
+ nft_fib_store_result(dest, priv, nft_in(pkt));
+ return;
+ }
+
/*
* Do not set flowi4_oif, it restricts results (for example, asking
* for oif 3 will get RTN_UNICAST result even if the daddr exits
@@ -85,12 +90,6 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
oif = NULL;
- if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
- nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
- nft_fib_store_result(dest, priv, nft_in(pkt));
- return;
- }
-
iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
if (!iph) {
regs->verdict.code = NFT_BREAK;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index affd21a0f572..10cbeb76c274 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -189,6 +189,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
+ SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED),
SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK),
SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),
SNMP_MIB_ITEM("DelayedACKLocked", LINUX_MIB_DELAYEDACKLOCKED),
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 1948d15f1f28..25976fa7768c 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -279,6 +279,7 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
ireq->smc_ok = 0;
treq->snt_synack = 0;
+ treq->snt_tsval_first = 0;
treq->tfo_listener = false;
treq->txhash = net_tx_rndhash();
treq->rcv_isn = ntohl(th->seq) - 1;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0cbf81bf3d45..1b09b4d76c29 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -419,6 +419,20 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr
return false;
}
+static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count)
+{
+ tp->delivered_ce += ecn_count;
+}
+
+/* Updates the delivered and delivered_ce counts */
+static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
+ bool ece_ack)
+{
+ tp->delivered += delivered;
+ if (ece_ack)
+ tcp_count_delivered_ce(tp, delivered);
+}
+
/* Buffer size and advertised window tuning.
*
* 1. Tuning sk->sk_sndbuf, when connection enters established state.
@@ -1154,15 +1168,6 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
}
}
-/* Updates the delivered and delivered_ce counts */
-static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
- bool ece_ack)
-{
- tp->delivered += delivered;
- if (ece_ack)
- tp->delivered_ce += delivered;
-}
-
/* This procedure tags the retransmission queue when SACKs arrive.
*
* We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
@@ -3862,12 +3867,23 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
}
}
-static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
+static void tcp_in_ack_event(struct sock *sk, int flag)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- if (icsk->icsk_ca_ops->in_ack_event)
- icsk->icsk_ca_ops->in_ack_event(sk, flags);
+ if (icsk->icsk_ca_ops->in_ack_event) {
+ u32 ack_ev_flags = 0;
+
+ if (flag & FLAG_WIN_UPDATE)
+ ack_ev_flags |= CA_ACK_WIN_UPDATE;
+ if (flag & FLAG_SLOWPATH) {
+ ack_ev_flags |= CA_ACK_SLOWPATH;
+ if (flag & FLAG_ECE)
+ ack_ev_flags |= CA_ACK_ECE;
+ }
+
+ icsk->icsk_ca_ops->in_ack_event(sk, ack_ev_flags);
+ }
}
/* Congestion control has updated the cwnd already. So if we're in
@@ -3984,12 +4000,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_snd_una_update(tp, ack);
flag |= FLAG_WIN_UPDATE;
- tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
-
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
} else {
- u32 ack_ev_flags = CA_ACK_SLOWPATH;
-
if (ack_seq != TCP_SKB_CB(skb)->end_seq)
flag |= FLAG_DATA;
else
@@ -4001,19 +4013,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
+ if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb)))
flag |= FLAG_ECE;
- ack_ev_flags |= CA_ACK_ECE;
- }
if (sack_state.sack_delivered)
tcp_count_delivered(tp, sack_state.sack_delivered,
flag & FLAG_ECE);
-
- if (flag & FLAG_WIN_UPDATE)
- ack_ev_flags |= CA_ACK_WIN_UPDATE;
-
- tcp_in_ack_event(sk, ack_ev_flags);
}
/* This is a deviation from RFC3168 since it states that:
@@ -4040,6 +4045,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_rack_update_reo_wnd(sk, &rs);
+ tcp_in_ack_event(sk, flag);
+
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -4071,6 +4078,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
return 1;
no_queue:
+ tcp_in_ack_event(sk, flag);
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK) {
tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
@@ -7081,6 +7089,7 @@ static void tcp_openreq_init(struct request_sock *req,
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
tcp_rsk(req)->snt_synack = 0;
+ tcp_rsk(req)->snt_tsval_first = 0;
tcp_rsk(req)->last_oow_ack_time = 0;
req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index dfdb7a4608a8..0d4ff5f2352f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -665,6 +665,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct sock *child;
const struct tcphdr *th = tcp_hdr(skb);
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
+ bool tsecr_reject = false;
bool paws_reject = false;
bool own_req;
@@ -674,8 +675,13 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = READ_ONCE(req->ts_recent);
- if (tmp_opt.rcv_tsecr)
+ if (tmp_opt.rcv_tsecr) {
+ if (inet_rsk(req)->tstamp_ok && !fastopen)
+ tsecr_reject = !between(tmp_opt.rcv_tsecr,
+ tcp_rsk(req)->snt_tsval_first,
+ READ_ONCE(tcp_rsk(req)->snt_tsval_last));
tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
+ }
/* We do not store true stamp, but it is not required,
* it can be estimated (approximately)
* from another data.
@@ -790,18 +796,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->snt_isn + 1))
return sk;
- /* Also, it would be not so bad idea to check rcv_tsecr, which
- * is essentially ACK extension and too early or too late values
- * should cause reset in unsynchronized states.
- */
-
/* RFC793: "first check sequence number". */
- if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(skb)->end_seq,
- tcp_rsk(req)->rcv_nxt,
- tcp_rsk(req)->rcv_nxt +
- tcp_synack_window(req))) {
+ if (paws_reject || tsecr_reject ||
+ !tcp_in_window(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(skb)->end_seq,
+ tcp_rsk(req)->rcv_nxt,
+ tcp_rsk(req)->rcv_nxt +
+ tcp_synack_window(req))) {
/* Out of window: send ACK and drop. */
if (!(flg & TCP_FLAG_RST) &&
!tcp_oow_rate_limited(sock_net(sk), skb,
@@ -810,6 +812,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
req->rsk_ops->send_ack(sk, skb, req);
if (paws_reject)
NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
+ else if (tsecr_reject)
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TSECRREJECTED);
return NULL;
}
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 2dfac79dc78b..e04ebe651c33 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -435,7 +435,7 @@ static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
iif, sdif);
NAPI_GRO_CB(skb)->is_flist = !sk;
if (sk)
- sock_put(sk);
+ sock_gen_put(sk);
}
INDIRECT_CALLABLE_SCOPE
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bc95d2a5924f..6031d7f7f519 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -941,6 +941,12 @@ static unsigned int tcp_synack_options(const struct sock *sk,
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) +
tcp_rsk(req)->ts_off;
+ if (!tcp_rsk(req)->snt_tsval_first) {
+ if (!opts->tsval)
+ opts->tsval = ~0U;
+ tcp_rsk(req)->snt_tsval_first = opts->tsval;
+ }
+ WRITE_ONCE(tcp_rsk(req)->snt_tsval_last, opts->tsval);
opts->tsecr = READ_ONCE(req->ts_recent);
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ecfca59f31f1..da5d4aea1b59 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -247,6 +247,62 @@ static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
return segs;
}
+static void __udpv6_gso_segment_csum(struct sk_buff *seg,
+ struct in6_addr *oldip,
+ const struct in6_addr *newip,
+ __be16 *oldport, __be16 newport)
+{
+ struct udphdr *uh = udp_hdr(seg);
+
+ if (ipv6_addr_equal(oldip, newip) && *oldport == newport)
+ return;
+
+ if (uh->check) {
+ inet_proto_csum_replace16(&uh->check, seg, oldip->s6_addr32,
+ newip->s6_addr32, true);
+
+ inet_proto_csum_replace2(&uh->check, seg, *oldport, newport,
+ false);
+ if (!uh->check)
+ uh->check = CSUM_MANGLED_0;
+ }
+
+ *oldip = *newip;
+ *oldport = newport;
+}
+
+static struct sk_buff *__udpv6_gso_segment_list_csum(struct sk_buff *segs)
+{
+ const struct ipv6hdr *iph;
+ const struct udphdr *uh;
+ struct ipv6hdr *iph2;
+ struct sk_buff *seg;
+ struct udphdr *uh2;
+
+ seg = segs;
+ uh = udp_hdr(seg);
+ iph = ipv6_hdr(seg);
+ uh2 = udp_hdr(seg->next);
+ iph2 = ipv6_hdr(seg->next);
+
+ if (!(*(const u32 *)&uh->source ^ *(const u32 *)&uh2->source) &&
+ ipv6_addr_equal(&iph->saddr, &iph2->saddr) &&
+ ipv6_addr_equal(&iph->daddr, &iph2->daddr))
+ return segs;
+
+ while ((seg = seg->next)) {
+ uh2 = udp_hdr(seg);
+ iph2 = ipv6_hdr(seg);
+
+ __udpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr,
+ &uh2->source, uh->source);
+ __udpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr,
+ &uh2->dest, uh->dest);
+ }
+
+ return segs;
+}
+
static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
netdev_features_t features,
bool is_ipv6)
@@ -259,7 +315,10 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
- return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb);
+ if (is_ipv6)
+ return __udpv6_gso_segment_list_csum(skb);
+ else
+ return __udpv4_gso_segment_list_csum(skb);
}
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index b5b06323cfd9..0d31a8c108d4 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -182,11 +182,15 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
int offset = skb_gro_offset(skb);
const struct net_offload *ops;
struct sk_buff *pp = NULL;
- int ret;
-
- offset = offset - sizeof(struct udphdr);
+ int len, dlen;
+ __u8 *udpdata;
+ __be32 *udpdata32;
- if (!pskb_pull(skb, offset))
+ len = skb->len - offset;
+ dlen = offset + min(len, 8);
+ udpdata = skb_gro_header(skb, dlen, offset);
+ udpdata32 = (__be32 *)udpdata;
+ if (unlikely(!udpdata))
return NULL;
rcu_read_lock();
@@ -194,11 +198,10 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
if (!ops || !ops->callbacks.gro_receive)
goto out;
- ret = __xfrm4_udp_encap_rcv(sk, skb, false);
- if (ret)
+ /* check if it is a keepalive or IKE packet */
+ if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0)
goto out;
- skb_push(skb, offset);
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
@@ -208,7 +211,6 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
out:
rcu_read_unlock();
- skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 1;