summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c275
1 files changed, 216 insertions, 59 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f501ac048366..c9ab964189a0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -866,7 +866,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
/* This must be called before lost_out is incremented */
static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
{
- if ((tp->retransmit_skb_hint == NULL) ||
+ if (!tp->retransmit_skb_hint ||
before(TCP_SKB_CB(skb)->seq,
TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
tp->retransmit_skb_hint = skb;
@@ -1256,7 +1256,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
fack_count += pcount;
/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
- if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
+ if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
tp->lost_cnt_hint += pcount;
@@ -1535,7 +1535,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
if (!before(TCP_SKB_CB(skb)->seq, end_seq))
break;
- if ((next_dup != NULL) &&
+ if (next_dup &&
before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
in_sack = tcp_match_skb_to_sack(sk, skb,
next_dup->start_seq,
@@ -1551,7 +1551,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
if (in_sack <= 0) {
tmp = tcp_shift_skb_data(sk, skb, state,
start_seq, end_seq, dup_sack);
- if (tmp != NULL) {
+ if (tmp) {
if (tmp != skb) {
skb = tmp;
continue;
@@ -1614,7 +1614,7 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
struct tcp_sacktag_state *state,
u32 skip_to_seq)
{
- if (next_dup == NULL)
+ if (!next_dup)
return skb;
if (before(next_dup->start_seq, skip_to_seq)) {
@@ -1783,7 +1783,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
if (tcp_highest_sack_seq(tp) == cache->end_seq) {
/* ...but better entrypoint exists! */
skb = tcp_highest_sack(sk);
- if (skb == NULL)
+ if (!skb)
break;
state.fack_count = tp->fackets_out;
cache++;
@@ -1798,7 +1798,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
if (!before(start_seq, tcp_highest_sack_seq(tp))) {
skb = tcp_highest_sack(sk);
- if (skb == NULL)
+ if (!skb)
break;
state.fack_count = tp->fackets_out;
}
@@ -1820,14 +1820,12 @@ advance_sp:
for (j = 0; j < used_sacks; j++)
tp->recv_sack_cache[i++] = sp[j];
- tcp_mark_lost_retrans(sk);
-
- tcp_verify_left_out(tp);
-
if ((state.reord < tp->fackets_out) &&
((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
+ tcp_mark_lost_retrans(sk);
+ tcp_verify_left_out(tp);
out:
#if FASTRETRANS_DEBUG > 0
@@ -2700,16 +2698,21 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);
+ if ((flag & FLAG_SND_UNA_ADVANCED) &&
+ tcp_try_undo_loss(sk, false))
+ return;
+
if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
/* Step 3.b. A timeout is spurious if not all data are
* lost, i.e., never-retransmitted data are (s)acked.
*/
- if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
+ if ((flag & FLAG_ORIG_SACK_ACKED) &&
+ tcp_try_undo_loss(sk, true))
return;
- if (after(tp->snd_nxt, tp->high_seq) &&
- (flag & FLAG_DATA_SACKED || is_dupack)) {
- tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
+ if (after(tp->snd_nxt, tp->high_seq)) {
+ if (flag & FLAG_DATA_SACKED || is_dupack)
+ tp->frto = 0; /* Step 3.a. loss was real */
} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
tp->high_seq = tp->snd_nxt;
__tcp_push_pending_frames(sk, tcp_current_mss(sk),
@@ -2734,8 +2737,6 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
else if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
}
- if (tcp_try_undo_loss(sk, false))
- return;
tcp_xmit_retransmit_queue(sk);
}
@@ -3099,17 +3100,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (sacked & TCPCB_SACKED_RETRANS)
tp->retrans_out -= acked_pcount;
flag |= FLAG_RETRANS_DATA_ACKED;
- } else {
+ } else if (!(sacked & TCPCB_SACKED_ACKED)) {
last_ackt = skb->skb_mstamp;
WARN_ON_ONCE(last_ackt.v64 == 0);
if (!first_ackt.v64)
first_ackt = last_ackt;
- if (!(sacked & TCPCB_SACKED_ACKED)) {
- reord = min(pkts_acked, reord);
- if (!after(scb->end_seq, tp->high_seq))
- flag |= FLAG_ORIG_SACK_ACKED;
- }
+ reord = min(pkts_acked, reord);
+ if (!after(scb->end_seq, tp->high_seq))
+ flag |= FLAG_ORIG_SACK_ACKED;
}
if (sacked & TCPCB_SACKED_ACKED)
@@ -3282,6 +3281,28 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
}
+/* If we update tp->snd_una, also update tp->bytes_acked */
+static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
+{
+ u32 delta = ack - tp->snd_una;
+
+ u64_stats_update_begin(&tp->syncp);
+ tp->bytes_acked += delta;
+ u64_stats_update_end(&tp->syncp);
+ tp->snd_una = ack;
+}
+
+/* If we update tp->rcv_nxt, also update tp->bytes_received */
+static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+{
+ u32 delta = seq - tp->rcv_nxt;
+
+ u64_stats_update_begin(&tp->syncp);
+ tp->bytes_received += delta;
+ u64_stats_update_end(&tp->syncp);
+ tp->rcv_nxt = seq;
+}
+
/* Update our send window.
*
* Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
@@ -3317,11 +3338,41 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
}
}
- tp->snd_una = ack;
+ tcp_snd_una_update(tp, ack);
return flag;
}
+/* Return true if we're currently rate-limiting out-of-window ACKs and
+ * thus shouldn't send a dupack right now. We rate-limit dupacks in
+ * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
+ * attacks that send repeated SYNs or ACKs for the same connection. To
+ * do this, we do not send a duplicate SYNACK or ACK if the remote
+ * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
+ */
+bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
+ int mib_idx, u32 *last_oow_ack_time)
+{
+ /* Data packets without SYNs are not likely part of an ACK loop. */
+ if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
+ !tcp_hdr(skb)->syn)
+ goto not_rate_limited;
+
+ if (*last_oow_ack_time) {
+ s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+
+ if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+ NET_INC_STATS_BH(net, mib_idx);
+ return true; /* rate-limited: don't send yet! */
+ }
+ }
+
+ *last_oow_ack_time = tcp_time_stamp;
+
+not_rate_limited:
+ return false; /* not rate-limited: go ahead, send dupack now! */
+}
+
/* RFC 5961 7 [ACK Throttling] */
static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
{
@@ -3469,7 +3520,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* Note, we use the fact that SND.UNA>=SND.WL2.
*/
tcp_update_wl(tp, ack_seq);
- tp->snd_una = ack;
+ tcp_snd_una_update(tp, ack);
flag |= FLAG_WIN_UPDATE;
tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
@@ -3573,6 +3624,23 @@ old_ack:
return 0;
}
+static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
+ bool syn, struct tcp_fastopen_cookie *foc,
+ bool exp_opt)
+{
+ /* Valid only in SYN or SYN-ACK with an even length. */
+ if (!foc || !syn || len < 0 || (len & 1))
+ return;
+
+ if (len >= TCP_FASTOPEN_COOKIE_MIN &&
+ len <= TCP_FASTOPEN_COOKIE_MAX)
+ memcpy(foc->val, cookie, len);
+ else if (len != 0)
+ len = -1;
+ foc->len = len;
+ foc->exp = exp_opt;
+}
+
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
* the fast version below fails.
@@ -3662,21 +3730,22 @@ void tcp_parse_options(const struct sk_buff *skb,
*/
break;
#endif
+ case TCPOPT_FASTOPEN:
+ tcp_parse_fastopen_option(
+ opsize - TCPOLEN_FASTOPEN_BASE,
+ ptr, th->syn, foc, false);
+ break;
+
case TCPOPT_EXP:
/* Fast Open option shares code 254 using a
- * 16 bits magic number. It's valid only in
- * SYN or SYN-ACK with an even size.
+ * 16 bits magic number.
*/
- if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
- get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
- foc == NULL || !th->syn || (opsize & 1))
- break;
- foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
- if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
- foc->len <= TCP_FASTOPEN_COOKIE_MAX)
- memcpy(foc->val, ptr + 2, foc->len);
- else if (foc->len != 0)
- foc->len = -1;
+ if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
+ get_unaligned_be16(ptr) ==
+ TCPOPT_FASTOPEN_MAGIC)
+ tcp_parse_fastopen_option(opsize -
+ TCPOLEN_EXP_FASTOPEN_BASE,
+ ptr + 2, th->syn, foc, true);
break;
}
@@ -4190,7 +4259,7 @@ static void tcp_ofo_queue(struct sock *sk)
tail = skb_peek_tail(&sk->sk_receive_queue);
eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
if (!eaten)
__skb_queue_tail(&sk->sk_receive_queue, skb);
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4358,7 +4427,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
__skb_pull(skb, hdrlen);
eaten = (tail &&
tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
- tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) {
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
@@ -4451,7 +4520,7 @@ queue_and_out:
eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
}
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
if (skb->len)
tcp_event_data_recv(sk, skb);
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4640,7 +4709,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
struct sk_buff *head;
u32 start, end;
- if (skb == NULL)
+ if (!skb)
return;
start = TCP_SKB_CB(skb)->seq;
@@ -4799,6 +4868,8 @@ static void tcp_check_space(struct sock *sk)
{
if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
+ /* pairs with tcp_poll() */
+ smp_mb__after_atomic();
if (sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
tcp_new_space(sk);
@@ -5095,7 +5166,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
- if (unlikely(sk->sk_rx_dst == NULL))
+ if (unlikely(!sk->sk_rx_dst))
inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
* Header prediction.
@@ -5197,7 +5268,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_rcv_rtt_measure_ts(sk, skb);
__skb_pull(skb, tcp_header_len);
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
eaten = 1;
}
@@ -5292,7 +5363,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
tcp_set_state(sk, TCP_ESTABLISHED);
- if (skb != NULL) {
+ if (skb) {
icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
security_inet_conn_established(sk, skb);
}
@@ -5330,8 +5401,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
- u16 mss = tp->rx_opt.mss_clamp;
- bool syn_drop;
+ u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
+ bool syn_drop = false;
if (mss == tp->rx_opt.user_mss) {
struct tcp_options_received opt;
@@ -5343,16 +5414,25 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
mss = opt.mss_clamp;
}
- if (!tp->syn_fastopen) /* Ignore an unsolicited cookie */
+ if (!tp->syn_fastopen) {
+ /* Ignore an unsolicited cookie */
cookie->len = -1;
+ } else if (tp->total_retrans) {
+ /* SYN timed out and the SYN-ACK neither has a cookie nor
+ * acknowledges data. Presumably the remote received only
+ * the retransmitted (regular) SYNs: either the original
+ * SYN-data or the corresponding SYN-ACK was dropped.
+ */
+ syn_drop = (cookie->len < 0 && data);
+ } else if (cookie->len < 0 && !tp->syn_data) {
+ /* We requested a cookie but didn't get it. If we did not use
+ * the (old) exp opt format then try so next time (try_exp=1).
+ * Otherwise we go back to use the RFC7413 opt (try_exp=2).
+ */
+ try_exp = tp->syn_fastopen_exp ? 2 : 1;
+ }
- /* The SYN-ACK neither has cookie nor acknowledges the data. Presumably
- * the remote receives only the retransmitted (regular) SYNs: either
- * the original SYN-data or the corresponding SYN-ACK is lost.
- */
- syn_drop = (cookie->len <= 0 && data && tp->total_retrans);
-
- tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);
+ tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
if (data) { /* Retransmit unacked data in SYN */
tcp_for_write_queue_from(data, sk) {
@@ -5661,11 +5741,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
}
req = tp->fastopen_rsk;
- if (req != NULL) {
+ if (req) {
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
- if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
+ if (!tcp_check_req(sk, skb, req, true))
goto discard;
}
@@ -5751,7 +5831,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
* ACK we have received, this would have acknowledged
* our SYNACK so stop the SYNACK timer.
*/
- if (req != NULL) {
+ if (req) {
/* Return RST if ack_seq is invalid.
* Note that RFC793 only says to generate a
* DUPACK for it but for TCP Fast Open it seems
@@ -5913,6 +5993,80 @@ static void tcp_ecn_create_request(struct request_sock *req,
inet_rsk(req)->ecn_ok = 1;
}
+static void tcp_openreq_init(struct request_sock *req,
+ const struct tcp_options_received *rx_opt,
+ struct sk_buff *skb, const struct sock *sk)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
+ req->cookie_ts = 0;
+ tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
+ tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
+ tcp_rsk(req)->last_oow_ack_time = 0;
+ req->mss = rx_opt->mss_clamp;
+ req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
+ ireq->tstamp_ok = rx_opt->tstamp_ok;
+ ireq->sack_ok = rx_opt->sack_ok;
+ ireq->snd_wscale = rx_opt->snd_wscale;
+ ireq->wscale_ok = rx_opt->wscale_ok;
+ ireq->acked = 0;
+ ireq->ecn_ok = 0;
+ ireq->ir_rmt_port = tcp_hdr(skb)->source;
+ ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
+ ireq->ir_mark = inet_request_mark(sk, skb);
+}
+
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+ struct sock *sk_listener)
+{
+ struct request_sock *req = reqsk_alloc(ops, sk_listener);
+
+ if (req) {
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ kmemcheck_annotate_bitfield(ireq, flags);
+ ireq->opt = NULL;
+ atomic64_set(&ireq->ir_cookie, 0);
+ ireq->ireq_state = TCP_NEW_SYN_RECV;
+ write_pnet(&ireq->ireq_net, sock_net(sk_listener));
+ ireq->ireq_family = sk_listener->sk_family;
+ }
+
+ return req;
+}
+EXPORT_SYMBOL(inet_reqsk_alloc);
+
+/*
+ * Return true if a syncookie should be sent
+ */
+static bool tcp_syn_flood_action(struct sock *sk,
+ const struct sk_buff *skb,
+ const char *proto)
+{
+ const char *msg = "Dropping request";
+ bool want_cookie = false;
+ struct listen_sock *lopt;
+
+#ifdef CONFIG_SYN_COOKIES
+ if (sysctl_tcp_syncookies) {
+ msg = "Sending cookies";
+ want_cookie = true;
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+ } else
+#endif
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
+
+ lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+ if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
+ lopt->synflood_warned = 1;
+ pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
+ proto, ntohs(tcp_hdr(skb)->dest), msg);
+ }
+ return want_cookie;
+}
+
int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb)
@@ -5950,7 +6104,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop;
}
- req = inet_reqsk_alloc(rsk_ops);
+ req = inet_reqsk_alloc(rsk_ops, sk);
if (!req)
goto drop;
@@ -5967,6 +6121,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb, sk);
+ /* Note: tcp_v6_init_req() might override ir_iif for link locals */
+ inet_rsk(req)->ir_iif = sk->sk_bound_dev_if;
+
af_ops->init_req(req, sk, skb);
if (security_inet_conn_request(sk, skb, req))
@@ -6039,7 +6196,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (err || want_cookie)
goto drop_and_free;
- tcp_rsk(req)->listener = NULL;
+ tcp_rsk(req)->tfo_listener = false;
af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
}