From f07373ead455a396e15a431bc08d8ce1dac6f1cf Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:32 +0100 Subject: rxrpc: Add re-sent Tx annotation Add a Tx-phase annotation for packet buffers to indicate that a buffer has already been retransmitted. This will be used by future congestion management. Re-retransmissions of a packet don't affect the congestion window managment in the same way as initial retransmissions. Signed-off-by: David Howells --- net/rxrpc/input.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'net/rxrpc/input.c') diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7ac1edf3aac7..aa261df9fc9e 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -388,17 +388,25 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, { bool resend = false; int ix; + u8 annotation, anno_type; for (; nr_acks > 0; nr_acks--, seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; switch (*acks++) { case RXRPC_ACK_TYPE_ACK: - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK; + if (anno_type == RXRPC_TX_ANNO_ACK) + continue; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_ACK | annotation; break; case RXRPC_ACK_TYPE_NACK: - if (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK) + if (anno_type == RXRPC_TX_ANNO_NAK) continue; - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_NAK | annotation; resend = true; break; default: -- cgit v1.2.3 From 8e83134db4ecb77a1dc3390b60ddeea840a5afbc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: rxrpc: Send pings to get RTT data Send a PING ACK packet to the peer when we get a new incoming call from a peer we don't have a record for. The PING RESPONSE ACK packet will tell us the following about the peer: (1) its receive window size (2) its MTU sizes (3) its support for jumbo DATA packets (4) if it supports slow start (similar to RFC 5681) (5) an estimate of the RTT This is necessary because the peer won't normally send us an ACK until it gets to the Rx phase and we send it a packet, but we would like to know some of this information before we start sending packets. A pair of tracepoints are added so that RTT determination can be observed. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 +++++-- net/rxrpc/input.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- net/rxrpc/misc.c | 11 ++++++----- net/rxrpc/output.c | 22 ++++++++++++++++++++++ 4 files changed, 80 insertions(+), 8 deletions(-) (limited to 'net/rxrpc/input.c') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 79c671e552c3..8b47f468eb9d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -403,6 +403,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ + RXRPC_CALL_PINGING, /* Ping in process */ }; /* @@ -487,6 +488,8 @@ struct rxrpc_call { u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ + unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ + unsigned short rx_pkt_len; /* Current recvmsg packet len */ /* Rx/Tx circular buffer, depending on phase. * @@ -530,8 +533,8 @@ struct rxrpc_call { u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ - unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ - unsigned short rx_pkt_len; /* Current recvmsg packet len */ + rxrpc_serial_t ackr_ping; /* Last ping sent */ + ktime_t ackr_ping_time; /* Time last ping sent */ /* transmission-phase ACK management */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index aa261df9fc9e..a0a5bd108c9e 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -36,6 +36,19 @@ static void rxrpc_proto_abort(const char *why, } } +/* + * Ping the other end to fill our RTT cache and to retrieve the rwind + * and MTU parameters. + */ +static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, + int skew) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + true, true); +} + /* * Apply a hard ACK by advancing the Tx window. */ @@ -342,6 +355,32 @@ ack: _leave(" [queued]"); } +/* + * Process a ping response. + */ +static void rxrpc_input_ping_response(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t orig_serial, + rxrpc_serial_t ack_serial) +{ + rxrpc_serial_t ping_serial; + ktime_t ping_time; + + ping_time = call->ackr_ping_time; + smp_rmb(); + ping_serial = call->ackr_ping; + + if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || + before(orig_serial, ping_serial)) + return; + clear_bit(RXRPC_CALL_PINGING, &call->flags); + if (after(orig_serial, ping_serial)) + return; + + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response, + orig_serial, ack_serial, ping_time, resp_time); +} + /* * Process the extra information that may be appended to an ACK packet */ @@ -438,6 +477,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_ackinfo info; u8 acks[RXRPC_MAXACKS]; } buf; + rxrpc_serial_t acked_serial; rxrpc_seq_t first_soft_ack, hard_ack; int nr_acks, offset; @@ -449,6 +489,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } sp->offset += sizeof(buf.ack); + acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; @@ -460,10 +501,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, ntohs(buf.ack.maxSkew), first_soft_ack, ntohl(buf.ack.previousPacket), - ntohl(buf.ack.serial), + acked_serial, rxrpc_acks(buf.ack.reason), buf.ack.nAcks); + if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_input_ping_response(call, skb->tstamp, acked_serial, + sp->hdr.serial); + if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, @@ -830,6 +875,7 @@ void rxrpc_data_ready(struct sock *udp_sk) rcu_read_unlock(); goto reject_packet; } + rxrpc_send_ping(call, skb, skew); } rxrpc_input_call_packet(call, skb, skew); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 6321c23f9a6e..56e668352fc7 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -83,11 +83,12 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_DELAY] = 1, [RXRPC_ACK_REQUESTED] = 2, [RXRPC_ACK_IDLE] = 3, - [RXRPC_ACK_PING_RESPONSE] = 4, - [RXRPC_ACK_DUPLICATE] = 5, - [RXRPC_ACK_OUT_OF_SEQUENCE] = 6, - [RXRPC_ACK_EXCEEDS_WINDOW] = 7, - [RXRPC_ACK_NOSPACE] = 8, + [RXRPC_ACK_DUPLICATE] = 4, + [RXRPC_ACK_OUT_OF_SEQUENCE] = 5, + [RXRPC_ACK_EXCEEDS_WINDOW] = 6, + [RXRPC_ACK_NOSPACE] = 7, + [RXRPC_ACK_PING_RESPONSE] = 8, + [RXRPC_ACK_PING] = 9, }; const char *rxrpc_acks(u8 reason) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 817fb0e82d6a..0d89cd3f2c01 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -57,6 +57,9 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ack.reason = call->ackr_reason; pkt->ack.nAcks = top - hard_ack; + if (pkt->ack.reason == RXRPC_ACK_PING) + pkt->whdr.flags |= RXRPC_REQUEST_ACK; + if (after(top, hard_ack)) { seq = hard_ack + 1; do { @@ -97,6 +100,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) struct kvec iov[2]; rxrpc_serial_t serial; size_t len, n; + bool ping = false; int ioc, ret; u32 abort_code; @@ -147,6 +151,7 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) ret = 0; goto out; } + ping = (call->ackr_reason == RXRPC_ACK_PING); n = rxrpc_fill_out_ack(call, pkt); call->ackr_reason = 0; @@ -183,12 +188,29 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) goto out; } + if (ping) { + call->ackr_ping = serial; + smp_wmb(); + /* We need to stick a time in before we send the packet in case + * the reply gets back before kernel_sendmsg() completes - but + * asking UDP to send the packet can take a relatively long + * time, so we update the time after, on the assumption that + * the packet transmission is more likely to happen towards the + * end of the kernel_sendmsg() call. + */ + call->ackr_ping_time = ktime_get_real(); + set_bit(RXRPC_CALL_PINGING, &call->flags); + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); + } ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); + if (ping) + call->ackr_ping_time = ktime_get_real(); if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { switch (type) { case RXRPC_PACKET_TYPE_ACK: + clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), -- cgit v1.2.3 From 50235c4b5a2fb9a9690f02cd1dea6ca047d7f79e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:31 +0100 Subject: rxrpc: Obtain RTT data by requesting ACKs on DATA packets In addition to sending a PING ACK to gain RTT data, we can set the RXRPC_REQUEST_ACK flag on a DATA packet and get a REQUESTED-ACK ACK. The ACK packet contains the serial number of the packet it is in response to, so we can look through the Tx buffer for a matching DATA packet. This requires that the data packets be stamped with the time of transmission as a ktime rather than having the resend_at time in jiffies. This further requires the resend code to do the resend determination in ktimes and convert to jiffies to set the timer. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 7 +++---- net/rxrpc/call_event.c | 19 +++++++++---------- net/rxrpc/input.c | 35 +++++++++++++++++++++++++++++++++++ net/rxrpc/misc.c | 6 ++++-- net/rxrpc/output.c | 7 +++++-- net/rxrpc/sendmsg.c | 1 - net/rxrpc/sysctl.c | 2 +- 7 files changed, 57 insertions(+), 20 deletions(-) (limited to 'net/rxrpc/input.c') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 8b47f468eb9d..1c4597b2c6cd 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -142,10 +142,7 @@ struct rxrpc_host_header { */ struct rxrpc_skb_priv { union { - unsigned long resend_at; /* time in jiffies at which to resend */ - struct { - u8 nr_jumbo; /* Number of jumbo subpackets */ - }; + u8 nr_jumbo; /* Number of jumbo subpackets */ }; union { unsigned int offset; /* offset into buffer of next read */ @@ -663,6 +660,7 @@ extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5]; enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_ping, + rxrpc_rtt_tx_data, rxrpc_rtt_tx__nr_trace }; @@ -670,6 +668,7 @@ extern const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5]; enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_ping_response, + rxrpc_rtt_rx_requested_ack, rxrpc_rtt_rx__nr_trace }; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 34ad967f2d81..adb2ec61e21f 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -142,12 +142,14 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - unsigned long resend_at, now; + ktime_t now = ktime_get_real(), max_age, oldest, resend_at; int ix; u8 annotation, anno_type; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); + max_age = ktime_sub_ms(now, rxrpc_resend_timeout); + spin_lock_bh(&call->lock); cursor = call->tx_hard_ack; @@ -160,8 +162,7 @@ static void rxrpc_resend(struct rxrpc_call *call) * the packets in the Tx buffer we're going to resend and what the new * resend timeout will be. */ - now = jiffies; - resend_at = now + rxrpc_resend_timeout; + oldest = now; for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; annotation = call->rxtx_annotations[ix]; @@ -175,9 +176,9 @@ static void rxrpc_resend(struct rxrpc_call *call) sp = rxrpc_skb(skb); if (anno_type == RXRPC_TX_ANNO_UNACK) { - if (time_after(sp->resend_at, now)) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; + if (ktime_after(skb->tstamp, max_age)) { + if (ktime_before(skb->tstamp, oldest)) + oldest = skb->tstamp; continue; } } @@ -186,7 +187,8 @@ static void rxrpc_resend(struct rxrpc_call *call) call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; } - call->resend_at = resend_at; + resend_at = ktime_sub(ktime_add_ns(oldest, rxrpc_resend_timeout), now); + call->resend_at = jiffies + nsecs_to_jiffies(ktime_to_ns(resend_at)); /* Now go through the Tx window and perform the retransmissions. We * have to drop the lock for each send. If an ACK comes in whilst the @@ -205,15 +207,12 @@ static void rxrpc_resend(struct rxrpc_call *call) spin_unlock_bh(&call->lock); if (rxrpc_send_data_packet(call, skb) < 0) { - call->resend_at = now + 2; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; } if (rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - sp = rxrpc_skb(skb); - sp->resend_at = now + rxrpc_resend_timeout; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); spin_lock_bh(&call->lock); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index a0a5bd108c9e..c121949de3c8 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -355,6 +355,38 @@ ack: _leave(" [queued]"); } +/* + * Process a requested ACK. + */ +static void rxrpc_input_requested_ack(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t orig_serial, + rxrpc_serial_t ack_serial) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + ktime_t sent_at; + int ix; + + for (ix = 0; ix < RXRPC_RXTX_BUFF_SIZE; ix++) { + skb = call->rxtx_buffer[ix]; + if (!skb) + continue; + + sp = rxrpc_skb(skb); + if (sp->hdr.serial != orig_serial) + continue; + smp_rmb(); + sent_at = skb->tstamp; + goto found; + } + return; + +found: + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_requested_ack, + orig_serial, ack_serial, sent_at, resp_time); +} + /* * Process a ping response. */ @@ -508,6 +540,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) rxrpc_input_ping_response(call, skb->tstamp, acked_serial, sp->hdr.serial); + if (buf.ack.reason == RXRPC_ACK_REQUESTED) + rxrpc_input_requested_ack(call, skb->tstamp, acked_serial, + sp->hdr.serial); if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 56e668352fc7..0d425e707f22 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -68,9 +68,9 @@ unsigned int rxrpc_rx_mtu = 5692; unsigned int rxrpc_rx_jumbo_max = 4; /* - * Time till packet resend (in jiffies). + * Time till packet resend (in milliseconds). */ -unsigned int rxrpc_resend_timeout = 4 * HZ; +unsigned int rxrpc_resend_timeout = 4 * 1000; const char *const rxrpc_pkts[] = { "?00", @@ -186,8 +186,10 @@ const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = { const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5] = { [rxrpc_rtt_tx_ping] = "PING", + [rxrpc_rtt_tx_data] = "DATA", }; const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { [rxrpc_rtt_rx_ping_response] = "PONG", + [rxrpc_rtt_rx_requested_ack] = "RACK", }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0d89cd3f2c01..db01fbb70d23 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -300,9 +300,12 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) goto send_fragmentable; done: - if (ret == 0) { - sp->resend_at = jiffies + rxrpc_resend_timeout; + if (ret >= 0) { + skb->tstamp = ktime_get_real(); + smp_wmb(); sp->hdr.serial = serial; + if (whdr.flags & RXRPC_REQUEST_ACK) + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); } _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 3c969de3ef05..607223f4f871 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -137,7 +137,6 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (seq == 1 && rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - sp->resend_at = jiffies + rxrpc_resend_timeout; ret = rxrpc_send_data_packet(call, skb); if (ret < 0) { _debug("need instant resend %d", ret); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index a03c61c672f5..13d1df03ebac 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -59,7 +59,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_resend_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { -- cgit v1.2.3 From fc943f67773487bb85131273f39b5f183caafe95 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:32 +0100 Subject: rxrpc: Reduce the number of PING ACKs sent We don't want to send a PING ACK for every new incoming call as that just adds to the network traffic. Instead, we send a PING ACK to the first three that we receive and then once per second thereafter. This could probably be made adjustable in future. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- net/rxrpc/input.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net/rxrpc/input.c') diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index adb2ec61e21f..6e2ea8f4ae75 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -142,7 +142,7 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - ktime_t now = ktime_get_real(), max_age, oldest, resend_at; + ktime_t now = ktime_get_real(), max_age, oldest, resend_at; int ix; u8 annotation, anno_type; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c121949de3c8..cbb5d53f09d7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -44,9 +44,12 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, int skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + ktime_t now = skb->tstamp; - rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, - true, true); + if (call->peer->rtt_usage < 3 || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + true, true); } /* -- cgit v1.2.3