From af87b823ca2b05257192e8d48dc686db6173d7b2 Mon Sep 17 00:00:00 2001 From: Doug Graham Date: Wed, 29 Jul 2009 12:05:57 -0400 Subject: sctp: Fix piggybacked ACKs This patch corrects the conditions under which a SACK will be piggybacked on a DATA packet. The previous condition was incorrect due to a misinterpretation of RFC 4960 and/or RFC 2960. Specifically, the following paragraph from section 6.2 had not been implemented correctly: Before an endpoint transmits a DATA chunk, if any received DATA chunks have not been acknowledged (e.g., due to delayed ack), the sender should create a SACK and bundle it with the outbound DATA chunk, as long as the size of the final SCTP packet does not exceed the current MTU. See Section 6.2. When about to send a DATA chunk, the code now checks to see if the SACK timer is running. If it is, we know we have a SACK to send to the peer, so we append the SACK (assuming available space in the packet) and turn off the timer. For a simple request-response scenario, this will result in the SACK being bundled with the response, meaning the the SACK is received quickly by the client, and also meaning that no separate SACK packet needs to be sent by the server to acknowledge the request. Prior to this patch, a separate SACK packet would have been sent by the server SCTP only after its delayed-ACK timer had expired (usually 200ms). This is wasteful of bandwidth, and can also have a major negative impact on performance due the interaction of delayed ACKs with the Nagle algorithm. Signed-off-by: Doug Graham Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/sctp/output.c') diff --git a/net/sctp/output.c b/net/sctp/output.c index b94c21190566..94c110dcaf1d 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -234,18 +234,19 @@ static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt, if (sctp_chunk_is_data(chunk) && !pkt->has_sack && !pkt->has_cookie_echo) { struct sctp_association *asoc; + struct timer_list *timer; asoc = pkt->transport->asoc; + timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; - if (asoc->a_rwnd > asoc->rwnd) { + /* If the SACK timer is running, we have a pending SACK */ + if (timer_pending(timer)) { struct sctp_chunk *sack; asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (sack) { - struct timer_list *timer; retval = sctp_packet_append_chunk(pkt, sack); asoc->peer.sack_needed = 0; - timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) sctp_association_put(asoc); } } -- cgit v1.2.3 From e83963b769a2c38b436f5dcf82309f5cbc2f6012 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 7 Aug 2009 10:43:07 -0400 Subject: sctp: Generate SACKs when actually sending outbound DATA We are now trying to bundle SACKs when we have outbound DATA to send. However, there are situations where this outbound DATA will not be sent (due to congestion or available window). In such cases it's ok to wait for the timer to expire. This patch refactors the sending code so that betfore attempting to bundle the SACK we check to see if the DATA will actually be transmitted. Based on eirlier works for Doug Graham and Wei Youngjun . Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 142 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 85 insertions(+), 57 deletions(-) (limited to 'net/sctp/output.c') diff --git a/net/sctp/output.c b/net/sctp/output.c index 94c110dcaf1d..e25e2e20b63d 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -61,8 +61,13 @@ #include /* Forward declarations for private helpers. */ -static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, +static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk); +static void sctp_packet_append_data(struct sctp_packet *packet, + struct sctp_chunk *chunk); +static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, + struct sctp_chunk *chunk, + u16 chunk_len); /* Config a packet. * This appears to be a followup set of initializations. @@ -262,13 +267,20 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, { sctp_xmit_t retval = SCTP_XMIT_OK; __u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length)); - size_t psize; - size_t pmtu; - int too_big; SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __func__, packet, chunk); + /* Data chunks are special. Before seeing what else we can + * bundle into this packet, check to see if we are allowed to + * send this DATA. + */ + if (sctp_chunk_is_data(chunk)) { + retval = sctp_packet_can_append_data(packet, chunk); + if (retval != SCTP_XMIT_OK) + goto finish; + } + /* Try to bundle AUTH chunk */ retval = sctp_packet_bundle_auth(packet, chunk); if (retval != SCTP_XMIT_OK) @@ -279,51 +291,16 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, if (retval != SCTP_XMIT_OK) goto finish; - psize = packet->size; - pmtu = ((packet->transport->asoc) ? - (packet->transport->asoc->pathmtu) : - (packet->transport->pathmtu)); - - too_big = (psize + chunk_len > pmtu); - - /* Decide if we need to fragment or resubmit later. */ - if (too_big) { - /* It's OK to fragmet at IP level if any one of the following - * is true: - * 1. The packet is empty (meaning this chunk is greater - * the MTU) - * 2. The chunk we are adding is a control chunk - * 3. The packet doesn't have any data in it yet and data - * requires authentication. - */ - if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) || - (!packet->has_data && chunk->auth)) { - /* We no longer do re-fragmentation. - * Just fragment at the IP layer, if we - * actually hit this condition - */ - packet->ipfragok = 1; - goto append; - - } else { - retval = SCTP_XMIT_PMTU_FULL; - goto finish; - } - } - -append: - /* We believe that this chunk is OK to add to the packet (as - * long as we have the cwnd for it). - */ + /* Check to see if this chunk will fit into the packet */ + retval = sctp_packet_will_fit(packet, chunk, chunk_len); + if (retval != SCTP_XMIT_OK) + goto finish; - /* DATA is a special case since we must examine both rwnd and cwnd - * before we send DATA. - */ + /* We believe that this chunk is OK to add to the packet */ switch (chunk->chunk_hdr->type) { case SCTP_CID_DATA: - retval = sctp_packet_append_data(packet, chunk); - if (SCTP_XMIT_OK != retval) - goto finish; + /* Account for the data being in the packet */ + sctp_packet_append_data(packet, chunk); /* Disallow SACK bundling after DATA. */ packet->has_sack = 1; /* Disallow AUTH bundling after DATA */ @@ -633,16 +610,15 @@ nomem: * 2nd Level Abstractions ********************************************************************/ -/* This private function handles the specifics of appending DATA chunks. */ -static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, +/* This private function check to see if a chunk can be added */ +static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk) { sctp_xmit_t retval = SCTP_XMIT_OK; - size_t datasize, rwnd, inflight; + size_t datasize, rwnd, inflight, flight_size; struct sctp_transport *transport = packet->transport; __u32 max_burst_bytes; struct sctp_association *asoc = transport->asoc; - struct sctp_sock *sp = sctp_sk(asoc->base.sk); struct sctp_outq *q = &asoc->outqueue; /* RFC 2960 6.1 Transmission of DATA Chunks @@ -659,7 +635,8 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, */ rwnd = asoc->peer.rwnd; - inflight = asoc->outqueue.outstanding_bytes; + inflight = q->outstanding_bytes; + flight_size = transport->flight_size; datasize = sctp_data_size(chunk); @@ -682,8 +659,8 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * cwnd = flightsize + Max.Burst * MTU */ max_burst_bytes = asoc->max_burst * asoc->pathmtu; - if ((transport->flight_size + max_burst_bytes) < transport->cwnd) { - transport->cwnd = transport->flight_size + max_burst_bytes; + if ((flight_size + max_burst_bytes) < transport->cwnd) { + transport->cwnd = flight_size + max_burst_bytes; SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " "transport: %p, cwnd: %d, " "ssthresh: %d, flight_size: %d, " @@ -708,7 +685,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * ignore the value of cwnd and SHOULD NOT delay retransmission. */ if (chunk->fast_retransmit != SCTP_NEED_FRTX) - if (transport->flight_size >= transport->cwnd) { + if (flight_size >= transport->cwnd) { retval = SCTP_XMIT_RWND_FULL; goto finish; } @@ -718,8 +695,8 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * if any previously transmitted data on the connection remains * unacknowledged. */ - if (!sp->nodelay && sctp_packet_empty(packet) && - q->outstanding_bytes && sctp_state(asoc, ESTABLISHED)) { + if (!sctp_sk(asoc->base.sk)->nodelay && sctp_packet_empty(packet) && + inflight && sctp_state(asoc, ESTABLISHED)) { unsigned len = datasize + q->out_qlen; /* Check whether this chunk and all the rest of pending @@ -732,6 +709,19 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, } } +finish: + return retval; +} + +/* This private function does management things when adding DATA chunk */ +static void sctp_packet_append_data(struct sctp_packet *packet, + struct sctp_chunk *chunk) +{ + struct sctp_transport *transport = packet->transport; + size_t datasize = sctp_data_size(chunk); + struct sctp_association *asoc = transport->asoc; + u32 rwnd = asoc->peer.rwnd; + /* Keep track of how many bytes are in flight over this transport. */ transport->flight_size += datasize; @@ -754,7 +744,45 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, /* Has been accepted for transmission. */ if (!asoc->peer.prsctp_capable) chunk->msg->can_abandon = 0; +} + +static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, + struct sctp_chunk *chunk, + u16 chunk_len) +{ + size_t psize; + size_t pmtu; + int too_big; + sctp_xmit_t retval = SCTP_XMIT_OK; + + psize = packet->size; + pmtu = ((packet->transport->asoc) ? + (packet->transport->asoc->pathmtu) : + (packet->transport->pathmtu)); + + too_big = (psize + chunk_len > pmtu); + + /* Decide if we need to fragment or resubmit later. */ + if (too_big) { + /* It's OK to fragmet at IP level if any one of the following + * is true: + * 1. The packet is empty (meaning this chunk is greater + * the MTU) + * 2. The chunk we are adding is a control chunk + * 3. The packet doesn't have any data in it yet and data + * requires authentication. + */ + if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) || + (!packet->has_data && chunk->auth)) { + /* We no longer do re-fragmentation. + * Just fragment at the IP layer, if we + * actually hit this condition + */ + packet->ipfragok = 1; + } else { + retval = SCTP_XMIT_PMTU_FULL; + } + } -finish: return retval; } -- cgit v1.2.3 From b29e7907288554db9c987c36facfd0304ee8ff5a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:20:59 -0400 Subject: sctp: Nagle delay should be based on path mtu The decision to delay due to Nagle should be based on the path mtu and future packet size. We currently incorrectly base it on 'frag_point' which is the SCTP DATA segment size, and also we do not count DATA chunk header overhead in the computation. This actuall allows situations where a user can set low 'frag_point', and then send small messages without delay. Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/sctp/output.c') diff --git a/net/sctp/output.c b/net/sctp/output.c index e25e2e20b63d..d0b84f6eba4d 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -697,13 +697,14 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, */ if (!sctp_sk(asoc->base.sk)->nodelay && sctp_packet_empty(packet) && inflight && sctp_state(asoc, ESTABLISHED)) { - unsigned len = datasize + q->out_qlen; + unsigned max = transport->pathmtu - packet->overhead; + unsigned len = chunk->skb->len + q->out_qlen; /* Check whether this chunk and all the rest of pending * data will fit or delay in hopes of bundling a full * sized packet. */ - if (len < asoc->frag_point) { + if (len < max) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } -- cgit v1.2.3 From cb95ea32a457871f72752164de8d94fa20f4703c Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:20:59 -0400 Subject: sctp: Don't do NAGLE delay on large writes that were fragmented small SCTP will delay the last part of a large write due to NAGLE, if that part is smaller then MTU. Since we are doing large writes, we might as well send the last portion now instead of waiting untill the next large write happens. The small portion will be sent as is regardless, so it's better to not delay it. This is a result of much discussions with Wei Yongjun and Doug Graham . Many thanks go out to them. Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 2 +- net/sctp/chunk.c | 2 ++ net/sctp/output.c | 4 +++- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net/sctp/output.c') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index b1bd2689bb70..df4c6321996d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -628,7 +628,7 @@ struct sctp_datamsg { /* Chunks waiting to be submitted to lower layer. */ struct list_head chunks; /* Chunks that have been transmitted. */ - struct list_head track; + size_t msg_size; /* Reference counting. */ atomic_t refcnt; /* When is this message no longer interesting to the peer? */ diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 645577ddc33e..acf7c4d128f7 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -59,6 +59,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) msg->can_abandon = 0; msg->expires_at = 0; INIT_LIST_HEAD(&msg->chunks); + msg->msg_size = 0; } /* Allocate and initialize datamsg. */ @@ -155,6 +156,7 @@ static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chu { sctp_datamsg_hold(msg); chunk->msg = msg; + msg->msg_size += chunk->skb->len; } diff --git a/net/sctp/output.c b/net/sctp/output.c index d0b84f6eba4d..b801bc9fb639 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -703,8 +703,10 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, /* Check whether this chunk and all the rest of pending * data will fit or delay in hopes of bundling a full * sized packet. + * Don't delay large message writes that may have been + * fragmeneted into small peices. */ - if (len < max) { + if ((len < max) && (chunk->msg->msg_size < max)) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } -- cgit v1.2.3 From d521c08f4c16d27f193718da778503a6472501da Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 2 Sep 2009 13:05:33 +0800 Subject: sctp: fix to reset packet information after packet transmit The packet information does not reset after packet transmit, this may cause some problems such as following DATA chunk be sent without AUTH chunk, even if the authentication of DATA chunk has been requested by the peer. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'net/sctp/output.c') diff --git a/net/sctp/output.c b/net/sctp/output.c index b801bc9fb639..1f9336177ee2 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -136,6 +136,17 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, return packet; } +static void sctp_packet_reset(struct sctp_packet *packet) +{ + packet->size = packet->overhead; + packet->has_cookie_echo = 0; + packet->has_sack = 0; + packet->has_data = 0; + packet->has_auth = 0; + packet->ipfragok = 0; + packet->auth = NULL; +} + /* Free a packet. */ void sctp_packet_free(struct sctp_packet *packet) { @@ -576,7 +587,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) (*tp->af_specific->sctp_xmit)(nskb, tp); out: - packet->size = packet->overhead; + sctp_packet_reset(packet); return err; no_route: kfree_skb(nskb); -- cgit v1.2.3 From 4007cc88ceec8892b74792f0a10983b140beae72 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 Sep 2009 18:21:00 -0400 Subject: sctp: Correctly track if AUTH has been bundled. We currently track if AUTH has been bundled using the 'auth' pointer to the chunk. However, AUTH is disallowed after DATA is already in the packet, so we need to instead use the 'has_auth' field. Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp/output.c') diff --git a/net/sctp/output.c b/net/sctp/output.c index 1f9336177ee2..e47398c07185 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -220,7 +220,7 @@ static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt, /* See if this is an auth chunk we are bundling or if * auth is already bundled. */ - if (chunk->chunk_hdr->type == SCTP_CID_AUTH || pkt->auth) + if (chunk->chunk_hdr->type == SCTP_CID_AUTH || pkt->has_auth) return retval; /* if the peer did not request this chunk to be authenticated, -- cgit v1.2.3 From be2971438dec2e2d041af4701472a93a7dd03642 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 4 Sep 2009 14:34:06 +0800 Subject: sctp: remove dup code in net/sctp/output.c Use sctp_packet_reset() instead of dup code. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'net/sctp/output.c') diff --git a/net/sctp/output.c b/net/sctp/output.c index e47398c07185..5cbda8f1ddfd 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -69,6 +69,17 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, struct sctp_chunk *chunk, u16 chunk_len); +static void sctp_packet_reset(struct sctp_packet *packet) +{ + packet->size = packet->overhead; + packet->has_cookie_echo = 0; + packet->has_sack = 0; + packet->has_data = 0; + packet->has_auth = 0; + packet->ipfragok = 0; + packet->auth = NULL; +} + /* Config a packet. * This appears to be a followup set of initializations. */ @@ -80,13 +91,8 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, SCTP_DEBUG_PRINTK("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); + sctp_packet_reset(packet); packet->vtag = vtag; - packet->has_cookie_echo = 0; - packet->has_sack = 0; - packet->has_auth = 0; - packet->has_data = 0; - packet->ipfragok = 0; - packet->auth = NULL; if (ecn_capable && sctp_packet_empty(packet)) { chunk = sctp_get_ecne_prepend(packet->transport->asoc); @@ -124,29 +130,12 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, } overhead += sizeof(struct sctphdr); packet->overhead = overhead; - packet->size = overhead; + sctp_packet_reset(packet); packet->vtag = 0; - packet->has_cookie_echo = 0; - packet->has_sack = 0; - packet->has_auth = 0; - packet->has_data = 0; - packet->ipfragok = 0; packet->malloced = 0; - packet->auth = NULL; return packet; } -static void sctp_packet_reset(struct sctp_packet *packet) -{ - packet->size = packet->overhead; - packet->has_cookie_echo = 0; - packet->has_sack = 0; - packet->has_data = 0; - packet->has_auth = 0; - packet->ipfragok = 0; - packet->auth = NULL; -} - /* Free a packet. */ void sctp_packet_free(struct sctp_packet *packet) { -- cgit v1.2.3