/* Management of Tx window, Tx resend, ACKs and out-of-sequence reception * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> #include <linux/circ_buf.h> #include <linux/net.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/udp.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" /* * How long to wait before scheduling ACK generation after seeing a * packet with RXRPC_REQUEST_ACK set (in jiffies). */ unsigned rxrpc_requested_ack_delay = 1; /* * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). * * We use this when we've received new data packets. If those packets aren't * all consumed within this time we will send a DELAY ACK if an ACK was not * requested to let the sender know it doesn't need to resend. */ unsigned rxrpc_soft_ack_delay = 1 * HZ; /* * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). * * We use this when we've consumed some previously soft-ACK'd packets when * further packets aren't immediately received to decide when to send an IDLE * ACK let the other end know that it can free up its Tx buffer space. */ unsigned rxrpc_idle_ack_delay = 0.5 * HZ; /* * Receive window size in packets. This indicates the maximum number of * unconsumed received packets we're willing to retain in memory. Once this * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further * packets. */ unsigned rxrpc_rx_window_size = 32; /* * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet * made by gluing normal packets together that we're willing to handle. */ unsigned rxrpc_rx_mtu = 5692; /* * The maximum number of fragments in a received jumbo packet that we tell the * sender that we're willing to handle. */ unsigned rxrpc_rx_jumbo_max = 4; static const char *rxrpc_acks(u8 reason) { static const char *const str[] = { "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL", "-?-" }; if (reason >= ARRAY_SIZE(str)) reason = ARRAY_SIZE(str) - 1; return str[reason]; } static const s8 rxrpc_ack_priority[] = { [0] = 0, [RXRPC_ACK_DELAY] = 1, [RXRPC_ACK_REQUESTED] = 2, [RXRPC_ACK_IDLE] = 3, [RXRPC_ACK_PING_RESPONSE] = 4, [RXRPC_ACK_DUPLICATE] = 5, [RXRPC_ACK_OUT_OF_SEQUENCE] = 6, [RXRPC_ACK_EXCEEDS_WINDOW] = 7, [RXRPC_ACK_NOSPACE] = 8, }; /* * propose an ACK be sent */ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, __be32 serial, bool immediate) { unsigned long expiry; s8 prior = rxrpc_ack_priority[ack_reason]; ASSERTCMP(prior, >, 0); _enter("{%d},%s,%%%x,%u", call->debug_id, rxrpc_acks(ack_reason), ntohl(serial), immediate); if (prior < rxrpc_ack_priority[call->ackr_reason]) { if (immediate) goto cancel_timer; return; } /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial * numbers */ if (prior == rxrpc_ack_priority[call->ackr_reason]) { if (prior <= 4) call->ackr_serial = serial; if (immediate) goto cancel_timer; return; } call->ackr_reason = ack_reason; call->ackr_serial = serial; switch (ack_reason) { case RXRPC_ACK_DELAY: _debug("run delay timer"); expiry = rxrpc_soft_ack_delay; goto run_timer; case RXRPC_ACK_IDLE: if (!immediate) { _debug("run defer timer"); expiry = rxrpc_idle_ack_delay; goto run_timer; } goto cancel_timer; case RXRPC_ACK_REQUESTED: expiry = rxrpc_requested_ack_delay; if (!expiry) goto cancel_timer; if (!immediate || serial == cpu_to_be32(1)) { _debug("run defer timer"); goto run_timer; } default: _debug("immediate ACK"); goto cancel_timer; } run_timer: expiry += jiffies; if (!timer_pending(&call->ack_timer) || time_after(call->ack_timer.expires, expiry)) mod_timer(&call->ack_timer, expiry); return; cancel_timer: _debug("cancel timer %%%u", ntohl(serial)); try_to_del_timer_sync(&call->ack_timer); read_lock_bh(&call->state_lock); if (call->state <= RXRPC_CALL_COMPLETE && !test_and_set_bit(RXRPC_CALL_ACK, &call->events)) rxrpc_queue_call(call); read_unlock_bh(&call->state_lock); } /* * propose an ACK be sent, locking the call structure */ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, __be32 serial, bool immediate) { s8 prior = rxrpc_ack_priority[ack_reason]; if (prior > rxrpc_ack_priority[call->ackr_reason]) { spin_lock_bh(&call->lock); __rxrpc_propose_ACK(call, ack_reason, serial, immediate); spin_unlock_bh(&call->lock); } } /* * set the resend timer */ static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend, unsigned long resend_at) { read_lock_bh(&call->state_lock); if (call->state >= RXRPC_CALL_COMPLETE) resend = 0; if (resend & 1) { _debug("SET RESEND"); set_bit(RXRPC_CALL_RESEND, &call->events); } if (resend & 2) { _debug("MODIFY RESEND TIMER"); set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); mod_timer(&call->resend_timer, resend_at); } else { _debug("KILL RESEND TIMER"); del_timer_sync(&call->resend_timer); clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events); clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); } read_unlock_bh(&call->state_lock); } /* * resend packets */ static void rxrpc_resend(struct rxrpc_call *call) { struct rxrpc_skb_priv *sp; struct rxrpc_header *hdr; struct sk_buff *txb; unsigned long *p_txb, resend_at; bool stop; int loop; u8 resend; _enter("{%d,%d,%d,%d},", call->acks_hard, call->acks_unacked, atomic_read(&call->sequence), CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); stop = false; resend = 0; resend_at = 0; for (loop = call->acks_tail; loop != call->acks_head || stop; loop = (loop + 1) & (call->acks_winsz - 1) ) { p_txb = call->acks_window + loop; smp_read_barrier_depends(); if (*p_txb & 1) continue; txb = (struct sk_buff *) *p_txb; sp = rxrpc_skb(txb); if (sp->need_resend) { sp->need_resend = false; /* each Tx packet has a new serial number */ sp->hdr.serial = htonl(atomic_inc_return(&call->conn->serial)); hdr = (struct rxrpc_header *) txb->head; hdr->serial = sp->hdr.serial; _proto("Tx DATA %%%u { #%d }", ntohl(sp->hdr.serial), ntohl(sp->hdr.seq)); if (rxrpc_send_packet(call->conn->trans, txb) < 0) { stop = true; sp->resend_at = jiffies + 3; } else { sp->resend_at = jiffies + rxrpc_resend_timeout; } } if (time_after_eq(jiffies + 1, sp->resend_at)) { sp->need_resend = true; resend |= 1; } else if (resend & 2) { if (time_before(sp->resend_at, resend_at)) resend_at = sp->resend_at; } else { resend_at = sp->resend_at; resend |= 2; } } rxrpc_set_resend(call, resend, resend_at); _leave(""); } /* * handle resend timer expiry */ static void rxrpc_resend_timer(struct rxrpc_call *call) { struct rxrpc_skb_priv *sp; struct sk_buff *txb; unsigned long *p_txb, resend_at; int loop; u8 resend; _enter("%d,%d,%d", call->acks_tail, call->acks_unacked, call->acks_head); if (call->state >= RXRPC_CALL_COMPLETE) return; resend = 0; resend_at = 0; for (loop = call->acks_unacked; loop != call->acks_head; loop = (loop + 1) & (call->acks_winsz - 1) ) { p_txb = call->acks_window + loop; smp_read_barrier_depends(); txb = (struct sk_buff *) (*p_txb & ~1); sp = rxrpc_skb(txb); ASSERT(!(*p_txb & 1)); if (sp->need_resend) { ; } else if (time_after_eq(jiffies + 1, sp->resend_at)) { sp->need_resend = true; resend |= 1; } else if (resend & 2) { if (time_before(sp->resend_at, resend_at)) resend_at = sp->resend_at; } else { resend_at = sp->resend_at; resend |= 2; } } rxrpc_set_resend(call, resend, resend_at); _leave(""); } /* * process soft ACKs of our transmitted packets * - these indicate packets the peer has or has not received, but hasn't yet * given to the consumer, and so can still be discarded and re-requested */ static int rxrpc_process_soft_ACKs(struct rxrpc_call *call, struct rxrpc_ackpacket *ack, struct sk_buff *skb) { struct rxrpc_skb_priv *sp; struct sk_buff *txb; unsigned long *p_txb, resend_at; int loop; u8 sacks[RXRPC_MAXACKS], resend; _enter("{%d,%d},{%d},", call->acks_hard, CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz), ack->nAcks); if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0) goto protocol_error; resend = 0; resend_at = 0; for (loop = 0; loop < ack->nAcks; loop++) { p_txb = call->acks_window; p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1); smp_read_barrier_depends(); txb = (struct sk_buff *) (*p_txb & ~1); sp = rxrpc_skb(txb); switch (sacks[loop]) { case RXRPC_ACK_TYPE_ACK: sp->need_resend = false; *p_txb |= 1; break; case RXRPC_ACK_TYPE_NACK: sp->need_resend = true; *p_txb &= ~1; resend = 1; break; default: _debug("Unsupported ACK type %d", sacks[loop]); goto protocol_error; } } smp_mb(); call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1); /* anything not explicitly ACK'd is implicitly NACK'd, but may just not * have been received or processed yet by the far end */ for (loop = call->acks_unacked; loop != call->acks_head; loop = (loop + 1) & (call->acks_winsz - 1) ) { p_txb = call->acks_window + loop; smp_read_barrier_depends(); txb = (struct sk_buff *) (*p_txb & ~1); sp = rxrpc_skb(txb); if (*p_txb & 1) { /* packet must have been discarded */ sp->need_resend = true; *p_txb &= ~1; resend |= 1; } else if (sp->need_resend) { ; } else if (time_after_eq(jiffies + 1, sp->resend_at)) { sp->need_resend = true; resend |= 1; } else if (resend & 2) { if (time_before(sp->resend_at, resend_at)) resend_at = sp->resend_at; } else { resend_at = sp->resend_at; resend |= 2; } } rxrpc_set_resend(call, resend, resend_at); _leave(" = 0"); return 0; protocol_error: _leave(" = -EPROTO"); return -EPROTO; } /* * discard hard-ACK'd packets from the Tx window */ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard) { unsigned long _skb; int tail = call->acks_tail, old_tail; int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz); _enter("{%u,%u},%u", call->acks_hard, win, hard); ASSERTCMP(hard - call->acks_hard, <=, win); while (call->acks_hard < hard) { smp_read_barrier_depends(); _skb = call->acks_window[tail] & ~1; rxrpc_free_skb((struct sk_buff *) _skb); old_tail = tail; tail = (tail + 1) & (call->acks_winsz - 1); call->acks_tail = tail; if (call->acks_unacked == old_tail) call->acks_unacked = tail; call->acks_hard++; } wake_up(&call->tx_waitq); } /* * clear the Tx window in the event of a failure */ static void rxrpc_clear_tx_window(struct rxrpc_call *call) { rxrpc_rotate_tx_window(call, atomic_read(&call->sequence)); } /* * drain the out of sequence received packet queue into the packet Rx queue */ static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; bool terminal; int ret; _enter("{%d,%d}", call->rx_data_post, call->rx_first_oos); spin_lock_bh(&call->lock); ret = -ECONNRESET; if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) goto socket_unavailable; skb = skb_dequeue(&call->rx_oos_queue); if (skb) { sp = rxrpc_skb(skb); _debug("drain OOS packet %d [%d]", ntohl(sp->hdr.seq), call->rx_first_oos); if (ntohl(sp->hdr.seq) != call->rx_first_oos) { skb_queue_head(&call->rx_oos_queue, skb); call->rx_first_oos = ntohl(rxrpc_skb(skb)->hdr.seq); _debug("requeue %p {%u}", skb, call->rx_first_oos); } else { skb->mark = RXRPC_SKB_MARK_DATA; terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) && !(sp->hdr.flags & RXRPC_CLIENT_INITIATED)); ret = rxrpc_queue_rcv_skb(call, skb, true, terminal); BUG_ON(ret < 0); _debug("drain #%u", call->rx_data_post); call->rx_data_post++; /* find out what the next packet is */ skb = skb_peek(&call->rx_oos_queue); if (skb) call->rx_first_oos = ntohl(rxrpc_skb(skb)->hdr.seq); else call->rx_first_oos = 0; _debug("peek %p {%u}", skb, call->rx_first_oos); } } ret = 0; socket_unavailable: spin_unlock_bh(&call->lock); _leave(" = %d", ret); return ret; } /* * insert an out of sequence packet into the buffer */ static void rxrpc_insert_oos_packet(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp, *psp; struct sk_buff *p; u32 seq; sp = rxrpc_skb(skb); seq = ntohl(sp->hdr.seq); _enter(",,{%u}", seq); skb->destructor = rxrpc_packet_destructor; ASSERTCMP(sp->call, ==, NULL); sp->call = call; rxrpc_get_call(call); /* insert into the buffer in sequence order */ spin_lock_bh(&call->lock); skb_queue_walk(&call->rx_oos_queue, p) { psp = rxrpc_skb(p); if (ntohl(psp->hdr.seq) > seq) { _debug("insert oos #%u before #%u", seq, ntohl(psp->hdr.seq)); skb_insert(p, skb, &call->rx_oos_queue); goto inserted; } } _debug("append oos #%u", seq); skb_queue_tail(&call->rx_oos_queue, skb); inserted: /* we might now have a new front to the queue */ if (call->rx_first_oos == 0 || seq < call->rx_first_oos) call->rx_first_oos = seq; read_lock(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE && call->rx_data_post == call->rx_first_oos) { _debug("drain rx oos now"); set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events); } read_unlock(&call->state_lock); spin_unlock_bh(&call->lock); _leave(" [stored #%u]", call->rx_first_oos); } /* * clear the Tx window on final ACK reception */ static void rxrpc_zap_tx_window(struct rxrpc_call *call) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; unsigned long _skb, *acks_window; u8 winsz = call->acks_winsz; int tail; acks_window = call->acks_window; call->acks_window = NULL; while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) { tail = call->acks_tail; smp_read_barrier_depends(); _skb = acks_window[tail] & ~1; smp_mb(); call->acks_tail = (call->acks_tail + 1) & (winsz - 1); skb = (struct sk_buff *) _skb; sp = rxrpc_skb(skb); _debug("+++ clear Tx %u", ntohl(sp->hdr.seq)); rxrpc_free_skb(skb); } kfree(acks_window); } /* * process the extra information that may be appended to an ACK packet */ static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, unsigned int latest, int nAcks) { struct rxrpc_ackinfo ackinfo; struct rxrpc_peer *peer; unsigned int mtu; if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) { _leave(" [no ackinfo]"); return; } _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", latest, ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU), ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max)); mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU)); peer = call->conn->trans->peer; if (mtu < peer->maxdata) { spin_lock_bh(&peer->lock); peer->maxdata = mtu; peer->mtu = mtu + peer->hdrsize; spin_unlock_bh(&peer->lock); _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); } } /* * process packets in the reception queue */ static int rxrpc_process_rx_queue(struct rxrpc_call *call, u32 *_abort_code) { struct rxrpc_ackpacket ack; struct rxrpc_skb_priv *sp; struct sk_buff *skb; bool post_ACK; int latest; u32 hard, tx; _enter(""); process_further: skb = skb_dequeue(&call->rx_queue); if (!skb) return -EAGAIN; _net("deferred skb %p", skb); sp = rxrpc_skb(skb); _debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state); post_ACK = false; switch (sp->hdr.type) { /* data packets that wind up here have been received out of * order, need security processing or are jumbo packets */ case RXRPC_PACKET_TYPE_DATA: _proto("OOSQ DATA %%%u { #%u }", ntohl(sp->hdr.serial), ntohl(sp->hdr.seq)); /* secured packets must be verified and possibly decrypted */ if (rxrpc_verify_packet(call, skb, _abort_code) < 0) goto protocol_error; rxrpc_insert_oos_packet(call, skb); goto process_further; /* partial ACK to process */ case RXRPC_PACKET_TYPE_ACK: if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) { _debug("extraction failure"); goto protocol_error; } if (!skb_pull(skb, sizeof(ack))) BUG(); latest = ntohl(sp->hdr.serial); hard = ntohl(ack.firstPacket); tx = atomic_read(&call->sequence); _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", latest, ntohs(ack.maxSkew), hard, ntohl(ack.previousPacket), ntohl(ack.serial), rxrpc_acks(ack.reason), ack.nAcks); rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks); if (ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", latest); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, sp->hdr.serial, true); } /* discard any out-of-order or duplicate ACKs */ if (latest - call->acks_latest <= 0) { _debug("discard ACK %d <= %d", latest, call->acks_latest); goto discard; } call->acks_latest = latest; if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY && call->state != RXRPC_CALL_SERVER_SEND_REPLY && call->state != RXRPC_CALL_SERVER_AWAIT_ACK) goto discard; _debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state); if (hard > 0) { if (hard - 1 > tx) { _debug("hard-ACK'd packet %d not transmitted" " (%d top)", hard - 1, tx); goto protocol_error; } if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY || call->state == RXRPC_CALL_SERVER_AWAIT_ACK) && hard > tx) goto all_acked; smp_rmb(); rxrpc_rotate_tx_window(call, hard - 1); } if (ack.nAcks > 0) { if (hard - 1 + ack.nAcks > tx) { _debug("soft-ACK'd packet %d+%d not" " transmitted (%d top)", hard - 1, ack.nAcks, tx); goto protocol_error; } if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0) goto protocol_error; } goto discard; /* complete ACK to process */ case RXRPC_PACKET_TYPE_ACKALL: goto all_acked; /* abort and busy are handled elsewhere */ case RXRPC_PACKET_TYPE_BUSY: case RXRPC_PACKET_TYPE_ABORT: BUG(); /* connection level events - also handled elsewhere */ case RXRPC_PACKET_TYPE_CHALLENGE: case RXRPC_PACKET_TYPE_RESPONSE: case RXRPC_PACKET_TYPE_DEBUG: BUG(); } /* if we've had a hard ACK that covers all the packets we've sent, then * that ends that phase of the operation */ all_acked: write_lock_bh(&call->state_lock); _debug("ack all %d", call->state); switch (call->state) { case RXRPC_CALL_CLIENT_AWAIT_REPLY: call->state = RXRPC_CALL_CLIENT_RECV_REPLY; break; case RXRPC_CALL_SERVER_AWAIT_ACK: _debug("srv complete"); call->state = RXRPC_CALL_COMPLETE; post_ACK = true; break; case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_SERVER_RECV_REQUEST: goto protocol_error_unlock; /* can't occur yet */ default: write_unlock_bh(&call->state_lock); goto discard; /* assume packet left over from earlier phase */ } write_unlock_bh(&call->state_lock); /* if all the packets we sent are hard-ACK'd, then we can discard * whatever we've got left */ _debug("clear Tx %d", CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); del_timer_sync(&call->resend_timer); clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events); if (call->acks_window) rxrpc_zap_tx_window(call); if (post_ACK) { /* post the final ACK message for userspace to pick up */ _debug("post ACK"); skb->mark = RXRPC_SKB_MARK_FINAL_ACK; sp->call = call; rxrpc_get_call(call); spin_lock_bh(&call->lock); if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0) BUG(); spin_unlock_bh(&call->lock); goto process_further; } discard: rxrpc_free_skb(skb); goto process_further; protocol_error_unlock: write_unlock_bh(&call->state_lock); protocol_error: rxrpc_free_skb(skb); _leave(" = -EPROTO"); return -EPROTO; } /* * post a message to the socket Rx queue for recvmsg() to pick up */ static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error, bool fatal) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; int ret; _enter("{%d,%lx},%u,%u,%d", call->debug_id, call->flags, mark, error, fatal); /* remove timers and things for fatal messages */ if (fatal) { del_timer_sync(&call->resend_timer); del_timer_sync(&call->ack_timer); clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); } if (mark != RXRPC_SKB_MARK_NEW_CALL && !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { _leave("[no userid]"); return 0; } if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { skb = alloc_skb(0, GFP_NOFS); if (!skb) return -ENOMEM; rxrpc_new_skb(skb); skb->mark = mark; sp = rxrpc_skb(skb); memset(sp, 0, sizeof(*sp)); sp->error = error; sp->call = call; rxrpc_get_call(call); spin_lock_bh(&call->lock); ret = rxrpc_queue_rcv_skb(call, skb, true, fatal); spin_unlock_bh(&call->lock); BUG_ON(ret < 0); } return 0; } /* * handle background processing of incoming call packets and ACK / abort * generation */ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); struct rxrpc_ackpacket ack; struct rxrpc_ackinfo ackinfo; struct rxrpc_header hdr; struct msghdr msg; struct kvec iov[5]; unsigned long bits; __be32 data, pad; size_t len; int genbit, loop, nbit, ioc, ret, mtu; u32 abort_code = RX_PROTOCOL_ERROR; u8 *acks = NULL; //printk("\n--------------------\n"); _enter("{%d,%s,%lx} [%lu]", call->debug_id, rxrpc_call_states[call->state], call->events, (jiffies - call->creation_jif) / (HZ / 10)); if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) { _debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX"); return; } /* there's a good chance we're going to have to send a message, so set * one up in advance */ msg.msg_name = &call->conn->trans->peer->srx.transport.sin; msg.msg_namelen = sizeof(call->conn->trans->peer->srx.transport.sin); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; hdr.epoch = call->conn->epoch; hdr.cid = call->cid; hdr.callNumber = call->call_id; hdr.seq = 0; hdr.type = RXRPC_PACKET_TYPE_ACK; hdr.flags = call->conn->out_clientflag; hdr.userStatus = 0; hdr.securityIndex = call->conn->security_ix; hdr._rsvd = 0; hdr.serviceId = call->conn->service_id; memset(iov, 0, sizeof(iov)); iov[0].iov_base = &hdr; iov[0].iov_len = sizeof(hdr); /* deal with events of a final nature */ if (test_bit(RXRPC_CALL_RELEASE, &call->events)) { rxrpc_release_call(call); clear_bit(RXRPC_CALL_RELEASE, &call->events); } if (test_bit(RXRPC_CALL_RCVD_ERROR, &call->events)) { int error; clear_bit(RXRPC_CALL_CONN_ABORT, &call->events); clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events); clear_bit(RXRPC_CALL_ABORT, &call->events); error = call->conn->trans->peer->net_error; _debug("post net error %d", error); if (rxrpc_post_message(call, RXRPC_SKB_MARK_NET_ERROR, error, true) < 0) goto no_mem; clear_bit(RXRPC_CALL_RCVD_ERROR, &call->events); goto kill_ACKs; } if (test_bit(RXRPC_CALL_CONN_ABORT, &call->events)) { ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE); clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events); clear_bit(RXRPC_CALL_ABORT, &call->events); _debug("post conn abort"); if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, call->conn->error, true) < 0) goto no_mem; clear_bit(RXRPC_CALL_CONN_ABORT, &call->events); goto kill_ACKs; } if (test_bit(RXRPC_CALL_REJECT_BUSY, &call->events)) { hdr.type = RXRPC_PACKET_TYPE_BUSY; genbit = RXRPC_CALL_REJECT_BUSY; goto send_message; } if (test_bit(RXRPC_CALL_ABORT, &call->events)) { ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE); if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, ECONNABORTED, true) < 0) goto no_mem; hdr.type = RXRPC_PACKET_TYPE_ABORT; data = htonl(call->abort_code); iov[1].iov_base = &data; iov[1].iov_len = sizeof(data); genbit = RXRPC_CALL_ABORT; goto send_message; } if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) { genbit = RXRPC_CALL_ACK_FINAL; ack.bufferSpace = htons(8); ack.maxSkew = 0; ack.serial = 0; ack.reason = RXRPC_ACK_IDLE; ack.nAcks = 0; call->ackr_reason = 0; spin_lock_bh(&call->lock); ack.serial = call->ackr_serial; ack.previousPacket = call->ackr_prev_seq; ack.firstPacket = htonl(call->rx_data_eaten + 1); spin_unlock_bh(&call->lock); pad = 0; iov[1].iov_base = &ack; iov[1].iov_len = sizeof(ack); iov[2].iov_base = &pad; iov[2].iov_len = 3; iov[3].iov_base = &ackinfo; iov[3].iov_len = sizeof(ackinfo); goto send_ACK; } if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) | (1 << RXRPC_CALL_RCVD_ABORT)) ) { u32 mark; if (test_bit(RXRPC_CALL_RCVD_ABORT, &call->events)) mark = RXRPC_SKB_MARK_REMOTE_ABORT; else mark = RXRPC_SKB_MARK_BUSY; _debug("post abort/busy"); rxrpc_clear_tx_window(call); if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0) goto no_mem; clear_bit(RXRPC_CALL_RCVD_BUSY, &call->events); clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events); goto kill_ACKs; } if (test_and_clear_bit(RXRPC_CALL_RCVD_ACKALL, &call->events)) { _debug("do implicit ackall"); rxrpc_clear_tx_window(call); } if (test_bit(RXRPC_CALL_LIFE_TIMER, &call->events)) { write_lock_bh(&call->state_lock); if (call->state <= RXRPC_CALL_COMPLETE) { call->state = RXRPC_CALL_LOCALLY_ABORTED; call->abort_code = RX_CALL_TIMEOUT; set_bit(RXRPC_CALL_ABORT, &call->events); } write_unlock_bh(&call->state_lock); _debug("post timeout"); if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, ETIME, true) < 0) goto no_mem; clear_bit(RXRPC_CALL_LIFE_TIMER, &call->events); goto kill_ACKs; } /* deal with assorted inbound messages */ if (!skb_queue_empty(&call->rx_queue)) { switch (rxrpc_process_rx_queue(call, &abort_code)) { case 0: case -EAGAIN: break; case -ENOMEM: goto no_mem; case -EKEYEXPIRED: case -EKEYREJECTED: case -EPROTO: rxrpc_abort_call(call, abort_code); goto kill_ACKs; } } /* handle resending */ if (test_and_clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events)) rxrpc_resend_timer(call); if (test_and_clear_bit(RXRPC_CALL_RESEND, &call->events)) rxrpc_resend(call); /* consider sending an ordinary ACK */ if (test_bit(RXRPC_CALL_ACK, &call->events)) { _debug("send ACK: window: %d - %d { %lx }", call->rx_data_eaten, call->ackr_win_top, call->ackr_window[0]); if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST && call->ackr_reason != RXRPC_ACK_PING_RESPONSE) { /* ACK by sending reply DATA packet in this state */ clear_bit(RXRPC_CALL_ACK, &call->events); goto maybe_reschedule; } genbit = RXRPC_CALL_ACK; acks = kzalloc(call->ackr_win_top - call->rx_data_eaten, GFP_NOFS); if (!acks) goto no_mem; //hdr.flags = RXRPC_SLOW_START_OK; ack.bufferSpace = htons(8); ack.maxSkew = 0; ack.serial = 0; ack.reason = 0; spin_lock_bh(&call->lock); ack.reason = call->ackr_reason; ack.serial = call->ackr_serial; ack.previousPacket = call->ackr_prev_seq; ack.firstPacket = htonl(call->rx_data_eaten + 1); ack.nAcks = 0; for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { nbit = loop * BITS_PER_LONG; for (bits = call->ackr_window[loop]; bits; bits >>= 1 ) { _debug("- l=%d n=%d b=%lx", loop, nbit, bits); if (bits & 1) { acks[nbit] = RXRPC_ACK_TYPE_ACK; ack.nAcks = nbit + 1; } nbit++; } } call->ackr_reason = 0; spin_unlock_bh(&call->lock); pad = 0; iov[1].iov_base = &ack; iov[1].iov_len = sizeof(ack); iov[2].iov_base = acks; iov[2].iov_len = ack.nAcks; iov[3].iov_base = &pad; iov[3].iov_len = 3; iov[4].iov_base = &ackinfo; iov[4].iov_len = sizeof(ackinfo); switch (ack.reason) { case RXRPC_ACK_REQUESTED: case RXRPC_ACK_DUPLICATE: case RXRPC_ACK_OUT_OF_SEQUENCE: case RXRPC_ACK_EXCEEDS_WINDOW: case RXRPC_ACK_NOSPACE: case RXRPC_ACK_PING: case RXRPC_ACK_PING_RESPONSE: goto send_ACK_with_skew; case RXRPC_ACK_DELAY: case RXRPC_ACK_IDLE: goto send_ACK; } } /* handle completion of security negotiations on an incoming * connection */ if (test_and_clear_bit(RXRPC_CALL_SECURED, &call->events)) { _debug("secured"); spin_lock_bh(&call->lock); if (call->state == RXRPC_CALL_SERVER_SECURING) { _debug("securing"); write_lock(&call->conn->lock); if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && !test_bit(RXRPC_CALL_RELEASE, &call->events)) { _debug("not released"); call->state = RXRPC_CALL_SERVER_ACCEPTING; list_move_tail(&call->accept_link, &call->socket->acceptq); } write_unlock(&call->conn->lock); read_lock(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) set_bit(RXRPC_CALL_POST_ACCEPT, &call->events); read_unlock(&call->state_lock); } spin_unlock_bh(&call->lock); if (!test_bit(RXRPC_CALL_POST_ACCEPT, &call->events)) goto maybe_reschedule; } /* post a notification of an acceptable connection to the app */ if (test_bit(RXRPC_CALL_POST_ACCEPT, &call->events)) { _debug("post accept"); if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL, 0, false) < 0) goto no_mem; clear_bit(RXRPC_CALL_POST_ACCEPT, &call->events); goto maybe_reschedule; } /* handle incoming call acceptance */ if (test_and_clear_bit(RXRPC_CALL_ACCEPTED, &call->events)) { _debug("accepted"); ASSERTCMP(call->rx_data_post, ==, 0); call->rx_data_post = 1; read_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events); read_unlock_bh(&call->state_lock); } /* drain the out of sequence received packet queue into the packet Rx * queue */ if (test_and_clear_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events)) { while (call->rx_data_post == call->rx_first_oos) if (rxrpc_drain_rx_oos_queue(call) < 0) break; goto maybe_reschedule; } /* other events may have been raised since we started checking */ goto maybe_reschedule; send_ACK_with_skew: ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) - ntohl(ack.serial)); send_ACK: mtu = call->conn->trans->peer->if_mtu; mtu -= call->conn->trans->peer->hdrsize; ackinfo.maxMTU = htonl(mtu); ackinfo.rwind = htonl(rxrpc_rx_window_size); /* permit the peer to send us jumbo packets if it wants to */ ackinfo.rxMTU = htonl(rxrpc_rx_mtu); ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max); hdr.serial = htonl(atomic_inc_return(&call->conn->serial)); _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", ntohl(hdr.serial), ntohs(ack.maxSkew), ntohl(ack.firstPacket), ntohl(ack.previousPacket), ntohl(ack.serial), rxrpc_acks(ack.reason), ack.nAcks); del_timer_sync(&call->ack_timer); if (ack.nAcks > 0) set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags); goto send_message_2; send_message: _debug("send message"); hdr.serial = htonl(atomic_inc_return(&call->conn->serial)); _proto("Tx %s %%%u", rxrpc_pkts[hdr.type], ntohl(hdr.serial)); send_message_2: len = iov[0].iov_len; ioc = 1; if (iov[4].iov_len) { ioc = 5; len += iov[4].iov_len; len += iov[3].iov_len; len += iov[2].iov_len; len += iov[1].iov_len; } else if (iov[3].iov_len) { ioc = 4; len += iov[3].iov_len; len += iov[2].iov_len; len += iov[1].iov_len; } else if (iov[2].iov_len) { ioc = 3; len += iov[2].iov_len; len += iov[1].iov_len; } else if (iov[1].iov_len) { ioc = 2; len += iov[1].iov_len; } ret = kernel_sendmsg(call->conn->trans->local->socket, &msg, iov, ioc, len); if (ret < 0) { _debug("sendmsg failed: %d", ret); read_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_DEAD) rxrpc_queue_call(call); read_unlock_bh(&call->state_lock); goto error; } switch (genbit) { case RXRPC_CALL_ABORT: clear_bit(genbit, &call->events); clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events); goto kill_ACKs; case RXRPC_CALL_ACK_FINAL: write_lock_bh(&call->state_lock); if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK) call->state = RXRPC_CALL_COMPLETE; write_unlock_bh(&call->state_lock); goto kill_ACKs; default: clear_bit(genbit, &call->events); switch (call->state) { case RXRPC_CALL_CLIENT_AWAIT_REPLY: case RXRPC_CALL_CLIENT_RECV_REPLY: case RXRPC_CALL_SERVER_RECV_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: _debug("start ACK timer"); rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, call->ackr_serial, false); default: break; } goto maybe_reschedule; } kill_ACKs: del_timer_sync(&call->ack_timer); if (test_and_clear_bit(RXRPC_CALL_ACK_FINAL, &call->events)) rxrpc_put_call(call); clear_bit(RXRPC_CALL_ACK, &call->events); maybe_reschedule: if (call->events || !skb_queue_empty(&call->rx_queue)) { read_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_DEAD) rxrpc_queue_call(call); read_unlock_bh(&call->state_lock); } /* don't leave aborted connections on the accept queue */ if (call->state >= RXRPC_CALL_COMPLETE && !list_empty(&call->accept_link)) { _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }", call, call->events, call->flags, ntohl(call->conn->cid)); read_lock_bh(&call->state_lock); if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) rxrpc_queue_call(call); read_unlock_bh(&call->state_lock); } error: clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags); kfree(acks); /* because we don't want two CPUs both processing the work item for one * call at the same time, we use a flag to note when it's busy; however * this means there's a race between clearing the flag and setting the * work pending bit and the work item being processed again */ if (call->events && !work_pending(&call->processor)) { _debug("jumpstart %x", ntohl(call->conn->cid)); rxrpc_queue_call(call); } _leave(""); return; no_mem: _debug("out of memory"); goto maybe_reschedule; }