diff options
Diffstat (limited to 'net/rxrpc')
33 files changed, 3748 insertions, 3367 deletions
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index accd35c05577..7ae023b37a83 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -58,4 +58,11 @@ config RXKAD See Documentation/networking/rxrpc.rst. +config RXPERF + tristate "RxRPC test service" + help + Provide an rxperf service tester. This listens on UDP port 7009 for + incoming calls from the rxperf program (an example of which can be + found in OpenAFS). + endif diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index b11281bed2a4..e76d3459d78e 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -16,6 +16,7 @@ rxrpc-y := \ conn_service.o \ input.o \ insecure.o \ + io_thread.o \ key.o \ local_event.o \ local_object.o \ @@ -30,8 +31,12 @@ rxrpc-y := \ sendmsg.o \ server_key.o \ skbuff.o \ + txbuf.o \ utils.o rxrpc-$(CONFIG_PROC_FS) += proc.o rxrpc-$(CONFIG_RXKAD) += rxkad.o rxrpc-$(CONFIG_SYSCTL) += sysctl.o + + +obj-$(CONFIG_RXPERF) += rxperf.o diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index ceba28e9dce6..7ea576f6ba4b 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -39,7 +39,7 @@ atomic_t rxrpc_debug_id; EXPORT_SYMBOL(rxrpc_debug_id); /* count of skbs currently in use */ -atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; +atomic_t rxrpc_n_rx_skbs; struct workqueue_struct *rxrpc_workqueue; @@ -93,12 +93,11 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, srx->transport_len > len) return -EINVAL; - if (srx->transport.family != rx->family && - srx->transport.family == AF_INET && rx->family != AF_INET6) - return -EAFNOSUPPORT; - switch (srx->transport.family) { case AF_INET: + if (rx->family != AF_INET && + rx->family != AF_INET6) + return -EAFNOSUPPORT; if (srx->transport_len < sizeof(struct sockaddr_in)) return -EINVAL; tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad); @@ -106,6 +105,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, #ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: + if (rx->family != AF_INET6) + return -EAFNOSUPPORT; if 
(srx->transport_len < sizeof(struct sockaddr_in6)) return -EINVAL; tail = offsetof(struct sockaddr_rxrpc, transport) + @@ -193,8 +194,8 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) service_in_use: write_unlock(&local->services_lock); - rxrpc_unuse_local(local); - rxrpc_put_local(local); + rxrpc_unuse_local(local, rxrpc_local_unuse_bind); + rxrpc_put_local(local, rxrpc_local_put_bind); ret = -EADDRINUSE; error_unlock: release_sock(&rx->sk); @@ -327,7 +328,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, mutex_unlock(&call->user_mutex); } - rxrpc_put_peer(cp.peer); + rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp); _leave(" = %p", call); return call; } @@ -358,9 +359,9 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) /* Make sure we're not going to call back into a kernel service */ if (call->notify_rx) { - spin_lock_bh(&call->notify_lock); + spin_lock(&call->notify_lock); call->notify_rx = rxrpc_dummy_notify_rx; - spin_unlock_bh(&call->notify_lock); + spin_unlock(&call->notify_lock); } mutex_unlock(&call->user_mutex); @@ -811,14 +812,12 @@ static int rxrpc_shutdown(struct socket *sock, int flags) lock_sock(sk); - spin_lock_bh(&sk->sk_receive_queue.lock); if (sk->sk_state < RXRPC_CLOSE) { sk->sk_state = RXRPC_CLOSE; sk->sk_shutdown = SHUTDOWN_MASK; } else { ret = -ESHUTDOWN; } - spin_unlock_bh(&sk->sk_receive_queue.lock); rxrpc_discard_prealloc(rx); @@ -871,9 +870,7 @@ static int rxrpc_release_sock(struct sock *sk) break; } - spin_lock_bh(&sk->sk_receive_queue.lock); sk->sk_state = RXRPC_CLOSE; - spin_unlock_bh(&sk->sk_receive_queue.lock); if (rx->local && rcu_access_pointer(rx->local->service) == rx) { write_lock(&rx->local->services_lock); @@ -887,8 +884,8 @@ static int rxrpc_release_sock(struct sock *sk) flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); - rxrpc_unuse_local(rx->local); - rxrpc_put_local(rx->local); + rxrpc_unuse_local(rx->local, 
rxrpc_local_unuse_release_sock); + rxrpc_put_local(rx->local, rxrpc_local_put_release_sock); rx->local = NULL; key_put(rx->key); rx->key = NULL; @@ -979,7 +976,7 @@ static int __init af_rxrpc_init(void) goto error_call_jar; } - rxrpc_workqueue = alloc_workqueue("krxrpcd", 0, 1); + rxrpc_workqueue = alloc_workqueue("krxrpcd", WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1); if (!rxrpc_workqueue) { pr_notice("Failed to allocate work queue\n"); goto error_work_queue; @@ -1059,7 +1056,6 @@ static void __exit af_rxrpc_exit(void) sock_unregister(PF_RXRPC); proto_unregister(&rxrpc_proto); unregister_pernet_device(&rxrpc_net_ops); - ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); /* Make sure the local and peer records pinned by any dying connections diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 8499ceb7719c..e7dccab7b741 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -29,12 +29,15 @@ struct rxrpc_crypt { struct key_preparsed_payload; struct rxrpc_connection; +struct rxrpc_txbuf; /* * Mark applied to socket buffers in skb->mark. skb->priority is used * to pass supplementary information. 
*/ enum rxrpc_skb_mark { + RXRPC_SKB_MARK_PACKET, /* Received packet */ + RXRPC_SKB_MARK_ERROR, /* Error notification */ RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */ RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */ }; @@ -75,7 +78,7 @@ struct rxrpc_net { bool kill_all_client_conns; atomic_t nr_client_conns; spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ - spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ + struct mutex client_conn_discard_lock; /* Prevent multiple discarders */ struct list_head idle_client_conns; struct work_struct client_conn_reaper; struct timer_list client_conn_reap_timer; @@ -93,6 +96,27 @@ struct rxrpc_net { struct list_head peer_keepalive_new; struct timer_list peer_keepalive_timer; struct work_struct peer_keepalive_work; + + atomic_t stat_tx_data; + atomic_t stat_tx_data_retrans; + atomic_t stat_tx_data_send; + atomic_t stat_tx_data_send_frag; + atomic_t stat_tx_data_send_fail; + atomic_t stat_tx_data_underflow; + atomic_t stat_tx_data_cwnd_reset; + atomic_t stat_rx_data; + atomic_t stat_rx_data_reqack; + atomic_t stat_rx_data_jumbo; + + atomic_t stat_tx_ack_fill; + atomic_t stat_tx_ack_send; + atomic_t stat_tx_ack_skip; + atomic_t stat_tx_acks[256]; + atomic_t stat_rx_acks[256]; + + atomic_t stat_why_req_ack[8]; + + atomic_t stat_io_loop; }; /* @@ -178,20 +202,12 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { - atomic_t nr_ring_pins; /* Number of rxtx ring pins */ - u8 nr_subpackets; /* Number of subpackets */ - u8 rx_flags; /* Received packet flags */ -#define RXRPC_SKB_INCL_LAST 0x01 /* - Includes last packet */ -#define RXRPC_SKB_TX_BUFFER 0x02 /* - Is transmit buffer */ - union { - int remain; /* amount of space remaining for next write */ + u16 offset; /* Offset of data */ + u16 len; /* Length of data */ + u8 flags; +#define RXRPC_RX_VERIFIED 0x01 - /* List of requested ACKs on subpackets */ - unsigned long 
rx_req_ack[(RXRPC_MAX_NR_JUMBO + BITS_PER_LONG - 1) / - BITS_PER_LONG]; - }; - - struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ + struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ }; #define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb) @@ -233,19 +249,14 @@ struct rxrpc_security { size_t *, size_t *, size_t *); /* impose security on a packet */ - int (*secure_packet)(struct rxrpc_call *, struct sk_buff *, size_t); + int (*secure_packet)(struct rxrpc_call *, struct rxrpc_txbuf *); /* verify the security on a received packet */ - int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, - unsigned int, unsigned int, rxrpc_seq_t, u16); + int (*verify_packet)(struct rxrpc_call *, struct sk_buff *); /* Free crypto request on a call */ void (*free_call_crypto)(struct rxrpc_call *); - /* Locate the data in a received packet that has been verified. */ - void (*locate_data)(struct rxrpc_call *, struct sk_buff *, - unsigned int *, unsigned int *); - /* issue a challenge */ int (*issue_challenge)(struct rxrpc_connection *); @@ -275,11 +286,11 @@ struct rxrpc_local { struct rxrpc_net *rxnet; /* The network ns in which this resides */ struct hlist_node link; struct socket *socket; /* my UDP socket */ - struct work_struct processor; + struct task_struct *io_thread; struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ - struct sk_buff_head reject_queue; /* packets awaiting rejection */ - struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ + struct sk_buff_head rx_queue; /* Received packets */ + struct list_head call_attend_q; /* Calls requiring immediate attention */ struct rb_root client_bundles; /* Client connection bundles by socket params */ spinlock_t client_bundles_lock; /* Lock for client_bundles */ spinlock_t lock; /* access lock */ @@ -326,7 +337,7 @@ struct rxrpc_peer { u32 rto_j; 
/* Retransmission timeout in jiffies */ u8 backoff; /* Backoff timeout */ - u8 cong_cwnd; /* Congestion window size */ + u8 cong_ssthresh; /* Congestion slow-start threshold */ }; /* @@ -397,12 +408,18 @@ enum rxrpc_conn_proto_state { * RxRPC client connection bundle. */ struct rxrpc_bundle { - struct rxrpc_conn_parameters params; + struct rxrpc_local *local; /* Representation of local endpoint */ + struct rxrpc_peer *peer; /* Remote endpoint */ + struct key *key; /* Security details */ refcount_t ref; atomic_t active; /* Number of active users */ unsigned int debug_id; + u32 security_level; /* Security level selected */ + u16 service_id; /* Service ID for this connection */ bool try_upgrade; /* True if the bundle is attempting upgrade */ bool alloc_conn; /* True if someone's getting a conn */ + bool exclusive; /* T if conn is exclusive */ + bool upgrade; /* T if service ID can be upgraded */ short alloc_error; /* Error from last conn allocation */ spinlock_t channel_lock; struct rb_node local_node; /* Node in local->client_conns */ @@ -418,9 +435,13 @@ struct rxrpc_bundle { */ struct rxrpc_connection { struct rxrpc_conn_proto proto; - struct rxrpc_conn_parameters params; + struct rxrpc_local *local; /* Representation of local endpoint */ + struct rxrpc_peer *peer; /* Remote endpoint */ + struct rxrpc_net *rxnet; /* Network namespace to which call belongs */ + struct key *key; /* Security details */ refcount_t ref; + atomic_t active; /* Active count for service conns */ struct rcu_head rcu; struct list_head cache_link; @@ -441,6 +462,7 @@ struct rxrpc_connection { struct timer_list timer; /* Conn event timer */ struct work_struct processor; /* connection event processor */ + struct work_struct destructor; /* In-process-context destroyer */ struct rxrpc_bundle *bundle; /* Client connection bundle */ struct rb_node service_node; /* Node in peer->service_conns */ struct list_head proc_link; /* link in procfs list */ @@ -465,9 +487,13 @@ struct rxrpc_connection { 
atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ u32 service_id; /* Service ID, possibly upgraded */ + u32 security_level; /* Security level selected */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ u8 bundle_shift; /* Index into bundle->avail_chans */ + bool exclusive; /* T if conn is exclusive */ + bool upgrade; /* T if service ID can be upgraded */ + u16 orig_service_id; /* Originally requested service ID */ short error; /* Local error code */ }; @@ -491,26 +517,24 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ + RXRPC_CALL_TX_ALL_ACKED, /* Last packet has been hard-acked */ RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */ RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */ - RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */ RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */ RXRPC_CALL_KERNEL, /* The call was made by the kernel */ RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */ + RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */ + RXRPC_CALL_RX_IS_IDLE, /* Reception is idle - send an ACK */ }; /* * Events that can be raised on a call. 
*/ enum rxrpc_call_event { - RXRPC_CALL_EV_ACK, /* need to generate ACK */ - RXRPC_CALL_EV_ABORT, /* need to generate abort */ - RXRPC_CALL_EV_RESEND, /* Tx resend required */ - RXRPC_CALL_EV_PING, /* Ping send required */ - RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */ RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */ + RXRPC_CALL_EV_INITIAL_PING, /* Send initial ping for a new service call */ }; /* @@ -563,11 +587,14 @@ struct rxrpc_call { struct rcu_head rcu; struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ + struct rxrpc_local *local; /* Representation of local endpoint */ struct rxrpc_sock __rcu *socket; /* socket responsible */ struct rxrpc_net *rxnet; /* Network namespace to which call belongs */ + struct key *key; /* Security details */ const struct rxrpc_security *security; /* applied security module */ struct mutex user_mutex; /* User access mutex */ - unsigned long ack_at; /* When deferred ACK needs to happen */ + struct sockaddr_rxrpc dest_srx; /* Destination address */ + unsigned long delay_ack_at; /* When DELAY ACK needs to happen */ unsigned long ack_lost_at; /* When ACK is figured as lost */ unsigned long resend_at; /* When next resend needs to happen */ unsigned long ping_at; /* When next to send a ping */ @@ -577,9 +604,8 @@ struct rxrpc_call { unsigned long expect_term_by; /* When we expect call termination by */ u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ - struct skcipher_request *cipher_req; /* Packet cipher request buffer */ struct timer_list timer; /* Combined event timer */ - struct work_struct processor; /* Event processor */ + struct work_struct destroyer; /* In-process-context destroyer */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ struct list_head chan_wait_link; /* Link in 
conn->bundle->waiting_calls */ @@ -588,14 +614,13 @@ struct rxrpc_call { struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ struct list_head sock_link; /* Link in rx->sock_calls */ struct rb_node sock_node; /* Node in rx->calls */ - struct sk_buff *tx_pending; /* Tx socket buffer being filled */ + struct list_head attend_link; /* Link in local->call_attend_q */ + struct rxrpc_txbuf *tx_pending; /* Tx buffer being filled */ wait_queue_head_t waitq; /* Wait queue for channel or Tx */ s64 tx_total_len; /* Total length left to be transmitted (or -1) */ - __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ unsigned long user_call_ID; /* user-defined call ID */ unsigned long flags; unsigned long events; - spinlock_t lock; spinlock_t notify_lock; /* Kernel notification lock */ rwlock_t state_lock; /* lock for state transition */ u32 abort_code; /* Local/remote abort code */ @@ -603,51 +628,43 @@ struct rxrpc_call { enum rxrpc_call_state state; /* current state of call */ enum rxrpc_call_completion completion; /* Call completion condition */ refcount_t ref; - u16 service_id; /* service ID */ u8 security_ix; /* Security type */ enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ + u32 security_level; /* Security level selected */ int debug_id; /* debug ID for printks */ unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ unsigned short rx_pkt_len; /* Current recvmsg packet len */ - bool rx_pkt_last; /* Current recvmsg packet is last */ - - /* Rx/Tx circular buffer, depending on phase. - * - * In the Rx phase, packets are annotated with 0 or the number of the - * segment of a jumbo packet each buffer refers to. There can be up to - * 47 segments in a maximum-size UDP packet. - * - * In the Tx phase, packets are annotated with which buffers have been - * acked. 
- */ -#define RXRPC_RXTX_BUFF_SIZE 64 -#define RXRPC_RXTX_BUFF_MASK (RXRPC_RXTX_BUFF_SIZE - 1) -#define RXRPC_INIT_RX_WINDOW_SIZE 63 - struct sk_buff **rxtx_buffer; - u8 *rxtx_annotations; -#define RXRPC_TX_ANNO_ACK 0 -#define RXRPC_TX_ANNO_UNACK 1 -#define RXRPC_TX_ANNO_NAK 2 -#define RXRPC_TX_ANNO_RETRANS 3 -#define RXRPC_TX_ANNO_MASK 0x03 -#define RXRPC_TX_ANNO_LAST 0x04 -#define RXRPC_TX_ANNO_RESENT 0x08 - -#define RXRPC_RX_ANNO_SUBPACKET 0x3f /* Subpacket number in jumbogram */ -#define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ - rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but - * not hard-ACK'd packet follows this. - */ + + /* Transmitted data tracking. */ + spinlock_t tx_lock; /* Transmit queue lock */ + struct list_head tx_sendmsg; /* Sendmsg prepared packets */ + struct list_head tx_buffer; /* Buffer of transmissible packets */ + rxrpc_seq_t tx_bottom; /* First packet in buffer */ + rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */ + rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ u16 tx_backoff; /* Delay to insert due to Tx failure */ + u8 tx_winsize; /* Maximum size of Tx window */ +#define RXRPC_TX_MAX_WINDOW 128 + ktime_t tx_last_sent; /* Last time a transmission occurred */ + + /* Received data tracking */ + struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */ + struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */ + + rxrpc_seq_t rx_highest_seq; /* Higest sequence number received */ + rxrpc_seq_t rx_consumed; /* Highest packet consumed */ + rxrpc_serial_t rx_serial; /* Highest serial received for this call */ + u8 rx_winsize; /* Size of Rx window */ /* TCP-style slow-start congestion control [RFC5681]. Since the SMSS * is fixed, we keep these numbers in terms of segments (ie. DATA * packets) rather than bytes. 
*/ #define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN +#define RXRPC_MIN_CWND (RXRPC_TX_SMSS > 2190 ? 2 : RXRPC_TX_SMSS > 1095 ? 3 : 4) u8 cong_cwnd; /* Congestion window size */ u8 cong_extra; /* Extra to send for congestion management */ u8 cong_ssthresh; /* Slow-start threshold */ @@ -656,25 +673,17 @@ struct rxrpc_call { u8 cong_cumul_acks; /* Cumulative ACK count */ ktime_t cong_tstamp; /* Last time cwnd was changed */ - rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not - * consumed packet follows this. - */ - rxrpc_seq_t rx_top; /* Highest Rx slot allocated. */ - rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ - rxrpc_serial_t rx_serial; /* Highest serial received for this call */ - u8 rx_winsize; /* Size of Rx window */ - u8 tx_winsize; /* Maximum size of Tx window */ - bool tx_phase; /* T if transmission phase, F if receive phase */ - u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ - - spinlock_t input_lock; /* Lock for packet input to this call */ - /* Receive-phase ACK management (ACKs we send). 
*/ u8 ackr_reason; /* reason to ACK */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ - rxrpc_seq_t ackr_highest_seq; /* Higest sequence number received */ + atomic64_t ackr_window; /* Base (in LSW) and top (in MSW) of SACK window */ atomic_t ackr_nr_unacked; /* Number of unacked packets */ atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */ + struct { +#define RXRPC_SACK_SIZE 256 + /* SACK table for soft-acked packets */ + u8 ackr_sack_table[RXRPC_SACK_SIZE]; + } __aligned(8); /* RTT management */ rxrpc_serial_t rtt_serial[4]; /* Serial number of DATA or PING sent */ @@ -688,21 +697,20 @@ struct rxrpc_call { ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ rxrpc_seq_t acks_first_seq; /* first sequence number received */ rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ + rxrpc_seq_t acks_hard_ack; /* Latest hard-ack point */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ - rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ - rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ + rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */ }; /* * Summary of a new ACK and the changes it made to the Tx buffer packet states. 
*/ struct rxrpc_ack_summary { + u16 nr_acks; /* Number of ACKs in packet */ + u16 nr_new_acks; /* Number of new ACKs in packet */ + u16 nr_rot_new_acks; /* Number of rotated new ACKs */ u8 ack_reason; - u8 nr_acks; /* Number of ACKs in packet */ - u8 nr_nacks; /* Number of NACKs in packet */ - u8 nr_new_acks; /* Number of new ACKs in packet */ - u8 nr_new_nacks; /* Number of new NACKs in packet */ - u8 nr_rot_new_acks; /* Number of rotated new ACKs */ + bool saw_nacks; /* Saw NACKs in packet */ bool new_low_nack; /* T if new low NACK found */ bool retrans_timeo; /* T if reTx due to timeout happened */ u8 flight_size; /* Number of unreceived transmissions */ @@ -745,12 +753,57 @@ struct rxrpc_send_params { bool upgrade; /* If the connection is upgradeable */ }; +/* + * Buffer of data to be output as a packet. + */ +struct rxrpc_txbuf { + struct rcu_head rcu; + struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */ + struct list_head tx_link; /* Link in live Enc queue or Tx queue */ + ktime_t last_sent; /* Time at which last transmitted */ + refcount_t ref; + rxrpc_seq_t seq; /* Sequence number of this packet */ + unsigned int call_debug_id; + unsigned int debug_id; + unsigned int len; /* Amount of data in buffer */ + unsigned int space; /* Remaining data space */ + unsigned int offset; /* Offset of fill point */ + unsigned long flags; +#define RXRPC_TXBUF_LAST 0 /* Set if last packet in Tx phase */ +#define RXRPC_TXBUF_RESENT 1 /* Set if has been resent */ + u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */ + struct { + /* The packet for encrypting and DMA'ing. We align it such + * that data[] aligns correctly for any crypto blocksize. 
+ */ + u8 pad[64 - sizeof(struct rxrpc_wire_header)]; + struct rxrpc_wire_header wire; /* Network-ready header */ + union { + u8 data[RXRPC_JUMBO_DATALEN]; /* Data packet */ + struct { + struct rxrpc_ackpacket ack; + u8 acks[0]; + }; + }; + } __aligned(64); +}; + +static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb) +{ + return txb->wire.flags & RXRPC_CLIENT_INITIATED; +} + +static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb) +{ + return !rxrpc_sending_to_server(txb); +} + #include <trace/events/rxrpc.h> /* * af_rxrpc.c */ -extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; +extern atomic_t rxrpc_n_rx_skbs; extern struct workqueue_struct *rxrpc_workqueue; /* @@ -758,25 +811,29 @@ extern struct workqueue_struct *rxrpc_workqueue; */ int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); -struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, - struct rxrpc_sock *, - struct sk_buff *); +bool rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *, + struct rxrpc_connection *, struct sockaddr_rxrpc *, + struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long); /* * call_event.c */ -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool, - enum rxrpc_propose_ack_trace); -void rxrpc_process_call(struct work_struct *); +void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, + enum rxrpc_propose_ack_trace why); +void rxrpc_send_ACK(struct rxrpc_call *, u8, rxrpc_serial_t, enum rxrpc_propose_ack_trace); +void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, + enum rxrpc_propose_ack_trace); +void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *); +void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb); void rxrpc_reduce_call_timer(struct rxrpc_call *call, unsigned long expire_at, unsigned long now, enum rxrpc_timer_trace why); -void 
rxrpc_delete_call_timer(struct rxrpc_call *call); +void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb); /* * call_object.c @@ -785,6 +842,7 @@ extern const char *const rxrpc_call_states[]; extern const char *const rxrpc_call_completions[]; extern struct kmem_cache *rxrpc_call_jar; +void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what); struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, @@ -796,10 +854,8 @@ void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); -bool __rxrpc_queue_call(struct rxrpc_call *); -bool rxrpc_queue_call(struct rxrpc_call *); -void rxrpc_see_call(struct rxrpc_call *); -bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op); +void rxrpc_see_call(struct rxrpc_call *, enum rxrpc_call_trace); +struct rxrpc_call *rxrpc_try_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_cleanup_call(struct rxrpc_call *); @@ -824,14 +880,14 @@ extern unsigned long rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); -struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *); -void rxrpc_put_bundle(struct rxrpc_bundle *); +struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); +void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, gfp_t); void rxrpc_expose_client_call(struct rxrpc_call *); void 
rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *); -void rxrpc_put_client_conn(struct rxrpc_connection *); +void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace); void rxrpc_discard_expired_client_conns(struct work_struct *); void rxrpc_destroy_all_client_connections(struct rxrpc_net *); void rxrpc_clean_up_local_conns(struct rxrpc_local *); @@ -841,6 +897,7 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *); */ void rxrpc_process_connection(struct work_struct *); void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); +int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); /* * conn_object.c @@ -848,18 +905,20 @@ void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); extern unsigned int rxrpc_connection_expiry; extern unsigned int rxrpc_closed_conn_expiry; -struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); -struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, - struct sk_buff *, - struct rxrpc_peer **); +struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t); +struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *, + struct sockaddr_rxrpc *, + struct sk_buff *); void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); -void rxrpc_kill_connection(struct rxrpc_connection *); -bool rxrpc_queue_conn(struct rxrpc_connection *); -void rxrpc_see_connection(struct rxrpc_connection *); -struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *); -struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *); -void rxrpc_put_service_conn(struct rxrpc_connection *); +void rxrpc_kill_client_conn(struct rxrpc_connection *); +void rxrpc_queue_conn(struct rxrpc_connection *, enum rxrpc_conn_trace); +void rxrpc_see_connection(struct rxrpc_connection *, enum rxrpc_conn_trace); +struct rxrpc_connection 
*rxrpc_get_connection(struct rxrpc_connection *, + enum rxrpc_conn_trace); +struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *, + enum rxrpc_conn_trace); +void rxrpc_put_connection(struct rxrpc_connection *, enum rxrpc_conn_trace); void rxrpc_service_connection_reaper(struct work_struct *); void rxrpc_destroy_all_connections(struct rxrpc_net *); @@ -873,17 +932,6 @@ static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn) return !rxrpc_conn_is_client(conn); } -static inline void rxrpc_put_connection(struct rxrpc_connection *conn) -{ - if (!conn) - return; - - if (rxrpc_conn_is_client(conn)) - rxrpc_put_client_conn(conn); - else - rxrpc_put_service_conn(conn); -} - static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn, unsigned long expire_at) { @@ -903,7 +951,20 @@ void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* * input.c */ -int rxrpc_input_packet(struct sock *, struct sk_buff *); +void rxrpc_congestion_degrade(struct rxrpc_call *); +void rxrpc_input_call_packet(struct rxrpc_call *, struct sk_buff *); +void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *); + +/* + * io_thread.c + */ +int rxrpc_encap_rcv(struct sock *, struct sk_buff *); +void rxrpc_error_report(struct sock *); +int rxrpc_io_thread(void *data); +static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local) +{ + wake_up_process(local->io_thread); +} /* * insecure.c @@ -922,43 +983,53 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t, /* * local_event.c */ -extern void rxrpc_process_local_events(struct rxrpc_local *); +void rxrpc_send_version_request(struct rxrpc_local *local, + struct rxrpc_host_header *hdr, + struct sk_buff *skb); /* * local_object.c */ struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *); -struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *); -struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *); 
-void rxrpc_put_local(struct rxrpc_local *); -struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *); -void rxrpc_unuse_local(struct rxrpc_local *); -void rxrpc_queue_local(struct rxrpc_local *); +struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *, enum rxrpc_local_trace); +struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *, enum rxrpc_local_trace); +void rxrpc_put_local(struct rxrpc_local *, enum rxrpc_local_trace); +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *, enum rxrpc_local_trace); +void rxrpc_unuse_local(struct rxrpc_local *, enum rxrpc_local_trace); +void rxrpc_destroy_local(struct rxrpc_local *local); void rxrpc_destroy_all_locals(struct rxrpc_net *); -static inline bool __rxrpc_unuse_local(struct rxrpc_local *local) +static inline bool __rxrpc_use_local(struct rxrpc_local *local, + enum rxrpc_local_trace why) { - return atomic_dec_return(&local->active_users) == 0; + int r, u; + + r = refcount_read(&local->ref); + u = atomic_fetch_add_unless(&local->active_users, 1, 0); + trace_rxrpc_local(local->debug_id, why, r, u); + return u != 0; } -static inline bool __rxrpc_use_local(struct rxrpc_local *local) +static inline void rxrpc_see_local(struct rxrpc_local *local, + enum rxrpc_local_trace why) { - return atomic_fetch_add_unless(&local->active_users, 1, 0) != 0; + int r, u; + + r = refcount_read(&local->ref); + u = atomic_read(&local->active_users); + trace_rxrpc_local(local->debug_id, why, r, u); } /* * misc.c */ extern unsigned int rxrpc_max_backlog __read_mostly; -extern unsigned long rxrpc_requested_ack_delay; extern unsigned long rxrpc_soft_ack_delay; extern unsigned long rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; -extern const s8 rxrpc_ack_priority[]; - /* * net_ns.c */ @@ -973,17 +1044,17 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) /* * output.c */ -int rxrpc_send_ack_packet(struct rxrpc_call *, bool, 
rxrpc_serial_t *); +int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb); int rxrpc_send_abort_packet(struct rxrpc_call *); -int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); -void rxrpc_reject_packets(struct rxrpc_local *); +int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *); +void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); void rxrpc_send_keepalive(struct rxrpc_peer *); +void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb); /* * peer_event.c */ -void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); -void rxrpc_error_report(struct sock *); +void rxrpc_input_error(struct rxrpc_local *, struct sk_buff *); void rxrpc_peer_keepalive_worker(struct work_struct *); /* @@ -993,14 +1064,15 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, const struct sockaddr_rxrpc *); struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); -struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); +struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t, + enum rxrpc_peer_trace); void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *, struct rxrpc_peer *); void rxrpc_destroy_all_peers(struct rxrpc_net *); -struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); -struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); -void rxrpc_put_peer(struct rxrpc_peer *); -void rxrpc_put_peer_locked(struct rxrpc_peer *); +struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace); +struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace); +void rxrpc_put_peer(struct rxrpc_peer *, enum rxrpc_peer_trace); +void rxrpc_put_peer_locked(struct rxrpc_peer *, enum rxrpc_peer_trace); /* * proc.c @@ -1062,6 +1134,7 @@ extern const struct rxrpc_security rxkad; int __init rxrpc_init_security(void); const struct 
rxrpc_security *rxrpc_security_lookup(u8); void rxrpc_exit_security(void); +int rxrpc_init_client_call_security(struct rxrpc_call *); int rxrpc_init_client_conn_security(struct rxrpc_connection *); const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *, struct sk_buff *); @@ -1084,7 +1157,6 @@ int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int); * skbuff.c */ void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); -void rxrpc_packet_destructor(struct sk_buff *); void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace); @@ -1093,6 +1165,15 @@ void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); /* + * stats.c + */ +int rxrpc_stats_show(struct seq_file *seq, void *v); +int rxrpc_stats_clear(struct file *file, char *buf, size_t size); + +#define rxrpc_inc_stat(rxnet, s) atomic_inc(&(rxnet)->s) +#define rxrpc_dec_stat(rxnet, s) atomic_dec(&(rxnet)->s) + +/* * sysctl.c */ #ifdef CONFIG_SYSCTL @@ -1104,6 +1185,16 @@ static inline void rxrpc_sysctl_exit(void) {} #endif /* + * txbuf.c + */ +extern atomic_t rxrpc_nr_txbuf; +struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, + gfp_t gfp); +void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); +void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); +void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); + +/* * utils.c */ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); @@ -1136,23 +1227,17 @@ extern unsigned int rxrpc_debug; #define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) #define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) -#define kproto(FMT,...) 
dbgprintk("### "FMT ,##__VA_ARGS__) -#define knet(FMT,...) dbgprintk("@@@ "FMT ,##__VA_ARGS__) #if defined(__KDEBUG) #define _enter(FMT,...) kenter(FMT,##__VA_ARGS__) #define _leave(FMT,...) kleave(FMT,##__VA_ARGS__) #define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__) -#define _proto(FMT,...) kproto(FMT,##__VA_ARGS__) -#define _net(FMT,...) knet(FMT,##__VA_ARGS__) #elif defined(CONFIG_AF_RXRPC_DEBUG) #define RXRPC_DEBUG_KENTER 0x01 #define RXRPC_DEBUG_KLEAVE 0x02 #define RXRPC_DEBUG_KDEBUG 0x04 -#define RXRPC_DEBUG_KPROTO 0x08 -#define RXRPC_DEBUG_KNET 0x10 #define _enter(FMT,...) \ do { \ @@ -1172,24 +1257,10 @@ do { \ kdebug(FMT,##__VA_ARGS__); \ } while (0) -#define _proto(FMT,...) \ -do { \ - if (unlikely(rxrpc_debug & RXRPC_DEBUG_KPROTO)) \ - kproto(FMT,##__VA_ARGS__); \ -} while (0) - -#define _net(FMT,...) \ -do { \ - if (unlikely(rxrpc_debug & RXRPC_DEBUG_KNET)) \ - knet(FMT,##__VA_ARGS__); \ -} while (0) - #else #define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__) #define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) #define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__) -#define _proto(FMT,...) no_printk("### "FMT ,##__VA_ARGS__) -#define _net(FMT,...) 
no_printk("@@@ "FMT ,##__VA_ARGS__) #endif /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 99e10eea3732..d1850863507f 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -38,7 +38,6 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, unsigned long user_call_ID, gfp_t gfp, unsigned int debug_id) { - const void *here = __builtin_return_address(0); struct rxrpc_call *call, *xcall; struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); struct rb_node *parent, **pp; @@ -70,7 +69,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, head = b->peer_backlog_head; tail = READ_ONCE(b->peer_backlog_tail); if (CIRC_CNT(head, tail, size) < max) { - struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp); + struct rxrpc_peer *peer; + + peer = rxrpc_alloc_peer(rx->local, gfp, rxrpc_peer_new_prealloc); if (!peer) return -ENOMEM; b->peer_backlog[head] = peer; @@ -89,9 +90,6 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, b->conn_backlog[head] = conn; smp_store_release(&b->conn_backlog_head, (head + 1) & (size - 1)); - - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, - refcount_read(&conn->ref), here); } /* Now it gets complicated, because calls get registered with the @@ -102,10 +100,10 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); call->state = RXRPC_CALL_SERVER_PREALLOC; + __set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events); - trace_rxrpc_call(call->debug_id, rxrpc_call_new_service, - refcount_read(&call->ref), - here, (const void *)user_call_ID); + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), + user_call_ID, rxrpc_call_new_prealloc_service); write_lock(&rx->call_lock); @@ -126,11 +124,11 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, call->user_call_ID = user_call_ID; call->notify_rx = notify_rx; if (user_attach_call) { - rxrpc_get_call(call, rxrpc_call_got_kernel); + 
rxrpc_get_call(call, rxrpc_call_get_kernel_service); user_attach_call(call, user_call_ID); } - rxrpc_get_call(call, rxrpc_call_got_userid); + rxrpc_get_call(call, rxrpc_call_get_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); set_bit(RXRPC_CALL_HAS_USERID, &call->flags); @@ -140,9 +138,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); rxnet = call->rxnet; - spin_lock_bh(&rxnet->call_lock); + spin_lock(&rxnet->call_lock); list_add_tail_rcu(&call->link, &rxnet->calls); - spin_unlock_bh(&rxnet->call_lock); + spin_unlock(&rxnet->call_lock); b->call_backlog[call_head] = call; smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); @@ -190,14 +188,14 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) /* Make sure that there aren't any incoming calls in progress before we * clear the preallocation buffers. */ - spin_lock_bh(&rx->incoming_lock); - spin_unlock_bh(&rx->incoming_lock); + spin_lock(&rx->incoming_lock); + spin_unlock(&rx->incoming_lock); head = b->peer_backlog_head; tail = b->peer_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_peer *peer = b->peer_backlog[tail]; - rxrpc_put_local(peer->local); + rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_conn); kfree(peer); tail = (tail + 1) & (size - 1); } @@ -230,7 +228,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) } rxrpc_call_completed(call); rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_discard_prealloc); tail = (tail + 1) & (size - 1); } @@ -238,22 +236,6 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) } /* - * Ping the other end to fill our RTT cache and to retrieve the rwind - * and MTU parameters. 
- */ -static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - ktime_t now = skb->tstamp; - - if (call->peer->rtt_count < 3 || - ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) - rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, - true, true, - rxrpc_propose_ack_ping_for_params); -} - -/* * Allocate a new incoming call from the prealloc pool, along with a connection * and a peer as necessary. */ @@ -262,6 +244,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_peer *peer, struct rxrpc_connection *conn, const struct rxrpc_security *sec, + struct sockaddr_rxrpc *peer_srx, struct sk_buff *skb) { struct rxrpc_backlog *b = rx->backlog; @@ -287,12 +270,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, return NULL; if (!conn) { - if (peer && !rxrpc_get_peer_maybe(peer)) + if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_service_conn)) peer = NULL; if (!peer) { peer = b->peer_backlog[peer_tail]; - if (rxrpc_extract_addr_from_skb(&peer->srx, skb) < 0) - return NULL; + peer->srx = *peer_srx; b->peer_backlog[peer_tail] = NULL; smp_store_release(&b->peer_backlog_tail, (peer_tail + 1) & @@ -306,12 +288,13 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, b->conn_backlog[conn_tail] = NULL; smp_store_release(&b->conn_backlog_tail, (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); - conn->params.local = rxrpc_get_local(local); - conn->params.peer = peer; - rxrpc_see_connection(conn); + conn->local = rxrpc_get_local(local, rxrpc_local_get_prealloc_conn); + conn->peer = peer; + rxrpc_see_connection(conn, rxrpc_conn_see_new_service_conn); rxrpc_new_incoming_connection(rx, conn, sec, skb); } else { - rxrpc_get_connection(conn); + rxrpc_get_connection(conn, rxrpc_conn_get_service_conn); + atomic_inc(&conn->active); } /* And now we can allocate and set up a new call */ @@ -320,42 +303,69 @@ static 
struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, smp_store_release(&b->call_backlog_tail, (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); - rxrpc_see_call(call); + rxrpc_see_call(call, rxrpc_call_see_accept); + call->local = rxrpc_get_local(conn->local, rxrpc_local_get_call); call->conn = conn; call->security = conn->security; call->security_ix = conn->security_ix; - call->peer = rxrpc_get_peer(conn->params.peer); - call->cong_cwnd = call->peer->cong_cwnd; + call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_accept); + call->dest_srx = peer->srx; + call->cong_ssthresh = call->peer->cong_ssthresh; + call->tx_last_sent = ktime_get_real(); return call; } /* - * Set up a new incoming call. Called in BH context with the RCU read lock - * held. + * Set up a new incoming call. Called from the I/O thread. * * If this is for a kernel service, when we allocate the call, it will have * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the * retainer ref obtained from the backlog buffer. Prealloc calls for userspace - * services only have the ref from the backlog buffer. We want to pass this - * ref to non-BH context to dispose of. + * services only have the ref from the backlog buffer. * * If we want to report an error, we mark the skb with the packet type and - * abort code and return NULL. - * - * The call is returned with the user access mutex held. + * abort code and return false. 
*/ -struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, - struct rxrpc_sock *rx, - struct sk_buff *skb) +bool rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_peer *peer, + struct rxrpc_connection *conn, + struct sockaddr_rxrpc *peer_srx, + struct sk_buff *skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); const struct rxrpc_security *sec = NULL; - struct rxrpc_connection *conn; - struct rxrpc_peer *peer = NULL; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_call *call = NULL; + struct rxrpc_sock *rx; _enter(""); + /* Don't set up a call for anything other than the first DATA packet. */ + if (sp->hdr.seq != 1 || + sp->hdr.type != RXRPC_PACKET_TYPE_DATA) + return true; /* Just discard */ + + rcu_read_lock(); + + /* Weed out packets to services we're not offering. Packets that would + * begin a call are explicitly rejected and the rest are just + * discarded. + */ + rx = rcu_dereference(local->service); + if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && + sp->hdr.serviceId != rx->second_service) + ) { + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && + sp->hdr.seq == 1) + goto unsupported_service; + goto discard; + } + + if (!conn) { + sec = rxrpc_get_incoming_security(rx, skb); + if (!sec) + goto reject; + } + spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { @@ -366,20 +376,8 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, goto no_call; } - /* The peer, connection and call may all have sprung into existence due - * to a duplicate packet being handled on another CPU in parallel, so - * we have to recheck the routing. However, we're now holding - * rx->incoming_lock, so the values should remain stable. 
- */ - conn = rxrpc_find_connection_rcu(local, skb, &peer); - - if (!conn) { - sec = rxrpc_get_incoming_security(rx, skb); - if (!sec) - goto no_call; - } - - call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, skb); + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, peer_srx, + skb); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; goto no_call; @@ -396,50 +394,41 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, rx->notify_new_call(&rx->sk, call, call->user_call_ID); spin_lock(&conn->state_lock); - switch (conn->state) { - case RXRPC_CONN_SERVICE_UNSECURED: + if (conn->state == RXRPC_CONN_SERVICE_UNSECURED) { conn->state = RXRPC_CONN_SERVICE_CHALLENGING; set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); - rxrpc_queue_conn(call->conn); - break; - - case RXRPC_CONN_SERVICE: - write_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - write_unlock(&call->state_lock); - break; - - case RXRPC_CONN_REMOTELY_ABORTED: - rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->abort_code, conn->error); - break; - case RXRPC_CONN_LOCALLY_ABORTED: - rxrpc_abort_call("CON", call, sp->hdr.seq, - conn->abort_code, conn->error); - break; - default: - BUG(); + rxrpc_queue_conn(call->conn, rxrpc_conn_queue_challenge); } spin_unlock(&conn->state_lock); - spin_unlock(&rx->incoming_lock); - rxrpc_send_ping(call, skb); + spin_unlock(&rx->incoming_lock); + rcu_read_unlock(); - /* We have to discard the prealloc queue's ref here and rely on a - * combination of the RCU read lock and refs held either by the socket - * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel - * service to prevent the call from being deallocated too early. 
- */ - rxrpc_put_call(call, rxrpc_call_put); + if (hlist_unhashed(&call->error_link)) { + spin_lock(&call->peer->lock); + hlist_add_head(&call->error_link, &call->peer->error_targets); + spin_unlock(&call->peer->lock); + } _leave(" = %p{%d}", call, call->debug_id); - return call; - + rxrpc_input_call_event(call, skb); + rxrpc_put_call(call, rxrpc_call_put_input); + return true; + +unsupported_service: + trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EOPNOTSUPP); + skb->priority = RX_INVALID_OPERATION; + goto reject; no_call: spin_unlock(&rx->incoming_lock); - _leave(" = NULL [%u]", skb->mark); - return NULL; +reject: + rcu_read_unlock(); + _leave(" = f [%u]", skb->mark); + return false; +discard: + rcu_read_unlock(); + return true; } /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 2a93e7b5fbd0..b2cf448fb02c 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -20,127 +20,84 @@ /* * Propose a PING ACK be sent. */ -static void rxrpc_propose_ping(struct rxrpc_call *call, - bool immediate, bool background) +void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, + enum rxrpc_propose_ack_trace why) { - if (immediate) { - if (background && - !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) - rxrpc_queue_call(call); - } else { - unsigned long now = jiffies; - unsigned long ping_at = now + rxrpc_idle_ack_delay; - - if (time_before(ping_at, call->ping_at)) { - WRITE_ONCE(call->ping_at, ping_at); - rxrpc_reduce_call_timer(call, ping_at, now, - rxrpc_timer_set_for_ping); - } + unsigned long now = jiffies; + unsigned long ping_at = now + rxrpc_idle_ack_delay; + + if (time_before(ping_at, call->ping_at)) { + WRITE_ONCE(call->ping_at, ping_at); + rxrpc_reduce_call_timer(call, ping_at, now, + rxrpc_timer_set_for_ping); + trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial); } } /* - * propose an ACK be sent + * Propose a DELAY ACK be sent in the future. 
*/ -static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u32 serial, bool immediate, bool background, - enum rxrpc_propose_ack_trace why) +void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, + enum rxrpc_propose_ack_trace why) { - enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; unsigned long expiry = rxrpc_soft_ack_delay; - s8 prior = rxrpc_ack_priority[ack_reason]; - - /* Pings are handled specially because we don't want to accidentally - * lose a ping response by subsuming it into a ping. - */ - if (ack_reason == RXRPC_ACK_PING) { - rxrpc_propose_ping(call, immediate, background); - goto trace; + unsigned long now = jiffies, ack_at; + + call->ackr_serial = serial; + + if (rxrpc_soft_ack_delay < expiry) + expiry = rxrpc_soft_ack_delay; + if (call->peer->srtt_us != 0) + ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3); + else + ack_at = expiry; + + ack_at += READ_ONCE(call->tx_backoff); + ack_at += now; + if (time_before(ack_at, call->delay_ack_at)) { + WRITE_ONCE(call->delay_ack_at, ack_at); + rxrpc_reduce_call_timer(call, ack_at, now, + rxrpc_timer_set_for_ack); } - /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial - * numbers, but we don't alter the timeout. 
- */ - _debug("prior %u %u vs %u %u", - ack_reason, prior, - call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]); - if (ack_reason == call->ackr_reason) { - if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { - outcome = rxrpc_propose_ack_update; - call->ackr_serial = serial; - } - if (!immediate) - goto trace; - } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { - call->ackr_reason = ack_reason; - call->ackr_serial = serial; - } else { - outcome = rxrpc_propose_ack_subsume; - } + trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial); +} - switch (ack_reason) { - case RXRPC_ACK_REQUESTED: - if (rxrpc_requested_ack_delay < expiry) - expiry = rxrpc_requested_ack_delay; - if (serial == 1) - immediate = false; - break; +/* + * Queue an ACK for immediate transmission. + */ +void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, + rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why) +{ + struct rxrpc_txbuf *txb; - case RXRPC_ACK_DELAY: - if (rxrpc_soft_ack_delay < expiry) - expiry = rxrpc_soft_ack_delay; - break; + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) + return; - case RXRPC_ACK_IDLE: - if (rxrpc_idle_ack_delay < expiry) - expiry = rxrpc_idle_ack_delay; - break; + rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]); - default: - immediate = true; - break; - } - - if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { - _debug("already scheduled"); - } else if (immediate || expiry == 0) { - _debug("immediate ACK %lx", call->events); - if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events) && - background) - rxrpc_queue_call(call); - } else { - unsigned long now = jiffies, ack_at; - - if (call->peer->srtt_us != 0) - ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3); - else - ack_at = expiry; - - ack_at += READ_ONCE(call->tx_backoff); - ack_at += now; - if (time_before(ack_at, call->ack_at)) { - WRITE_ONCE(call->ack_at, ack_at); - rxrpc_reduce_call_timer(call, ack_at, now, - rxrpc_timer_set_for_ack); - } + txb = 
rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK, + rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS); + if (!txb) { + kleave(" = -ENOMEM"); + return; } -trace: - trace_rxrpc_propose_ack(call, why, ack_reason, serial, immediate, - background, outcome); -} - -/* - * propose an ACK be sent, locking the call structure - */ -void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u32 serial, bool immediate, bool background, - enum rxrpc_propose_ack_trace why) -{ - spin_lock_bh(&call->lock); - __rxrpc_propose_ACK(call, ack_reason, serial, - immediate, background, why); - spin_unlock_bh(&call->lock); + txb->ack_why = why; + txb->wire.seq = 0; + txb->wire.type = RXRPC_PACKET_TYPE_ACK; + txb->wire.flags |= RXRPC_SLOW_START_OK; + txb->ack.bufferSpace = 0; + txb->ack.maxSkew = 0; + txb->ack.firstPacket = 0; + txb->ack.previousPacket = 0; + txb->ack.serial = htonl(serial); + txb->ack.reason = ack_reason; + txb->ack.nAcks = 0; + + trace_rxrpc_send_ack(call, why, ack_reason, serial); + rxrpc_send_ack_packet(call, txb); + rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); } /* @@ -154,64 +111,115 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) /* * Perform retransmission of NAK'd and unack'd packets. 
*/ -static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) +void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) { - struct sk_buff *skb; + struct rxrpc_ackpacket *ack = NULL; + struct rxrpc_txbuf *txb; unsigned long resend_at; - rxrpc_seq_t cursor, seq, top; + rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted); ktime_t now, max_age, oldest, ack_ts; - int ix; - u8 annotation, anno_type, retrans = 0, unacked = 0; + bool unacked = false; + unsigned int i; + LIST_HEAD(retrans_queue); - _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); + _enter("{%d,%d}", call->acks_hard_ack, call->tx_top); now = ktime_get_real(); max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j)); + oldest = now; + + if (list_empty(&call->tx_buffer)) + goto no_resend; - spin_lock_bh(&call->lock); + if (list_empty(&call->tx_buffer)) + goto no_further_resend; - cursor = call->tx_hard_ack; - top = call->tx_top; - ASSERT(before_eq(cursor, top)); - if (cursor == top) - goto out_unlock; + trace_rxrpc_resend(call, ack_skb); + txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link); - /* Scan the packet list without dropping the lock and decide which of - * the packets in the Tx buffer we're going to resend and what the new - * resend timeout will be. + /* Scan the soft ACK table without dropping the lock and resend any + * explicitly NAK'd packets. 
*/ - trace_rxrpc_resend(call, (cursor + 1) & RXRPC_RXTX_BUFF_MASK); - oldest = now; - for (seq = cursor + 1; before_eq(seq, top); seq++) { - ix = seq & RXRPC_RXTX_BUFF_MASK; - annotation = call->rxtx_annotations[ix]; - anno_type = annotation & RXRPC_TX_ANNO_MASK; - annotation &= ~RXRPC_TX_ANNO_MASK; - if (anno_type == RXRPC_TX_ANNO_ACK) - continue; + if (ack_skb) { + ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); - skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb, rxrpc_skb_seen); + for (i = 0; i < ack->nAcks; i++) { + rxrpc_seq_t seq; - if (anno_type == RXRPC_TX_ANNO_UNACK) { - if (ktime_after(skb->tstamp, max_age)) { - if (ktime_before(skb->tstamp, oldest)) - oldest = skb->tstamp; + if (ack->acks[i] & 1) continue; + seq = ntohl(ack->firstPacket) + i; + if (after(txb->seq, transmitted)) + break; + if (after(txb->seq, seq)) + continue; /* A new hard ACK probably came in */ + list_for_each_entry_from(txb, &call->tx_buffer, call_link) { + if (txb->seq == seq) + goto found_txb; } - if (!(annotation & RXRPC_TX_ANNO_RESENT)) - unacked++; + goto no_further_resend; + + found_txb: + if (after(ntohl(txb->wire.serial), call->acks_highest_serial)) + continue; /* Ack point not yet reached */ + + rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked); + + if (list_empty(&txb->tx_link)) { + list_add_tail(&txb->tx_link, &retrans_queue); + set_bit(RXRPC_TXBUF_RESENT, &txb->flags); + } + + trace_rxrpc_retransmit(call, txb->seq, + ktime_to_ns(ktime_sub(txb->last_sent, + max_age))); + + if (list_is_last(&txb->call_link, &call->tx_buffer)) + goto no_further_resend; + txb = list_next_entry(txb, call_link); } + } + + /* Fast-forward through the Tx queue to the point the peer says it has + * seen. Anything between the soft-ACK table and that point will get + * ACK'd or NACK'd in due course, so don't worry about it here; here we + * need to consider retransmitting anything beyond that point. + * + * Note that ACK for a packet can beat the update of tx_transmitted. 
+ */ + if (after_eq(READ_ONCE(call->acks_prev_seq), READ_ONCE(call->tx_transmitted))) + goto no_further_resend; + + list_for_each_entry_from(txb, &call->tx_buffer, call_link) { + if (before_eq(txb->seq, READ_ONCE(call->acks_prev_seq))) + continue; + if (after(txb->seq, READ_ONCE(call->tx_transmitted))) + break; /* Not transmitted yet */ - /* Okay, we need to retransmit a packet. */ - call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; - retrans++; - trace_rxrpc_retransmit(call, seq, annotation | anno_type, - ktime_to_ns(ktime_sub(skb->tstamp, max_age))); + if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE && + before(ntohl(txb->wire.serial), ntohl(ack->serial))) + goto do_resend; /* Wasn't accounted for by a more recent ping. */ + + if (ktime_after(txb->last_sent, max_age)) { + if (ktime_before(txb->last_sent, oldest)) + oldest = txb->last_sent; + continue; + } + + do_resend: + unacked = true; + if (list_empty(&txb->tx_link)) { + list_add_tail(&txb->tx_link, &retrans_queue); + set_bit(RXRPC_TXBUF_RESENT, &txb->flags); + rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); + } } +no_further_resend: +no_resend: resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans); + resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, + !list_empty(&retrans_queue)); WRITE_ONCE(call->resend_at, resend_at); if (unacked) @@ -221,125 +229,154 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) * that an ACK got lost somewhere. Send a ping to find out instead of * retransmitting data. 
*/ - if (!retrans) { - rxrpc_reduce_call_timer(call, resend_at, now_j, + if (list_empty(&retrans_queue)) { + rxrpc_reduce_call_timer(call, resend_at, jiffies, rxrpc_timer_set_for_resend); - spin_unlock_bh(&call->lock); ack_ts = ktime_sub(now, call->acks_latest_ts); if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3)) goto out; - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, - rxrpc_propose_ack_ping_for_lost_ack); - rxrpc_send_ack_packet(call, true, NULL); + rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, + rxrpc_propose_ack_ping_for_lost_ack); goto out; } - /* Now go through the Tx window and perform the retransmissions. We - * have to drop the lock for each send. If an ACK comes in whilst the - * lock is dropped, it may clear some of the retransmission markers for - * packets that it soft-ACKs. - */ - for (seq = cursor + 1; before_eq(seq, top); seq++) { - ix = seq & RXRPC_RXTX_BUFF_MASK; - annotation = call->rxtx_annotations[ix]; - anno_type = annotation & RXRPC_TX_ANNO_MASK; - if (anno_type != RXRPC_TX_ANNO_RETRANS) - continue; + /* Retransmit the queue */ + while ((txb = list_first_entry_or_null(&retrans_queue, + struct rxrpc_txbuf, tx_link))) { + list_del_init(&txb->tx_link); + rxrpc_transmit_one(call, txb); + } - /* We need to reset the retransmission state, but we need to do - * so before we drop the lock as a new ACK/NAK may come in and - * confuse things - */ - annotation &= ~RXRPC_TX_ANNO_MASK; - annotation |= RXRPC_TX_ANNO_UNACK | RXRPC_TX_ANNO_RESENT; - call->rxtx_annotations[ix] = annotation; +out: + _leave(""); +} - skb = call->rxtx_buffer[ix]; - if (!skb) - continue; +static bool rxrpc_tx_window_has_space(struct rxrpc_call *call) +{ + unsigned int winsize = min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra); + rxrpc_seq_t window = call->acks_hard_ack, wtop = window + winsize; + rxrpc_seq_t tx_top = call->tx_top; + int space; + + space = wtop - tx_top; + return space > 0; +} - rxrpc_get_skb(skb, rxrpc_skb_got); - 
spin_unlock_bh(&call->lock); +/* + * Decant some if the sendmsg prepared queue into the transmission buffer. + */ +static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) +{ + struct rxrpc_txbuf *txb; - if (rxrpc_send_data_packet(call, skb, true) < 0) { - rxrpc_free_skb(skb, rxrpc_skb_freed); - return; - } + if (rxrpc_is_client_call(call) && + !test_bit(RXRPC_CALL_EXPOSED, &call->flags)) + rxrpc_expose_client_call(call); - if (rxrpc_is_client_call(call)) - rxrpc_expose_client_call(call); + while ((txb = list_first_entry_or_null(&call->tx_sendmsg, + struct rxrpc_txbuf, call_link))) { + spin_lock(&call->tx_lock); + list_del(&txb->call_link); + spin_unlock(&call->tx_lock); - rxrpc_free_skb(skb, rxrpc_skb_freed); - spin_lock_bh(&call->lock); - if (after(call->tx_hard_ack, seq)) - seq = call->tx_hard_ack; + call->tx_top = txb->seq; + list_add_tail(&txb->call_link, &call->tx_buffer); + + rxrpc_transmit_one(call, txb); + + if (!rxrpc_tx_window_has_space(call)) + break; } +} -out_unlock: - spin_unlock_bh(&call->lock); -out: - _leave(""); +static void rxrpc_transmit_some_data(struct rxrpc_call *call) +{ + switch (call->state) { + case RXRPC_CALL_SERVER_ACK_REQUEST: + if (list_empty(&call->tx_sendmsg)) + return; + fallthrough; + + case RXRPC_CALL_SERVER_SEND_REPLY: + case RXRPC_CALL_SERVER_AWAIT_ACK: + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + if (!rxrpc_tx_window_has_space(call)) + return; + if (list_empty(&call->tx_sendmsg)) { + rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow); + return; + } + rxrpc_decant_prepared_tx(call); + break; + default: + return; + } +} + +/* + * Ping the other end to fill our RTT cache and to retrieve the rwind + * and MTU parameters. 
+ */ +static void rxrpc_send_initial_ping(struct rxrpc_call *call) +{ + if (call->peer->rtt_count < 3 || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + ktime_get_real())) + rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, + rxrpc_propose_ack_ping_for_params); } /* * Handle retransmission and deferred ACK/abort generation. */ -void rxrpc_process_call(struct work_struct *work) +void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) { - struct rxrpc_call *call = - container_of(work, struct rxrpc_call, processor); - rxrpc_serial_t *send_ack; unsigned long now, next, t; - unsigned int iterations = 0; + rxrpc_serial_t ackr_serial; + bool resend = false, expired = false; - rxrpc_see_call(call); + rxrpc_see_call(call, rxrpc_call_see_input); //printk("\n--------------------\n"); _enter("{%d,%s,%lx}", call->debug_id, rxrpc_call_states[call->state], call->events); -recheck_state: - /* Limit the number of times we do this before returning to the manager */ - iterations++; - if (iterations > 5) - goto requeue; - - if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - rxrpc_send_abort_packet(call); - goto recheck_state; - } + if (call->state == RXRPC_CALL_COMPLETE) + goto out; - if (call->state == RXRPC_CALL_COMPLETE) { - rxrpc_delete_call_timer(call); - goto out_put; - } + if (skb && skb->mark == RXRPC_SKB_MARK_ERROR) + goto out; - /* Work out if any timeouts tripped */ + /* If we see our async-event poke, check for timeout trippage. 
*/ now = jiffies; t = READ_ONCE(call->expect_rx_by); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now); - set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + expired = true; } t = READ_ONCE(call->expect_req_by); if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST && time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); - set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + expired = true; } t = READ_ONCE(call->expect_term_by); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now); - set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + expired = true; } - t = READ_ONCE(call->ack_at); + t = READ_ONCE(call->delay_ack_at); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); - cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET); - set_bit(RXRPC_CALL_EV_ACK, &call->events); + cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET); + ackr_serial = xchg(&call->ackr_serial, 0); + rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial, + rxrpc_propose_ack_ping_for_lost_ack); } t = READ_ONCE(call->ack_lost_at); @@ -353,27 +390,42 @@ recheck_state: if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now); cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET); - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, true, - rxrpc_propose_ack_ping_for_keepalive); - set_bit(RXRPC_CALL_EV_PING, &call->events); + rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, + rxrpc_propose_ack_ping_for_keepalive); } t = READ_ONCE(call->ping_at); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET); - set_bit(RXRPC_CALL_EV_PING, &call->events); + rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, + rxrpc_propose_ack_ping_for_keepalive); } t = READ_ONCE(call->resend_at); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now); cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET); - 
set_bit(RXRPC_CALL_EV_RESEND, &call->events); + resend = true; + } + + if (skb) + rxrpc_input_call_packet(call, skb); + + rxrpc_transmit_some_data(call); + + if (skb) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK) + rxrpc_congestion_degrade(call); } + if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events)) + rxrpc_send_initial_ping(call); + /* Process events */ - if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) { + if (expired) { if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && (int)call->conn->hi_serial - (int)call->rx_serial > 0) { trace_rxrpc_call_reset(call); @@ -381,67 +433,50 @@ recheck_state: } else { rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); } - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - goto recheck_state; + rxrpc_send_abort_packet(call); + goto out; } - send_ack = NULL; - if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) { - call->acks_lost_top = call->tx_top; - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, - rxrpc_propose_ack_ping_for_lost_ack); - send_ack = &call->acks_lost_ping; - } + if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) + rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, + rxrpc_propose_ack_ping_for_lost_ack); - if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || - send_ack) { - if (call->ackr_reason) { - rxrpc_send_ack_packet(call, false, send_ack); - goto recheck_state; - } - } + if (resend && call->state != RXRPC_CALL_CLIENT_RECV_REPLY) + rxrpc_resend(call, NULL); - if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { - rxrpc_send_ack_packet(call, true, NULL); - goto recheck_state; - } + if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) + rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0, + rxrpc_propose_ack_rx_idle); - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) && - call->state != RXRPC_CALL_CLIENT_RECV_REPLY) { - rxrpc_resend(call, now); - goto recheck_state; - } + if 
(atomic_read(&call->ackr_nr_unacked) > 2) + rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0, + rxrpc_propose_ack_input_data); /* Make sure the timer is restarted */ - next = call->expect_rx_by; + if (call->state != RXRPC_CALL_COMPLETE) { + next = call->expect_rx_by; #define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } - set(call->expect_req_by); - set(call->expect_term_by); - set(call->ack_at); - set(call->ack_lost_at); - set(call->resend_at); - set(call->keepalive_at); - set(call->ping_at); - - now = jiffies; - if (time_after_eq(now, next)) - goto recheck_state; + set(call->expect_req_by); + set(call->expect_term_by); + set(call->delay_ack_at); + set(call->ack_lost_at); + set(call->resend_at); + set(call->keepalive_at); + set(call->ping_at); - rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart); + now = jiffies; + if (time_after_eq(now, next)) + rxrpc_poke_call(call, rxrpc_call_poke_timer_now); - /* other events may have been raised since we started checking */ - if (call->events && call->state < RXRPC_CALL_COMPLETE) - goto requeue; + rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart); + } -out_put: - rxrpc_put_call(call, rxrpc_call_put); out: + if (call->state == RXRPC_CALL_COMPLETE) + del_timer_sync(&call->timer); + if (call->acks_hard_ack != call->tx_bottom) + rxrpc_shrink_call_tx_buffer(call); _leave(""); - return; - -requeue: - __rxrpc_queue_call(call); - goto out; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 6401cdf7a624..be5eb8cdf549 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -45,6 +45,24 @@ static struct semaphore rxrpc_call_limiter = static struct semaphore rxrpc_kernel_call_limiter = __SEMAPHORE_INITIALIZER(rxrpc_kernel_call_limiter, 1000); +void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what) +{ + struct rxrpc_local *local = call->local; + bool busy; + + if (call->state < RXRPC_CALL_COMPLETE) { + spin_lock_bh(&local->lock); + busy = 
!list_empty(&call->attend_link); + trace_rxrpc_poke_call(call, busy, what); + if (!busy) { + rxrpc_get_call(call, rxrpc_call_get_poke); + list_add_tail(&call->attend_link, &local->call_attend_q); + } + spin_unlock_bh(&local->lock); + rxrpc_wake_up_io_thread(local); + } +} + static void rxrpc_call_timer_expired(struct timer_list *t) { struct rxrpc_call *call = from_timer(call, t, timer); @@ -52,10 +70,8 @@ static void rxrpc_call_timer_expired(struct timer_list *t) _enter("%d", call->debug_id); if (call->state < RXRPC_CALL_COMPLETE) { - trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); - __rxrpc_queue_call(call); - } else { - rxrpc_put_call(call, rxrpc_call_put); + trace_rxrpc_timer_expired(call, jiffies); + rxrpc_poke_call(call, rxrpc_call_poke_timer); } } @@ -64,21 +80,14 @@ void rxrpc_reduce_call_timer(struct rxrpc_call *call, unsigned long now, enum rxrpc_timer_trace why) { - if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) { - trace_rxrpc_timer(call, why, now); - if (timer_reduce(&call->timer, expire_at)) - rxrpc_put_call(call, rxrpc_call_put_notimer); - } -} - -void rxrpc_delete_call_timer(struct rxrpc_call *call) -{ - if (del_timer_sync(&call->timer)) - rxrpc_put_call(call, rxrpc_call_put_timer); + trace_rxrpc_timer(call, why, now); + timer_reduce(&call->timer, expire_at); } static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; +static void rxrpc_destroy_call(struct work_struct *); + /* * find an extant server call * - called in process context with IRQs enabled @@ -110,7 +119,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, return NULL; found_extant_call: - rxrpc_get_call(call, rxrpc_call_got); + rxrpc_get_call(call, rxrpc_call_get_sendmsg); read_unlock(&rx->call_lock); _leave(" = %p [%d]", call, refcount_read(&call->ref)); return call; @@ -129,16 +138,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, if (!call) return NULL; - call->rxtx_buffer = kcalloc(RXRPC_RXTX_BUFF_SIZE, - 
sizeof(struct sk_buff *), - gfp); - if (!call->rxtx_buffer) - goto nomem; - - call->rxtx_annotations = kcalloc(RXRPC_RXTX_BUFF_SIZE, sizeof(u8), gfp); - if (!call->rxtx_annotations) - goto nomem_2; - mutex_init(&call->user_mutex); /* Prevent lockdep reporting a deadlock false positive between the afs @@ -149,43 +148,45 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, &rxrpc_call_user_mutex_lock_class_key); timer_setup(&call->timer, rxrpc_call_timer_expired, 0); - INIT_WORK(&call->processor, &rxrpc_process_call); + INIT_WORK(&call->destroyer, rxrpc_destroy_call); INIT_LIST_HEAD(&call->link); INIT_LIST_HEAD(&call->chan_wait_link); INIT_LIST_HEAD(&call->accept_link); INIT_LIST_HEAD(&call->recvmsg_link); INIT_LIST_HEAD(&call->sock_link); + INIT_LIST_HEAD(&call->attend_link); + INIT_LIST_HEAD(&call->tx_sendmsg); + INIT_LIST_HEAD(&call->tx_buffer); + skb_queue_head_init(&call->recvmsg_queue); + skb_queue_head_init(&call->rx_oos_queue); init_waitqueue_head(&call->waitq); - spin_lock_init(&call->lock); spin_lock_init(&call->notify_lock); - spin_lock_init(&call->input_lock); + spin_lock_init(&call->tx_lock); rwlock_init(&call->state_lock); refcount_set(&call->ref, 1); call->debug_id = debug_id; call->tx_total_len = -1; call->next_rx_timo = 20 * HZ; call->next_req_timo = 1 * HZ; + atomic64_set(&call->ackr_window, 0x100000001ULL); memset(&call->sock_node, 0xed, sizeof(call->sock_node)); - /* Leave space in the ring to handle a maxed-out jumbo packet */ call->rx_winsize = rxrpc_rx_window_size; call->tx_winsize = 16; - call->rx_expect_next = 1; - call->cong_cwnd = 2; - call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1; + if (RXRPC_TX_SMSS > 2190) + call->cong_cwnd = 2; + else if (RXRPC_TX_SMSS > 1095) + call->cong_cwnd = 3; + else + call->cong_cwnd = 4; + call->cong_ssthresh = RXRPC_TX_MAX_WINDOW; call->rxnet = rxnet; call->rtt_avail = RXRPC_CALL_RTT_AVAIL_MASK; atomic_inc(&rxnet->nr_calls); return call; - -nomem_2: - kfree(call->rxtx_buffer); -nomem: - 
kmem_cache_free(rxrpc_call_jar, call); - return NULL; } /* @@ -193,23 +194,45 @@ nomem: */ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, struct sockaddr_rxrpc *srx, + struct rxrpc_conn_parameters *cp, + struct rxrpc_call_params *p, gfp_t gfp, unsigned int debug_id) { struct rxrpc_call *call; ktime_t now; + int ret; _enter(""); call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) return ERR_PTR(-ENOMEM); - call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; - call->service_id = srx->srx_service; - call->tx_phase = true; now = ktime_get_real(); - call->acks_latest_ts = now; - call->cong_tstamp = now; + call->acks_latest_ts = now; + call->cong_tstamp = now; + call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; + call->dest_srx = *srx; + call->interruptibility = p->interruptibility; + call->tx_total_len = p->tx_total_len; + call->key = key_get(cp->key); + call->local = rxrpc_get_local(cp->local, rxrpc_local_get_call); + if (p->kernel) + __set_bit(RXRPC_CALL_KERNEL, &call->flags); + if (cp->upgrade) + __set_bit(RXRPC_CALL_UPGRADE, &call->flags); + if (cp->exclusive) + __set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags); + + ret = rxrpc_init_client_call_security(call); + if (ret < 0) { + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); + rxrpc_put_call(call, rxrpc_call_put_discard_error); + return ERR_PTR(ret); + } + + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), + p->user_call_ID, rxrpc_call_new_client); _leave(" = %p", call); return call; @@ -223,10 +246,11 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) unsigned long now = jiffies; unsigned long j = now + MAX_JIFFY_OFFSET; - call->ack_at = j; + call->delay_ack_at = j; call->ack_lost_at = j; call->resend_at = j; call->ping_at = j; + call->keepalive_at = j; call->expect_rx_by = j; call->expect_req_by = j; call->expect_term_by = j; @@ -279,7 +303,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_net *rxnet; struct semaphore *limiter; 
struct rb_node *parent, **pp; - const void *here = __builtin_return_address(0); int ret; _enter("%p,%lx", rx, p->user_call_ID); @@ -290,7 +313,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return ERR_PTR(-ERESTARTSYS); } - call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id); + call = rxrpc_alloc_client_call(rx, srx, cp, p, gfp, debug_id); if (IS_ERR(call)) { release_sock(&rx->sk); up(limiter); @@ -298,14 +321,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } - call->interruptibility = p->interruptibility; - call->tx_total_len = p->tx_total_len; - trace_rxrpc_call(call->debug_id, rxrpc_call_new_client, - refcount_read(&call->ref), - here, (const void *)p->user_call_ID); - if (p->kernel) - __set_bit(RXRPC_CALL_KERNEL, &call->flags); - /* We need to protect a partially set up call against the user as we * will be acting outside the socket lock. */ @@ -331,7 +346,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, rcu_assign_pointer(call->socket, rx); call->user_call_ID = p->user_call_ID; __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); - rxrpc_get_call(call, rxrpc_call_got_userid); + rxrpc_get_call(call, rxrpc_call_get_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); list_add(&call->sock_link, &rx->sock_calls); @@ -339,9 +354,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); rxnet = call->rxnet; - spin_lock_bh(&rxnet->call_lock); + spin_lock(&rxnet->call_lock); list_add_tail_rcu(&call->link, &rxnet->calls); - spin_unlock_bh(&rxnet->call_lock); + spin_unlock(&rxnet->call_lock); /* From this point on, the call is protected by its own lock. 
*/ release_sock(&rx->sk); @@ -353,13 +368,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, if (ret < 0) goto error_attached_to_socket; - trace_rxrpc_call(call->debug_id, rxrpc_call_connected, - refcount_read(&call->ref), here, NULL); + rxrpc_see_call(call, rxrpc_call_see_connected); rxrpc_start_call_timer(call); - _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); - _leave(" = %p [new]", call); return call; @@ -373,11 +385,11 @@ error_dup_user_ID: release_sock(&rx->sk); __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, -EEXIST); - trace_rxrpc_call(call->debug_id, rxrpc_call_error, - refcount_read(&call->ref), here, ERR_PTR(-EEXIST)); + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0, + rxrpc_call_see_userid_exists); rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_userid_exists); _leave(" = -EEXIST"); return ERR_PTR(-EEXIST); @@ -387,8 +399,8 @@ error_dup_user_ID: * leave the error to recvmsg() to deal with. 
*/ error_attached_to_socket: - trace_rxrpc_call(call->debug_id, rxrpc_call_error, - refcount_read(&call->ref), here, ERR_PTR(ret)); + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret, + rxrpc_call_see_connect_failed); set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, ret); @@ -412,11 +424,34 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, rcu_assign_pointer(call->socket, rx); call->call_id = sp->hdr.callNumber; - call->service_id = sp->hdr.serviceId; + call->dest_srx.srx_service = sp->hdr.serviceId; call->cid = sp->hdr.cid; call->state = RXRPC_CALL_SERVER_SECURING; call->cong_tstamp = skb->tstamp; + spin_lock(&conn->state_lock); + + switch (conn->state) { + case RXRPC_CONN_SERVICE_UNSECURED: + case RXRPC_CONN_SERVICE_CHALLENGING: + call->state = RXRPC_CALL_SERVER_SECURING; + break; + case RXRPC_CONN_SERVICE: + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + break; + + case RXRPC_CONN_REMOTELY_ABORTED: + __rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + conn->abort_code, conn->error); + break; + case RXRPC_CONN_LOCALLY_ABORTED: + __rxrpc_abort_call("CON", call, 1, + conn->abort_code, conn->error); + break; + default: + BUG(); + } + /* Set the channel for this call. 
We don't get channel_lock as we're * only defending against the data_ready handler (which we're called * from) and the RESPONSE packet parser (which is only really @@ -427,99 +462,57 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, conn->channels[chan].call_counter = call->call_id; conn->channels[chan].call_id = call->call_id; rcu_assign_pointer(conn->channels[chan].call, call); + spin_unlock(&conn->state_lock); - spin_lock(&conn->params.peer->lock); - hlist_add_head_rcu(&call->error_link, &conn->params.peer->error_targets); - spin_unlock(&conn->params.peer->lock); - - _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); + spin_lock(&conn->peer->lock); + hlist_add_head(&call->error_link, &conn->peer->error_targets); + spin_unlock(&conn->peer->lock); rxrpc_start_call_timer(call); _leave(""); } /* - * Queue a call's work processor, getting a ref to pass to the work queue. - */ -bool rxrpc_queue_call(struct rxrpc_call *call) -{ - const void *here = __builtin_return_address(0); - int n; - - if (!__refcount_inc_not_zero(&call->ref, &n)) - return false; - if (rxrpc_queue_work(&call->processor)) - trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1, - here, NULL); - else - rxrpc_put_call(call, rxrpc_call_put_noqueue); - return true; -} - -/* - * Queue a call's work processor, passing the callers ref to the work queue. - */ -bool __rxrpc_queue_call(struct rxrpc_call *call) -{ - const void *here = __builtin_return_address(0); - int n = refcount_read(&call->ref); - ASSERTCMP(n, >=, 1); - if (rxrpc_queue_work(&call->processor)) - trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n, - here, NULL); - else - rxrpc_put_call(call, rxrpc_call_put_noqueue); - return true; -} - -/* * Note the re-emergence of a call. 
*/ -void rxrpc_see_call(struct rxrpc_call *call) +void rxrpc_see_call(struct rxrpc_call *call, enum rxrpc_call_trace why) { - const void *here = __builtin_return_address(0); if (call) { - int n = refcount_read(&call->ref); + int r = refcount_read(&call->ref); - trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n, - here, NULL); + trace_rxrpc_call(call->debug_id, r, 0, why); } } -bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +struct rxrpc_call *rxrpc_try_get_call(struct rxrpc_call *call, + enum rxrpc_call_trace why) { - const void *here = __builtin_return_address(0); - int n; + int r; - if (!__refcount_inc_not_zero(&call->ref, &n)) - return false; - trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); - return true; + if (!call || !__refcount_inc_not_zero(&call->ref, &r)) + return NULL; + trace_rxrpc_call(call->debug_id, r + 1, 0, why); + return call; } /* * Note the addition of a ref on a call. */ -void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why) { - const void *here = __builtin_return_address(0); - int n; + int r; - __refcount_inc(&call->ref, &n); - trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); + __refcount_inc(&call->ref, &r); + trace_rxrpc_call(call->debug_id, r + 1, 0, why); } /* - * Clean up the RxTx skb ring. + * Clean up the Rx skb ring. 
*/ static void rxrpc_cleanup_ring(struct rxrpc_call *call) { - int i; - - for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { - rxrpc_free_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned); - call->rxtx_buffer[i] = NULL; - } + skb_queue_purge(&call->recvmsg_queue); + skb_queue_purge(&call->rx_oos_queue); } /* @@ -527,28 +520,24 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call) */ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { - const void *here = __builtin_return_address(0); struct rxrpc_connection *conn = call->conn; bool put = false; _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); - trace_rxrpc_call(call->debug_id, rxrpc_call_release, - refcount_read(&call->ref), - here, (const void *)call->flags); + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), + call->flags, rxrpc_call_see_release); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - spin_lock_bh(&call->lock); if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); - spin_unlock_bh(&call->lock); rxrpc_put_call_slot(call); - rxrpc_delete_call_timer(call); + del_timer_sync(&call->timer); /* Make sure we don't get any more notifications */ - write_lock_bh(&rx->recvmsg_lock); + write_lock(&rx->recvmsg_lock); if (!list_empty(&call->recvmsg_link)) { _debug("unlinking once-pending call %p { e=%lx f=%lx }", @@ -561,16 +550,16 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) call->recvmsg_link.next = NULL; call->recvmsg_link.prev = NULL; - write_unlock_bh(&rx->recvmsg_lock); + write_unlock(&rx->recvmsg_lock); if (put) - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_unnotify); write_lock(&rx->call_lock); if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { rb_erase(&call->sock_node, &rx->calls); memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); - rxrpc_put_call(call, rxrpc_call_put_userid); + rxrpc_put_call(call, rxrpc_call_put_userid_exists); } list_del(&call->sock_link); @@ -599,17 +588,17 
@@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) struct rxrpc_call, accept_link); list_del(&call->accept_link); rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET); - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_release_sock_tba); } while (!list_empty(&rx->sock_calls)) { call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); - rxrpc_get_call(call, rxrpc_call_got); + rxrpc_get_call(call, rxrpc_call_get_release_sock); rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET); rxrpc_send_abort_packet(call); rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_release_sock); } _leave(""); @@ -618,26 +607,24 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) /* * release a call */ -void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why) { struct rxrpc_net *rxnet = call->rxnet; - const void *here = __builtin_return_address(0); unsigned int debug_id = call->debug_id; bool dead; - int n; + int r; ASSERT(call != NULL); - dead = __refcount_dec_and_test(&call->ref, &n); - trace_rxrpc_call(debug_id, op, n, here, NULL); + dead = __refcount_dec_and_test(&call->ref, &r); + trace_rxrpc_call(debug_id, r - 1, 0, why); if (dead) { - _debug("call %d dead", call->debug_id); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); if (!list_empty(&call->link)) { - spin_lock_bh(&rxnet->call_lock); + spin_lock(&rxnet->call_lock); list_del_init(&call->link); - spin_unlock_bh(&rxnet->call_lock); + spin_unlock(&rxnet->call_lock); } rxrpc_cleanup_call(call); @@ -645,38 +632,45 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) } /* - * Final call destruction - but must be done in process context. + * Free up the call under RCU. 
*/ -static void rxrpc_destroy_call(struct work_struct *work) +static void rxrpc_rcu_free_call(struct rcu_head *rcu) { - struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - struct rxrpc_net *rxnet = call->rxnet; - - rxrpc_delete_call_timer(call); + struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); + struct rxrpc_net *rxnet = READ_ONCE(call->rxnet); - rxrpc_put_connection(call->conn); - rxrpc_put_peer(call->peer); - kfree(call->rxtx_buffer); - kfree(call->rxtx_annotations); kmem_cache_free(rxrpc_call_jar, call); if (atomic_dec_and_test(&rxnet->nr_calls)) wake_up_var(&rxnet->nr_calls); } /* - * Final call destruction under RCU. + * Final call destruction - but must be done in process context. */ -static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) +static void rxrpc_destroy_call(struct work_struct *work) { - struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); + struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer); + struct rxrpc_txbuf *txb; - if (in_softirq()) { - INIT_WORK(&call->processor, rxrpc_destroy_call); - if (!rxrpc_queue_work(&call->processor)) - BUG(); - } else { - rxrpc_destroy_call(&call->processor); + del_timer_sync(&call->timer); + + rxrpc_cleanup_ring(call); + while ((txb = list_first_entry_or_null(&call->tx_sendmsg, + struct rxrpc_txbuf, call_link))) { + list_del(&txb->call_link); + rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned); } + while ((txb = list_first_entry_or_null(&call->tx_buffer, + struct rxrpc_txbuf, call_link))) { + list_del(&txb->call_link); + rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned); + } + + rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned); + rxrpc_put_connection(call->conn, rxrpc_conn_put_call); + rxrpc_put_peer(call->peer, rxrpc_peer_put_call); + rxrpc_put_local(call->local, rxrpc_local_put_call); + call_rcu(&call->rcu, rxrpc_rcu_free_call); } /* @@ -684,17 +678,20 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) */ void 
rxrpc_cleanup_call(struct rxrpc_call *call) { - _net("DESTROY CALL %d", call->debug_id); - memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - rxrpc_cleanup_ring(call); - rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned); + del_timer(&call->timer); - call_rcu(&call->rcu, rxrpc_rcu_destroy_call); + if (rcu_read_lock_held()) + /* Can't use the rxrpc workqueue as we need to cancel/flush + * something that may be running/waiting there. + */ + schedule_work(&call->destroyer); + else + rxrpc_destroy_call(&call->destroyer); } /* @@ -709,14 +706,14 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); if (!list_empty(&rxnet->calls)) { - spin_lock_bh(&rxnet->call_lock); + spin_lock(&rxnet->call_lock); while (!list_empty(&rxnet->calls)) { call = list_entry(rxnet->calls.next, struct rxrpc_call, link); _debug("Zapping call %p", call); - rxrpc_see_call(call); + rxrpc_see_call(call, rxrpc_call_see_zap); list_del_init(&call->link); pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", @@ -724,12 +721,12 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) rxrpc_call_states[call->state], call->flags, call->events); - spin_unlock_bh(&rxnet->call_lock); + spin_unlock(&rxnet->call_lock); cond_resched(); - spin_lock_bh(&rxnet->call_lock); + spin_lock(&rxnet->call_lock); } - spin_unlock_bh(&rxnet->call_lock); + spin_unlock(&rxnet->call_lock); } atomic_dec(&rxnet->nr_calls); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index bdb335cb2d05..a08e33c9e54b 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -51,7 +51,7 @@ static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp) { - struct rxrpc_net *rxnet = conn->params.local->rxnet; + struct rxrpc_net *rxnet = conn->rxnet; int id; _enter(""); @@ -122,37 +122,47 @@ static struct rxrpc_bundle 
*rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, bundle = kzalloc(sizeof(*bundle), gfp); if (bundle) { - bundle->params = *cp; - rxrpc_get_peer(bundle->params.peer); + bundle->local = cp->local; + bundle->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_bundle); + bundle->key = cp->key; + bundle->exclusive = cp->exclusive; + bundle->upgrade = cp->upgrade; + bundle->service_id = cp->service_id; + bundle->security_level = cp->security_level; refcount_set(&bundle->ref, 1); atomic_set(&bundle->active, 1); spin_lock_init(&bundle->channel_lock); INIT_LIST_HEAD(&bundle->waiting_calls); + trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new); } return bundle; } -struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle) +struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle, + enum rxrpc_bundle_trace why) { - refcount_inc(&bundle->ref); + int r; + + __refcount_inc(&bundle->ref, &r); + trace_rxrpc_bundle(bundle->debug_id, r + 1, why); return bundle; } static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) { - rxrpc_put_peer(bundle->params.peer); + trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free); + rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle); kfree(bundle); } -void rxrpc_put_bundle(struct rxrpc_bundle *bundle) +void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why) { - unsigned int d = bundle->debug_id; + unsigned int id = bundle->debug_id; bool dead; int r; dead = __refcount_dec_and_test(&bundle->ref, &r); - - _debug("PUT B=%x %d", d, r - 1); + trace_rxrpc_bundle(id, r - 1, why); if (dead) rxrpc_free_bundle(bundle); } @@ -164,12 +174,12 @@ static struct rxrpc_connection * rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) { struct rxrpc_connection *conn; - struct rxrpc_net *rxnet = bundle->params.local->rxnet; + struct rxrpc_net *rxnet = bundle->local->rxnet; int ret; _enter(""); - conn = rxrpc_alloc_connection(gfp); + conn = rxrpc_alloc_connection(rxnet, gfp); if (!conn) { 
_leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); @@ -177,10 +187,16 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) refcount_set(&conn->ref, 1); conn->bundle = bundle; - conn->params = bundle->params; + conn->local = bundle->local; + conn->peer = bundle->peer; + conn->key = bundle->key; + conn->exclusive = bundle->exclusive; + conn->upgrade = bundle->upgrade; + conn->orig_service_id = bundle->service_id; + conn->security_level = bundle->security_level; conn->out_clientflag = RXRPC_CLIENT_INITIATED; conn->state = RXRPC_CONN_CLIENT; - conn->service_id = conn->params.service_id; + conn->service_id = conn->orig_service_id; ret = rxrpc_get_client_connection_id(conn, gfp); if (ret < 0) @@ -195,14 +211,13 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); write_unlock(&rxnet->conn_lock); - rxrpc_get_bundle(bundle); - rxrpc_get_peer(conn->params.peer); - rxrpc_get_local(conn->params.local); - key_get(conn->params.key); + rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn); + rxrpc_get_peer(conn->peer, rxrpc_peer_get_client_conn); + rxrpc_get_local(conn->local, rxrpc_local_get_client_conn); + key_get(conn->key); - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client, - refcount_read(&conn->ref), - __builtin_return_address(0)); + trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref), + rxrpc_conn_new_client); atomic_inc(&rxnet->nr_client_conns); trace_rxrpc_client(conn, -1, rxrpc_client_alloc); @@ -228,7 +243,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) if (!conn) goto dont_reuse; - rxnet = conn->params.local->rxnet; + rxnet = conn->rxnet; if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) goto dont_reuse; @@ -285,7 +300,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c while (p) { bundle = rb_entry(p, struct rxrpc_bundle, local_node); -#define cmp(X) ((long)bundle->params.X - (long)cp->X) +#define cmp(X) 
((long)bundle->X - (long)cp->X) diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level) ?: @@ -314,7 +329,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c parent = *pp; bundle = rb_entry(parent, struct rxrpc_bundle, local_node); -#define cmp(X) ((long)bundle->params.X - (long)cp->X) +#define cmp(X) ((long)bundle->X - (long)cp->X) diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level) ?: @@ -332,7 +347,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id); rb_link_node(&candidate->local_node, parent, pp); rb_insert_color(&candidate->local_node, &local->client_bundles); - rxrpc_get_bundle(candidate); + rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call); spin_unlock(&local->client_bundles_lock); _leave(" = %u [new]", candidate->debug_id); return candidate; @@ -340,7 +355,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c found_bundle_free: rxrpc_free_bundle(candidate); found_bundle: - rxrpc_get_bundle(bundle); + rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call); atomic_inc(&bundle->active); spin_unlock(&local->client_bundles_lock); _leave(" = %u [found]", bundle->debug_id); @@ -367,7 +382,8 @@ static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx, if (!cp->peer) goto error; - call->cong_cwnd = cp->peer->cong_cwnd; + call->tx_last_sent = ktime_get_real(); + call->cong_ssthresh = cp->peer->cong_ssthresh; if (call->cong_cwnd >= call->cong_ssthresh) call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; else @@ -455,10 +471,10 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) if (candidate) { _debug("discard C=%x", candidate->debug_id); trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); - rxrpc_put_connection(candidate); + rxrpc_put_connection(candidate, rxrpc_conn_put_discard); } - rxrpc_put_connection(old); + rxrpc_put_connection(old, rxrpc_conn_put_noreuse); 
_leave(""); } @@ -529,23 +545,21 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); clear_bit(conn->bundle_shift + channel, &bundle->avail_chans); - rxrpc_see_call(call); + rxrpc_see_call(call, rxrpc_call_see_activate_client); list_del_init(&call->chan_wait_link); - call->peer = rxrpc_get_peer(conn->params.peer); - call->conn = rxrpc_get_connection(conn); + call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_activate_call); + call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call); call->cid = conn->proto.cid | channel; call->call_id = call_id; call->security = conn->security; call->security_ix = conn->security_ix; - call->service_id = conn->service_id; + call->dest_srx.srx_service = conn->service_id; trace_rxrpc_connect_call(call); - _net("CONNECT call %08x:%08x as call %d on conn %d", - call->cid, call->call_id, call->debug_id, conn->debug_id); - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); /* Paired with the read barrier in rxrpc_connect_call(). 
This orders * cid and epoch in the connection wrt to call_id without the need to @@ -570,7 +584,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, */ static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn) { - struct rxrpc_net *rxnet = bundle->params.local->rxnet; + struct rxrpc_net *rxnet = bundle->local->rxnet; bool drop_ref; if (!list_empty(&conn->cache_link)) { @@ -582,7 +596,7 @@ static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connecti } spin_unlock(&rxnet->client_conn_cache_lock); if (drop_ref) - rxrpc_put_connection(conn); + rxrpc_put_connection(conn, rxrpc_conn_put_unidle); } } @@ -731,7 +745,7 @@ granted_channel: out_put_bundle: rxrpc_deactivate_bundle(bundle); - rxrpc_put_bundle(bundle); + rxrpc_put_bundle(bundle, rxrpc_bundle_get_client_call); out: _leave(" = %d", ret); return ret; @@ -772,6 +786,10 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) if (chan->call_counter >= INT_MAX) set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); trace_rxrpc_client(conn, channel, rxrpc_client_exposed); + + spin_lock(&call->peer->lock); + hlist_add_head(&call->error_link, &call->peer->error_targets); + spin_unlock(&call->peer->lock); } } @@ -796,7 +814,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call { struct rxrpc_connection *conn; struct rxrpc_channel *chan = NULL; - struct rxrpc_net *rxnet = bundle->params.local->rxnet; + struct rxrpc_net *rxnet = bundle->local->rxnet; unsigned int channel; bool may_reuse; u32 cid; @@ -886,7 +904,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); conn->idle_timestamp = jiffies; - rxrpc_get_connection(conn); + rxrpc_get_connection(conn, rxrpc_conn_get_idle); spin_lock(&rxnet->client_conn_cache_lock); list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); spin_unlock(&rxnet->client_conn_cache_lock); @@ -928,7 +946,7 
@@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) if (need_drop) { rxrpc_deactivate_bundle(bundle); - rxrpc_put_connection(conn); + rxrpc_put_connection(conn, rxrpc_conn_put_unbundle); } } @@ -937,11 +955,11 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) */ static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) { - struct rxrpc_local *local = bundle->params.local; + struct rxrpc_local *local = bundle->local; bool need_put = false; if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { - if (!bundle->params.exclusive) { + if (!bundle->exclusive) { _debug("erase bundle"); rb_erase(&bundle->local_node, &local->client_bundles); need_put = true; @@ -949,16 +967,16 @@ static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) spin_unlock(&local->client_bundles_lock); if (need_put) - rxrpc_put_bundle(bundle); + rxrpc_put_bundle(bundle, rxrpc_bundle_put_discard); } } /* * Clean up a dead client connection. */ -static void rxrpc_kill_client_conn(struct rxrpc_connection *conn) +void rxrpc_kill_client_conn(struct rxrpc_connection *conn) { - struct rxrpc_local *local = conn->params.local; + struct rxrpc_local *local = conn->local; struct rxrpc_net *rxnet = local->rxnet; _enter("C=%x", conn->debug_id); @@ -967,23 +985,6 @@ static void rxrpc_kill_client_conn(struct rxrpc_connection *conn) atomic_dec(&rxnet->nr_client_conns); rxrpc_put_client_connection_id(conn); - rxrpc_kill_connection(conn); -} - -/* - * Clean up a dead client connections. 
- */ -void rxrpc_put_client_conn(struct rxrpc_connection *conn) -{ - const void *here = __builtin_return_address(0); - unsigned int debug_id = conn->debug_id; - bool dead; - int r; - - dead = __refcount_dec_and_test(&conn->ref, &r); - trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here); - if (dead) - rxrpc_kill_client_conn(conn); } /* @@ -1009,7 +1010,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work) } /* Don't double up on the discarding */ - if (!spin_trylock(&rxnet->client_conn_discard_lock)) { + if (!mutex_trylock(&rxnet->client_conn_discard_lock)) { _leave(" [already]"); return; } @@ -1037,7 +1038,7 @@ next: expiry = rxrpc_conn_idle_client_expiry; if (nr_conns > rxrpc_reap_client_connections) expiry = rxrpc_conn_idle_client_fast_expiry; - if (conn->params.local->service_closed) + if (conn->local->service_closed) expiry = rxrpc_closed_conn_expiry * HZ; conn_expires_at = conn->idle_timestamp + expiry; @@ -1047,13 +1048,15 @@ next: goto not_yet_expired; } + atomic_dec(&conn->active); trace_rxrpc_client(conn, -1, rxrpc_client_discard); list_del_init(&conn->cache_link); spin_unlock(&rxnet->client_conn_cache_lock); rxrpc_unbundle_conn(conn); - rxrpc_put_connection(conn); /* Drop the ->cache_link ref */ + /* Drop the ->cache_link ref */ + rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle); nr_conns--; goto next; @@ -1072,7 +1075,7 @@ not_yet_expired: out: spin_unlock(&rxnet->client_conn_cache_lock); - spin_unlock(&rxnet->client_conn_discard_lock); + mutex_unlock(&rxnet->client_conn_discard_lock); _leave(""); } @@ -1111,7 +1114,8 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *local) list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns, cache_link) { - if (conn->params.local == local) { + if (conn->local == local) { + atomic_dec(&conn->active); trace_rxrpc_client(conn, -1, rxrpc_client_discard); list_move(&conn->cache_link, &graveyard); } @@ -1124,7 +1128,7 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local 
*local) struct rxrpc_connection, cache_link); list_del_init(&conn->cache_link); rxrpc_unbundle_conn(conn); - rxrpc_put_connection(conn); + rxrpc_put_connection(conn, rxrpc_conn_put_local_dead); } _leave(" [culled]"); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index aab069701398..480364bcbf85 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -52,8 +52,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, if (skb && call_id != sp->hdr.callNumber) return; - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -86,8 +86,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, break; case RXRPC_PACKET_TYPE_ACK: - mtu = conn->params.peer->if_mtu; - mtu -= conn->params.peer->hdrsize; + mtu = conn->peer->if_mtu; + mtu -= conn->peer->hdrsize; pkt.ack.bufferSpace = 0; pkt.ack.maxSkew = htons(skb ? 
skb->priority : 0); pkt.ack.firstPacket = htonl(chan->last_seq + 1); @@ -122,19 +122,17 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, switch (chan->last_type) { case RXRPC_PACKET_TYPE_ABORT: - _proto("Tx ABORT %%%u { %d } [re]", serial, conn->abort_code); break; case RXRPC_PACKET_TYPE_ACK: trace_rxrpc_tx_ack(chan->call_debug_id, serial, ntohl(pkt.ack.firstPacket), ntohl(pkt.ack.serial), pkt.ack.reason, 0); - _proto("Tx ACK %%%u [re]", serial); break; } - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); - conn->params.peer->last_tx_at = ktime_get_seconds(); + ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len); + conn->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(chan->call_debug_id, serial, ret, rxrpc_tx_point_call_final_resend); @@ -200,9 +198,9 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, _enter("%d,,%u,%u", conn->debug_id, error, abort_code); /* generate a connection-level abort */ - spin_lock_bh(&conn->state_lock); + spin_lock(&conn->state_lock); if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - spin_unlock_bh(&conn->state_lock); + spin_unlock(&conn->state_lock); _leave(" = 0 [already dead]"); return 0; } @@ -211,10 +209,10 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, conn->abort_code = abort_code; conn->state = RXRPC_CONN_LOCALLY_ABORTED; set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - spin_unlock_bh(&conn->state_lock); + spin_unlock(&conn->state_lock); - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -242,9 +240,8 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, serial = atomic_inc_return(&conn->serial); rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial); whdr.serial = 
htonl(serial); - _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); + ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len); if (ret < 0) { trace_rxrpc_tx_fail(conn->debug_id, serial, ret, rxrpc_tx_point_conn_abort); @@ -254,7 +251,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort); - conn->params.peer->last_tx_at = ktime_get_seconds(); + conn->peer->last_tx_at = ktime_get_seconds(); _leave(" = 0"); return 0; @@ -268,12 +265,12 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call) { _enter("%p", call); if (call) { - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); if (call->state == RXRPC_CALL_SERVER_SECURING) { call->state = RXRPC_CALL_SERVER_RECV_REQUEST; rxrpc_notify_socket(call); } - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); } } @@ -285,8 +282,6 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, u32 *_abort_code) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - __be32 wtmp; - u32 abort_code; int loop, ret; if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { @@ -308,17 +303,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return 0; case RXRPC_PACKET_TYPE_ABORT: - if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - &wtmp, sizeof(wtmp)) < 0) { - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_abort")); - return -EPROTO; - } - abort_code = ntohl(wtmp); - _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); - conn->error = -ECONNABORTED; - conn->abort_code = abort_code; + conn->abort_code = skb->priority; conn->state = RXRPC_CONN_REMOTELY_ABORTED; set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); @@ -334,23 +320,23 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return ret; ret = 
conn->security->init_connection_security( - conn, conn->params.key->payload.data[0]); + conn, conn->key->payload.data[0]); if (ret < 0) return ret; spin_lock(&conn->bundle->channel_lock); - spin_lock_bh(&conn->state_lock); + spin_lock(&conn->state_lock); if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { conn->state = RXRPC_CONN_SERVICE; - spin_unlock_bh(&conn->state_lock); + spin_unlock(&conn->state_lock); for (loop = 0; loop < RXRPC_MAXCALLS; loop++) rxrpc_call_is_secure( rcu_dereference_protected( conn->channels[loop].call, lockdep_is_held(&conn->bundle->channel_lock))); } else { - spin_unlock_bh(&conn->state_lock); + spin_unlock(&conn->state_lock); } spin_unlock(&conn->bundle->channel_lock); @@ -451,7 +437,7 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn) /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { - rxrpc_see_skb(skb, rxrpc_skb_seen); + rxrpc_see_skb(skb, rxrpc_skb_see_conn_work); ret = rxrpc_process_event(conn, skb, &abort_code); switch (ret) { case -EPROTO: @@ -463,7 +449,7 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn) goto requeue_and_leave; case -ECONNABORTED: default: - rxrpc_free_skb(skb, rxrpc_skb_freed); + rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); break; } } @@ -477,7 +463,7 @@ requeue_and_leave: protocol_error: if (rxrpc_abort_connection(conn, ret, abort_code) < 0) goto requeue_and_leave; - rxrpc_free_skb(skb, rxrpc_skb_freed); + rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); return; } @@ -486,14 +472,70 @@ void rxrpc_process_connection(struct work_struct *work) struct rxrpc_connection *conn = container_of(work, struct rxrpc_connection, processor); - rxrpc_see_connection(conn); + rxrpc_see_connection(conn, rxrpc_conn_see_work); - if (__rxrpc_use_local(conn->params.local)) { + if (__rxrpc_use_local(conn->local, rxrpc_local_use_conn_work)) { 
rxrpc_do_process_connection(conn); - rxrpc_unuse_local(conn->params.local); + rxrpc_unuse_local(conn->local, rxrpc_local_unuse_conn_work); } +} - rxrpc_put_connection(conn); - _leave(""); - return; +/* + * post connection-level events to the connection + * - this includes challenges, responses, some aborts and call terminal packet + * retransmission. + */ +static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + _enter("%p,%p", conn, skb); + + rxrpc_get_skb(skb, rxrpc_skb_get_conn_work); + skb_queue_tail(&conn->rx_queue, skb); + rxrpc_queue_conn(conn, rxrpc_conn_queue_rx_work); +} + +/* + * Input a connection-level packet. + */ +int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { + _leave(" = -ECONNABORTED [%u]", conn->state); + return -ECONNABORTED; + } + + _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); + + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_DATA: + case RXRPC_PACKET_TYPE_ACK: + rxrpc_conn_retransmit_call(conn, skb, + sp->hdr.cid & RXRPC_CHANNELMASK); + return 0; + + case RXRPC_PACKET_TYPE_BUSY: + /* Just ignore BUSY packets for now. 
*/ + return 0; + + case RXRPC_PACKET_TYPE_ABORT: + conn->error = -ECONNABORTED; + conn->abort_code = skb->priority; + conn->state = RXRPC_CONN_REMOTELY_ABORTED; + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); + return -ECONNABORTED; + + case RXRPC_PACKET_TYPE_CHALLENGE: + case RXRPC_PACKET_TYPE_RESPONSE: + rxrpc_post_packet_to_conn(conn, skb); + return 0; + + default: + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_conn_pkt")); + return -EPROTO; + } } diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 22089e37e97f..3c8f83dacb2b 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -19,20 +19,23 @@ unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60; unsigned int __read_mostly rxrpc_closed_conn_expiry = 10; -static void rxrpc_destroy_connection(struct rcu_head *); +static void rxrpc_clean_up_connection(struct work_struct *work); +static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, + unsigned long reap_at); static void rxrpc_connection_timer(struct timer_list *timer) { struct rxrpc_connection *conn = container_of(timer, struct rxrpc_connection, timer); - rxrpc_queue_conn(conn); + rxrpc_queue_conn(conn, rxrpc_conn_queue_timer); } /* * allocate a new connection */ -struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) +struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, + gfp_t gfp) { struct rxrpc_connection *conn; @@ -42,10 +45,12 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) if (conn) { INIT_LIST_HEAD(&conn->cache_link); timer_setup(&conn->timer, &rxrpc_connection_timer, 0); - INIT_WORK(&conn->processor, &rxrpc_process_connection); + INIT_WORK(&conn->processor, rxrpc_process_connection); + INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); skb_queue_head_init(&conn->rx_queue); + conn->rxnet = rxnet; 
conn->security = &rxrpc_no_security; spin_lock_init(&conn->state_lock); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); @@ -67,89 +72,55 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) * * The caller must be holding the RCU read lock. */ -struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, - struct sk_buff *skb, - struct rxrpc_peer **_peer) +struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *local, + struct sockaddr_rxrpc *srx, + struct sk_buff *skb) { struct rxrpc_connection *conn; - struct rxrpc_conn_proto k; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct sockaddr_rxrpc srx; struct rxrpc_peer *peer; _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK); - if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) - goto not_found; - - if (srx.transport.family != local->srx.transport.family && - (srx.transport.family == AF_INET && - local->srx.transport.family != AF_INET6)) { - pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", - srx.transport.family, - local->srx.transport.family); + /* Look up client connections by connection ID alone as their IDs are + * unique for this machine. + */ + conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); + if (!conn || refcount_read(&conn->ref) == 0) { + _debug("no conn"); goto not_found; } - k.epoch = sp->hdr.epoch; - k.cid = sp->hdr.cid & RXRPC_CIDMASK; - - if (rxrpc_to_server(sp)) { - /* We need to look up service connections by the full protocol - * parameter set. We look up the peer first as an intermediate - * step and then the connection from the peer's tree. - */ - peer = rxrpc_lookup_peer_rcu(local, &srx); - if (!peer) - goto not_found; - *_peer = peer; - conn = rxrpc_find_service_conn_rcu(peer, skb); - if (!conn || refcount_read(&conn->ref) == 0) - goto not_found; - _leave(" = %p", conn); - return conn; - } else { - /* Look up client connections by connection ID alone as their - * IDs are unique for this machine. 
- */ - conn = idr_find(&rxrpc_client_conn_ids, - sp->hdr.cid >> RXRPC_CIDSHIFT); - if (!conn || refcount_read(&conn->ref) == 0) { - _debug("no conn"); - goto not_found; - } + if (conn->proto.epoch != sp->hdr.epoch || + conn->local != local) + goto not_found; - if (conn->proto.epoch != k.epoch || - conn->params.local != local) + peer = conn->peer; + switch (srx->transport.family) { + case AF_INET: + if (peer->srx.transport.sin.sin_port != + srx->transport.sin.sin_port || + peer->srx.transport.sin.sin_addr.s_addr != + srx->transport.sin.sin_addr.s_addr) goto not_found; - - peer = conn->params.peer; - switch (srx.transport.family) { - case AF_INET: - if (peer->srx.transport.sin.sin_port != - srx.transport.sin.sin_port || - peer->srx.transport.sin.sin_addr.s_addr != - srx.transport.sin.sin_addr.s_addr) - goto not_found; - break; + break; #ifdef CONFIG_AF_RXRPC_IPV6 - case AF_INET6: - if (peer->srx.transport.sin6.sin6_port != - srx.transport.sin6.sin6_port || - memcmp(&peer->srx.transport.sin6.sin6_addr, - &srx.transport.sin6.sin6_addr, - sizeof(struct in6_addr)) != 0) - goto not_found; - break; + case AF_INET6: + if (peer->srx.transport.sin6.sin6_port != + srx->transport.sin6.sin6_port || + memcmp(&peer->srx.transport.sin6.sin6_addr, + &srx->transport.sin6.sin6_addr, + sizeof(struct in6_addr)) != 0) + goto not_found; + break; #endif - default: - BUG(); - } - - _leave(" = %p", conn); - return conn; + default: + BUG(); } + _leave(" = %p", conn); + return conn; + not_found: _leave(" = NULL"); return NULL; @@ -175,7 +146,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, trace_rxrpc_disconnect_call(call); switch (call->completion) { case RXRPC_CALL_SUCCEEDED: - chan->last_seq = call->rx_hard_ack; + chan->last_seq = call->rx_highest_seq; chan->last_type = RXRPC_PACKET_TYPE_ACK; break; case RXRPC_CALL_LOCALLY_ABORTED: @@ -207,12 +178,12 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; - call->peer->cong_cwnd = 
call->cong_cwnd; + call->peer->cong_ssthresh = call->cong_ssthresh; if (!hlist_unhashed(&call->error_link)) { - spin_lock_bh(&call->peer->lock); - hlist_del_rcu(&call->error_link); - spin_unlock_bh(&call->peer->lock); + spin_lock(&call->peer->lock); + hlist_del_init(&call->error_link); + spin_unlock(&call->peer->lock); } if (rxrpc_is_client_call(call)) @@ -224,79 +195,45 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); conn->idle_timestamp = jiffies; -} - -/* - * Kill off a connection. - */ -void rxrpc_kill_connection(struct rxrpc_connection *conn) -{ - struct rxrpc_net *rxnet = conn->params.local->rxnet; - - ASSERT(!rcu_access_pointer(conn->channels[0].call) && - !rcu_access_pointer(conn->channels[1].call) && - !rcu_access_pointer(conn->channels[2].call) && - !rcu_access_pointer(conn->channels[3].call)); - ASSERT(list_empty(&conn->cache_link)); - - write_lock(&rxnet->conn_lock); - list_del_init(&conn->proc_link); - write_unlock(&rxnet->conn_lock); - - /* Drain the Rx queue. Note that even though we've unpublished, an - * incoming packet could still be being added to our Rx queue, so we - * will need to drain it again in the RCU cleanup handler. - */ - rxrpc_purge_queue(&conn->rx_queue); - - /* Leave final destruction to RCU. The connection processor work item - * must carry a ref on the connection to prevent us getting here whilst - * it is queued or running. - */ - call_rcu(&conn->rcu, rxrpc_destroy_connection); + if (atomic_dec_and_test(&conn->active)) + rxrpc_set_service_reap_timer(conn->rxnet, + jiffies + rxrpc_connection_expiry); } /* * Queue a connection's work processor, getting a ref to pass to the work * queue. 
*/ -bool rxrpc_queue_conn(struct rxrpc_connection *conn) +void rxrpc_queue_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { - const void *here = __builtin_return_address(0); - int r; - - if (!__refcount_inc_not_zero(&conn->ref, &r)) - return false; - if (rxrpc_queue_work(&conn->processor)) - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here); - else - rxrpc_put_connection(conn); - return true; + if (atomic_read(&conn->active) >= 0 && + rxrpc_queue_work(&conn->processor)) + rxrpc_see_connection(conn, why); } /* * Note the re-emergence of a connection. */ -void rxrpc_see_connection(struct rxrpc_connection *conn) +void rxrpc_see_connection(struct rxrpc_connection *conn, + enum rxrpc_conn_trace why) { - const void *here = __builtin_return_address(0); if (conn) { - int n = refcount_read(&conn->ref); + int r = refcount_read(&conn->ref); - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here); + trace_rxrpc_conn(conn->debug_id, r, why); } } /* * Get a ref on a connection. */ -struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn) +struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn, + enum rxrpc_conn_trace why) { - const void *here = __builtin_return_address(0); int r; __refcount_inc(&conn->ref, &r); - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here); + trace_rxrpc_conn(conn->debug_id, r + 1, why); return conn; } @@ -304,14 +241,14 @@ struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn) * Try to get a ref on a connection. 
*/ struct rxrpc_connection * -rxrpc_get_connection_maybe(struct rxrpc_connection *conn) +rxrpc_get_connection_maybe(struct rxrpc_connection *conn, + enum rxrpc_conn_trace why) { - const void *here = __builtin_return_address(0); int r; if (conn) { if (__refcount_inc_not_zero(&conn->ref, &r)) - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here); + trace_rxrpc_conn(conn->debug_id, r + 1, why); else conn = NULL; } @@ -329,49 +266,95 @@ static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, } /* - * Release a service connection + * destroy a virtual connection */ -void rxrpc_put_service_conn(struct rxrpc_connection *conn) +static void rxrpc_rcu_free_connection(struct rcu_head *rcu) { - const void *here = __builtin_return_address(0); - unsigned int debug_id = conn->debug_id; - int r; + struct rxrpc_connection *conn = + container_of(rcu, struct rxrpc_connection, rcu); + struct rxrpc_net *rxnet = conn->rxnet; - __refcount_dec(&conn->ref, &r); - trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here); - if (r - 1 == 1) - rxrpc_set_service_reap_timer(conn->params.local->rxnet, - jiffies + rxrpc_connection_expiry); + _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref)); + + trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref), + rxrpc_conn_free); + kfree(conn); + + if (atomic_dec_and_test(&rxnet->nr_conns)) + wake_up_var(&rxnet->nr_conns); } /* - * destroy a virtual connection + * Clean up a dead connection. 
*/ -static void rxrpc_destroy_connection(struct rcu_head *rcu) +static void rxrpc_clean_up_connection(struct work_struct *work) { struct rxrpc_connection *conn = - container_of(rcu, struct rxrpc_connection, rcu); + container_of(work, struct rxrpc_connection, destructor); + struct rxrpc_net *rxnet = conn->rxnet; - _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref)); + ASSERT(!rcu_access_pointer(conn->channels[0].call) && + !rcu_access_pointer(conn->channels[1].call) && + !rcu_access_pointer(conn->channels[2].call) && + !rcu_access_pointer(conn->channels[3].call)); + ASSERT(list_empty(&conn->cache_link)); - ASSERTCMP(refcount_read(&conn->ref), ==, 0); + del_timer_sync(&conn->timer); + cancel_work_sync(&conn->processor); /* Processing may restart the timer */ + del_timer_sync(&conn->timer); - _net("DESTROY CONN %d", conn->debug_id); + write_lock(&rxnet->conn_lock); + list_del_init(&conn->proc_link); + write_unlock(&rxnet->conn_lock); - del_timer_sync(&conn->timer); rxrpc_purge_queue(&conn->rx_queue); + rxrpc_kill_client_conn(conn); + conn->security->clear(conn); - key_put(conn->params.key); - rxrpc_put_bundle(conn->bundle); - rxrpc_put_peer(conn->params.peer); + key_put(conn->key); + rxrpc_put_bundle(conn->bundle, rxrpc_bundle_put_conn); + rxrpc_put_peer(conn->peer, rxrpc_peer_put_conn); + rxrpc_put_local(conn->local, rxrpc_local_put_kill_conn); + + /* Drain the Rx queue. Note that even though we've unpublished, an + * incoming packet could still be being added to our Rx queue, so we + * will need to drain it again in the RCU cleanup handler. + */ + rxrpc_purge_queue(&conn->rx_queue); - if (atomic_dec_and_test(&conn->params.local->rxnet->nr_conns)) - wake_up_var(&conn->params.local->rxnet->nr_conns); - rxrpc_put_local(conn->params.local); + call_rcu(&conn->rcu, rxrpc_rcu_free_connection); +} - kfree(conn); - _leave(""); +/* + * Drop a ref on a connection. 
+ */ +void rxrpc_put_connection(struct rxrpc_connection *conn, + enum rxrpc_conn_trace why) +{ + unsigned int debug_id; + bool dead; + int r; + + if (!conn) + return; + + debug_id = conn->debug_id; + dead = __refcount_dec_and_test(&conn->ref, &r); + trace_rxrpc_conn(debug_id, r - 1, why); + if (dead) { + del_timer(&conn->timer); + cancel_work(&conn->processor); + + if (in_softirq() || work_busy(&conn->processor) || + timer_pending(&conn->timer)) + /* Can't use the rxrpc workqueue as we need to cancel/flush + * something that may be running/waiting there. + */ + schedule_work(&conn->destructor); + else + rxrpc_clean_up_connection(&conn->destructor); + } } /* @@ -383,6 +366,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) struct rxrpc_net *rxnet = container_of(work, struct rxrpc_net, service_conn_reaper); unsigned long expire_at, earliest, idle_timestamp, now; + int active; LIST_HEAD(graveyard); @@ -393,20 +377,20 @@ void rxrpc_service_connection_reaper(struct work_struct *work) write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { - ASSERTCMP(refcount_read(&conn->ref), >, 0); - if (likely(refcount_read(&conn->ref) > 1)) + ASSERTCMP(atomic_read(&conn->active), >=, 0); + if (likely(atomic_read(&conn->active) > 0)) continue; if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; - if (rxnet->live && !conn->params.local->dead) { + if (rxnet->live && !conn->local->dead) { idle_timestamp = READ_ONCE(conn->idle_timestamp); expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; - if (conn->params.local->service_closed) + if (conn->local->service_closed) expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; - _debug("reap CONN %d { u=%d,t=%ld }", - conn->debug_id, refcount_read(&conn->ref), + _debug("reap CONN %d { a=%d,t=%ld }", + conn->debug_id, atomic_read(&conn->active), (long)expire_at - (long)now); if (time_before(now, expire_at)) { @@ -416,12 +400,13 @@ void 
rxrpc_service_connection_reaper(struct work_struct *work) } } - /* The usage count sits at 1 whilst the object is unused on the - * list; we reduce that to 0 to make the object unavailable. + /* The activity count sits at 0 whilst the conn is unused on + * the list; we reduce that to -1 to make the conn unavailable. */ - if (!refcount_dec_if_one(&conn->ref)) + active = 0; + if (!atomic_try_cmpxchg(&conn->active, &active, -1)) continue; - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL); + rxrpc_see_connection(conn, rxrpc_conn_see_reap_service); if (rxrpc_conn_is_client(conn)) BUG(); @@ -443,8 +428,8 @@ void rxrpc_service_connection_reaper(struct work_struct *work) link); list_del_init(&conn->link); - ASSERTCMP(refcount_read(&conn->ref), ==, 0); - rxrpc_kill_connection(conn); + ASSERTCMP(atomic_read(&conn->active), ==, -1); + rxrpc_put_connection(conn, rxrpc_conn_put_service_reaped); } _leave(""); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 6e6aa02c6f9e..2a55a88b2a5b 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -73,7 +73,7 @@ static void rxrpc_publish_service_conn(struct rxrpc_peer *peer, struct rxrpc_conn_proto k = conn->proto; struct rb_node **pp, *parent; - write_seqlock_bh(&peer->service_conn_lock); + write_seqlock(&peer->service_conn_lock); pp = &peer->service_conns.rb_node; parent = NULL; @@ -94,14 +94,14 @@ static void rxrpc_publish_service_conn(struct rxrpc_peer *peer, rb_insert_color(&conn->service_node, &peer->service_conns); conn_published: set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags); - write_sequnlock_bh(&peer->service_conn_lock); + write_sequnlock(&peer->service_conn_lock); _leave(" = %d [new]", conn->debug_id); return; found_extant_conn: if (refcount_read(&cursor->ref) == 0) goto replace_old_connection; - write_sequnlock_bh(&peer->service_conn_lock); + write_sequnlock(&peer->service_conn_lock); /* We should not be able to get here. 
rxrpc_incoming_connection() is * called in a non-reentrant context, so there can't be a race to * insert a new connection. @@ -125,7 +125,7 @@ replace_old_connection: struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxnet, gfp_t gfp) { - struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp); + struct rxrpc_connection *conn = rxrpc_alloc_connection(rxnet, gfp); if (conn) { /* We maintain an extra ref on the connection whilst it is on @@ -133,7 +133,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn */ conn->state = RXRPC_CONN_SERVICE_PREALLOC; refcount_set(&conn->ref, 2); - conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle); + conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle, + rxrpc_bundle_get_service_conn); atomic_inc(&rxnet->nr_conns); write_lock(&rxnet->conn_lock); @@ -141,9 +142,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); write_unlock(&rxnet->conn_lock); - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, - refcount_read(&conn->ref), - __builtin_return_address(0)); + rxrpc_see_connection(conn, rxrpc_conn_new_service); } return conn; @@ -164,7 +163,7 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, conn->proto.epoch = sp->hdr.epoch; conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK; - conn->params.service_id = sp->hdr.serviceId; + conn->orig_service_id = sp->hdr.serviceId; conn->service_id = sp->hdr.serviceId; conn->security_ix = sp->hdr.securityIndex; conn->out_clientflag = 0; @@ -182,10 +181,10 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, conn->service_id == rx->service_upgrade.from) conn->service_id = rx->service_upgrade.to; - /* Make the connection a target for incoming packets. 
*/ - rxrpc_publish_service_conn(conn->params.peer, conn); + atomic_set(&conn->active, 1); - _net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid); + /* Make the connection a target for incoming packets. */ + rxrpc_publish_service_conn(conn->peer, conn); } /* @@ -194,10 +193,10 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, */ void rxrpc_unpublish_service_conn(struct rxrpc_connection *conn) { - struct rxrpc_peer *peer = conn->params.peer; + struct rxrpc_peer *peer = conn->peer; - write_seqlock_bh(&peer->service_conn_lock); + write_seqlock(&peer->service_conn_lock); if (test_and_clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags)) rb_erase(&conn->service_node, &peer->service_conns); - write_sequnlock_bh(&peer->service_conn_lock); + write_sequnlock(&peer->service_conn_lock); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 721d847ba92b..d0e20e946e48 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1,35 +1,19 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/* RxRPC packet reception +/* Processing of received RxRPC packets * - * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> -#include <linux/net.h> -#include <linux/skbuff.h> -#include <linux/errqueue.h> -#include <linux/udp.h> -#include <linux/in.h> -#include <linux/in6.h> -#include <linux/icmp.h> -#include <linux/gfp.h> -#include <net/sock.h> -#include <net/af_rxrpc.h> -#include <net/ip.h> -#include <net/udp.h> -#include <net/net_namespace.h> #include "ar-internal.h" static void rxrpc_proto_abort(const char *why, struct rxrpc_call *call, rxrpc_seq_t seq) { - if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) { - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - rxrpc_queue_call(call); - } + if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) + rxrpc_send_abort_packet(call); } /* @@ -46,7 +30,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, bool resend = false; summary->flight_size = - (call->tx_top - call->tx_hard_ack) - summary->nr_acks; + (call->tx_top - call->acks_hard_ack) - summary->nr_acks; if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { summary->retrans_timeo = true; @@ -74,7 +58,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, switch (call->cong_mode) { case RXRPC_CALL_SLOW_START: - if (summary->nr_nacks > 0) + if (summary->saw_nacks) goto packet_loss_detected; if (summary->cumulative_acks > 0) cwnd += 1; @@ -85,7 +69,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, goto out; case RXRPC_CALL_CONGEST_AVOIDANCE: - if (summary->nr_nacks > 0) + if (summary->saw_nacks) goto packet_loss_detected; /* We analyse the number of packets that get ACK'd per RTT @@ -104,7 +88,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, goto out; case RXRPC_CALL_PACKET_LOSS: - if (summary->nr_nacks == 0) + if (!summary->saw_nacks) goto resume_normality; if (summary->new_low_nack) { @@ -142,7 +126,7 @@ static void rxrpc_congestion_management(struct 
rxrpc_call *call, } else { change = rxrpc_cong_progress; cwnd = call->cong_ssthresh; - if (summary->nr_nacks == 0) + if (!summary->saw_nacks) goto resume_normality; } goto out; @@ -164,13 +148,13 @@ resume_normality: out: cumulative_acks = 0; out_no_clear_ca: - if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1) - cwnd = RXRPC_RXTX_BUFF_SIZE - 1; + if (cwnd >= RXRPC_TX_MAX_WINDOW) + cwnd = RXRPC_TX_MAX_WINDOW; call->cong_cwnd = cwnd; call->cong_cumul_acks = cumulative_acks; trace_rxrpc_congest(call, summary, acked_serial, change); - if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_queue_call(call); + if (resend) + rxrpc_resend(call, skb); return; packet_loss_detected: @@ -183,9 +167,8 @@ send_extra_data: /* Send some previously unsent DATA if we have some to advance the ACK * state. */ - if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & - RXRPC_TX_ANNO_LAST || - summary->nr_acks != call->tx_top - call->tx_hard_ack) { + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) || + summary->nr_acks != call->tx_top - call->acks_hard_ack) { call->cong_extra++; wake_up(&call->waitq); } @@ -193,58 +176,71 @@ send_extra_data: } /* + * Degrade the congestion window if we haven't transmitted a packet for >1RTT. 
+ */ +void rxrpc_congestion_degrade(struct rxrpc_call *call) +{ + ktime_t rtt, now; + + if (call->cong_mode != RXRPC_CALL_SLOW_START && + call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE) + return; + if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) + return; + + rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8)); + now = ktime_get_real(); + if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now)) + return; + + trace_rxrpc_reset_cwnd(call, now); + rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset); + call->tx_last_sent = now; + call->cong_mode = RXRPC_CALL_SLOW_START; + call->cong_ssthresh = max_t(unsigned int, call->cong_ssthresh, + call->cong_cwnd * 3 / 4); + call->cong_cwnd = max_t(unsigned int, call->cong_cwnd / 2, RXRPC_MIN_CWND); +} + +/* * Apply a hard ACK by advancing the Tx window. */ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, struct rxrpc_ack_summary *summary) { - struct sk_buff *skb, *list = NULL; + struct rxrpc_txbuf *txb; bool rot_last = false; - int ix; - u8 annotation; - - if (call->acks_lowest_nak == call->tx_hard_ack) { - call->acks_lowest_nak = to; - } else if (before_eq(call->acks_lowest_nak, to)) { - summary->new_low_nack = true; - call->acks_lowest_nak = to; - } - - spin_lock(&call->lock); - while (before(call->tx_hard_ack, to)) { - call->tx_hard_ack++; - ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; - skb = call->rxtx_buffer[ix]; - annotation = call->rxtx_annotations[ix]; - rxrpc_see_skb(skb, rxrpc_skb_rotated); - call->rxtx_buffer[ix] = NULL; - call->rxtx_annotations[ix] = 0; - skb->next = list; - list = skb; - - if (annotation & RXRPC_TX_ANNO_LAST) { + list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) { + if (before_eq(txb->seq, call->acks_hard_ack)) + continue; + summary->nr_rot_new_acks++; + if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); rot_last = true; } - if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) - 
summary->nr_rot_new_acks++; + if (txb->seq == to) + break; } - spin_unlock(&call->lock); + if (rot_last) + set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags); - trace_rxrpc_transmit(call, (rot_last ? - rxrpc_transmit_rotate_last : - rxrpc_transmit_rotate)); - wake_up(&call->waitq); + _enter("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last); - while (list) { - skb = list; - list = skb->next; - skb_mark_not_on_list(skb); - rxrpc_free_skb(skb, rxrpc_skb_freed); + if (call->acks_lowest_nak == call->acks_hard_ack) { + call->acks_lowest_nak = to; + } else if (after(to, call->acks_lowest_nak)) { + summary->new_low_nack = true; + call->acks_lowest_nak = to; } + smp_store_release(&call->acks_hard_ack, to); + + trace_rxrpc_txqueue(call, (rot_last ? + rxrpc_txqueue_rotate_last : + rxrpc_txqueue_rotate)); + wake_up(&call->waitq); return rot_last; } @@ -284,9 +280,9 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, write_unlock(&call->state_lock); if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY) - trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); + trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply); else - trace_rxrpc_transmit(call, rxrpc_transmit_end); + trace_rxrpc_txqueue(call, rxrpc_txqueue_end); _leave(" = ok"); return true; @@ -307,13 +303,10 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) rxrpc_seq_t top = READ_ONCE(call->tx_top); if (call->ackr_reason) { - spin_lock_bh(&call->lock); - call->ackr_reason = 0; - spin_unlock_bh(&call->lock); now = jiffies; timo = now + MAX_JIFFY_OFFSET; WRITE_ONCE(call->resend_at, timo); - WRITE_ONCE(call->ack_at, timo); + WRITE_ONCE(call->delay_ack_at, timo); trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } @@ -323,85 +316,231 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) return false; } } - if (!rxrpc_end_tx_phase(call, true, "ETD")) - return false; - call->tx_phase = false; - return true; + return rxrpc_end_tx_phase(call, true, "ETD"); +} + +static void 
rxrpc_input_update_ack_window(struct rxrpc_call *call, + rxrpc_seq_t window, rxrpc_seq_t wtop) +{ + atomic64_set_release(&call->ackr_window, ((u64)wtop) << 32 | window); } /* - * Scan a data packet to validate its structure and to work out how many - * subpackets it contains. - * - * A jumbo packet is a collection of consecutive packets glued together with - * little headers between that indicate how to change the initial header for - * each subpacket. - * - * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but - * the last are RXRPC_JUMBO_DATALEN in size. The last subpacket may be of any - * size. + * Push a DATA packet onto the Rx queue. */ -static bool rxrpc_validate_data(struct sk_buff *skb) +static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, + rxrpc_seq_t window, rxrpc_seq_t wtop, + enum rxrpc_receive_trace why) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int offset = sizeof(struct rxrpc_wire_header); - unsigned int len = skb->len; - u8 flags = sp->hdr.flags; + bool last = sp->hdr.flags & RXRPC_LAST_PACKET; - for (;;) { - if (flags & RXRPC_REQUEST_ACK) - __set_bit(sp->nr_subpackets, sp->rx_req_ack); - sp->nr_subpackets++; + __skb_queue_tail(&call->recvmsg_queue, skb); + rxrpc_input_update_ack_window(call, window, wtop); - if (!(flags & RXRPC_JUMBO_PACKET)) - break; + trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq); +} - if (len - offset < RXRPC_JUMBO_SUBPKTLEN) - goto protocol_error; - if (flags & RXRPC_LAST_PACKET) - goto protocol_error; - offset += RXRPC_JUMBO_DATALEN; - if (skb_copy_bits(skb, offset, &flags, 1) < 0) - goto protocol_error; - offset += sizeof(struct rxrpc_jumbo_header); +/* + * Process a DATA packet. 
+ */ +static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, + bool *_notify) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct sk_buff *oos; + rxrpc_serial_t serial = sp->hdr.serial; + u64 win = atomic64_read(&call->ackr_window); + rxrpc_seq_t window = lower_32_bits(win); + rxrpc_seq_t wtop = upper_32_bits(win); + rxrpc_seq_t wlimit = window + call->rx_winsize - 1; + rxrpc_seq_t seq = sp->hdr.seq; + bool last = sp->hdr.flags & RXRPC_LAST_PACKET; + int ack_reason = -1; + + rxrpc_inc_stat(call->rxnet, stat_rx_data); + if (sp->hdr.flags & RXRPC_REQUEST_ACK) + rxrpc_inc_stat(call->rxnet, stat_rx_data_reqack); + if (sp->hdr.flags & RXRPC_JUMBO_PACKET) + rxrpc_inc_stat(call->rxnet, stat_rx_data_jumbo); + + if (last) { + if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) && + seq + 1 != wtop) { + rxrpc_proto_abort("LSN", call, seq); + return; + } + } else { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + after_eq(seq, wtop)) { + pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n", + call->debug_id, seq, window, wtop, wlimit); + rxrpc_proto_abort("LSA", call, seq); + return; + } } - if (flags & RXRPC_LAST_PACKET) - sp->rx_flags |= RXRPC_SKB_INCL_LAST; - return true; + if (after(seq, call->rx_highest_seq)) + call->rx_highest_seq = seq; -protocol_error: - return false; + trace_rxrpc_rx_data(call->debug_id, seq, serial, sp->hdr.flags); + + if (before(seq, window)) { + ack_reason = RXRPC_ACK_DUPLICATE; + goto send_ack; + } + if (after(seq, wlimit)) { + ack_reason = RXRPC_ACK_EXCEEDS_WINDOW; + goto send_ack; + } + + /* Queue the packet. 
*/ + if (seq == window) { + rxrpc_seq_t reset_from; + bool reset_sack = false; + + if (sp->hdr.flags & RXRPC_REQUEST_ACK) + ack_reason = RXRPC_ACK_REQUESTED; + /* Send an immediate ACK if we fill in a hole */ + else if (!skb_queue_empty(&call->rx_oos_queue)) + ack_reason = RXRPC_ACK_DELAY; + else + atomic_inc_return(&call->ackr_nr_unacked); + + window++; + if (after(window, wtop)) + wtop = window; + + rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg); + + spin_lock(&call->recvmsg_queue.lock); + rxrpc_input_queue_data(call, skb, window, wtop, rxrpc_receive_queue); + *_notify = true; + + while ((oos = skb_peek(&call->rx_oos_queue))) { + struct rxrpc_skb_priv *osp = rxrpc_skb(oos); + + if (after(osp->hdr.seq, window)) + break; + + __skb_unlink(oos, &call->rx_oos_queue); + last = osp->hdr.flags & RXRPC_LAST_PACKET; + seq = osp->hdr.seq; + if (!reset_sack) { + reset_from = seq; + reset_sack = true; + } + + window++; + rxrpc_input_queue_data(call, oos, window, wtop, + rxrpc_receive_queue_oos); + } + + spin_unlock(&call->recvmsg_queue.lock); + + if (reset_sack) { + do { + call->ackr_sack_table[reset_from % RXRPC_SACK_SIZE] = 0; + } while (reset_from++, before(reset_from, window)); + } + } else { + bool keep = false; + + ack_reason = RXRPC_ACK_OUT_OF_SEQUENCE; + + if (!call->ackr_sack_table[seq % RXRPC_SACK_SIZE]) { + call->ackr_sack_table[seq % RXRPC_SACK_SIZE] = 1; + keep = 1; + } + + if (after(seq + 1, wtop)) { + wtop = seq + 1; + rxrpc_input_update_ack_window(call, window, wtop); + } + + if (!keep) { + ack_reason = RXRPC_ACK_DUPLICATE; + goto send_ack; + } + + skb_queue_walk(&call->rx_oos_queue, oos) { + struct rxrpc_skb_priv *osp = rxrpc_skb(oos); + + if (after(osp->hdr.seq, seq)) { + rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg_oos); + __skb_queue_before(&call->rx_oos_queue, oos, skb); + goto oos_queued; + } + } + + rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg_oos); + __skb_queue_tail(&call->rx_oos_queue, skb); + oos_queued: + trace_rxrpc_receive(call, last ? 
rxrpc_receive_oos_last : rxrpc_receive_oos, + sp->hdr.serial, sp->hdr.seq); + } + +send_ack: + if (ack_reason >= 0) + rxrpc_send_ACK(call, ack_reason, serial, + rxrpc_propose_ack_input_data); + else + rxrpc_propose_delay_ACK(call, serial, + rxrpc_propose_ack_input_data); } /* - * Handle reception of a duplicate packet. - * - * We have to take care to avoid an attack here whereby we're given a series of - * jumbograms, each with a sequence number one before the preceding one and - * filled up to maximum UDP size. If they never send us the first packet in - * the sequence, they can cause us to have to hold on to around 2MiB of kernel - * space until the call times out. - * - * We limit the space usage by only accepting three duplicate jumbo packets per - * call. After that, we tell the other side we're no longer accepting jumbos - * (that information is encoded in the ACK packet). + * Split a jumbo packet and file the bits separately. */ -static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, - bool is_jumbo, bool *_jumbo_bad) +static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb) { - /* Discard normal packets that are duplicates. */ - if (is_jumbo) - return; + struct rxrpc_jumbo_header jhdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb), *jsp; + struct sk_buff *jskb; + unsigned int offset = sizeof(struct rxrpc_wire_header); + unsigned int len = skb->len - offset; + bool notify = false; - /* Skip jumbo subpackets that are duplicates. When we've had three or - * more partially duplicate jumbo packets, we refuse to take any more - * jumbos for this call. 
- */ - if (!*_jumbo_bad) { - call->nr_jumbo_bad++; - *_jumbo_bad = true; + while (sp->hdr.flags & RXRPC_JUMBO_PACKET) { + if (len < RXRPC_JUMBO_SUBPKTLEN) + goto protocol_error; + if (sp->hdr.flags & RXRPC_LAST_PACKET) + goto protocol_error; + if (skb_copy_bits(skb, offset + RXRPC_JUMBO_DATALEN, + &jhdr, sizeof(jhdr)) < 0) + goto protocol_error; + + jskb = skb_clone(skb, GFP_NOFS); + if (!jskb) { + kdebug("couldn't clone"); + return false; + } + rxrpc_new_skb(jskb, rxrpc_skb_new_jumbo_subpacket); + jsp = rxrpc_skb(jskb); + jsp->offset = offset; + jsp->len = RXRPC_JUMBO_DATALEN; + rxrpc_input_data_one(call, jskb, &notify); + rxrpc_free_skb(jskb, rxrpc_skb_put_jumbo_subpacket); + + sp->hdr.flags = jhdr.flags; + sp->hdr._rsvd = ntohs(jhdr._rsvd); + sp->hdr.seq++; + sp->hdr.serial++; + offset += RXRPC_JUMBO_SUBPKTLEN; + len -= RXRPC_JUMBO_SUBPKTLEN; } + + sp->offset = offset; + sp->len = len; + rxrpc_input_data_one(call, skb, &notify); + if (notify) { + trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); + rxrpc_notify_socket(call); + } + return true; + +protocol_error: + return false; } /* @@ -412,23 +551,16 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); enum rxrpc_call_state state; - unsigned int j, nr_subpackets, nr_unacked = 0; - rxrpc_serial_t serial = sp->hdr.serial, ack_serial = serial; - rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack; - bool immediate_ack = false, jumbo_bad = false; - u8 ack = 0; - - _enter("{%u,%u},{%u,%u}", - call->rx_hard_ack, call->rx_top, skb->len, seq0); + rxrpc_serial_t serial = sp->hdr.serial; + rxrpc_seq_t seq0 = sp->hdr.seq; - _proto("Rx DATA %%%u { #%u f=%02x n=%u }", - sp->hdr.serial, seq0, sp->hdr.flags, sp->nr_subpackets); + _enter("{%llx,%x},{%u,%x}", + atomic64_read(&call->ackr_window), call->rx_highest_seq, + skb->len, seq0); state = READ_ONCE(call->state); - if (state >= RXRPC_CALL_COMPLETE) { - rxrpc_free_skb(skb, rxrpc_skb_freed); + if (state >= 
RXRPC_CALL_COMPLETE) return; - } if (state == RXRPC_CALL_SERVER_RECV_REQUEST) { unsigned long timo = READ_ONCE(call->next_req_timo); @@ -443,176 +575,23 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) } } - spin_lock(&call->input_lock); - /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. */ if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && !rxrpc_receiving_reply(call)) - goto unlock; - - hard_ack = READ_ONCE(call->rx_hard_ack); - - nr_subpackets = sp->nr_subpackets; - if (nr_subpackets > 1) { - if (call->nr_jumbo_bad > 3) { - ack = RXRPC_ACK_NOSPACE; - ack_serial = serial; - goto ack; - } - } - - for (j = 0; j < nr_subpackets; j++) { - rxrpc_serial_t serial = sp->hdr.serial + j; - rxrpc_seq_t seq = seq0 + j; - unsigned int ix = seq & RXRPC_RXTX_BUFF_MASK; - bool terminal = (j == nr_subpackets - 1); - bool last = terminal && (sp->rx_flags & RXRPC_SKB_INCL_LAST); - u8 flags, annotation = j; - - _proto("Rx DATA+%u %%%u { #%x t=%u l=%u }", - j, serial, seq, terminal, last); - - if (last) { - if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - seq != call->rx_top) { - rxrpc_proto_abort("LSN", call, seq); - goto unlock; - } - } else { - if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - after_eq(seq, call->rx_top)) { - rxrpc_proto_abort("LSA", call, seq); - goto unlock; - } - } - - flags = 0; - if (last) - flags |= RXRPC_LAST_PACKET; - if (!terminal) - flags |= RXRPC_JUMBO_PACKET; - if (test_bit(j, sp->rx_req_ack)) - flags |= RXRPC_REQUEST_ACK; - trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation); - - if (before_eq(seq, hard_ack)) { - ack = RXRPC_ACK_DUPLICATE; - ack_serial = serial; - continue; - } - - if (call->rxtx_buffer[ix]) { - rxrpc_input_dup_data(call, seq, nr_subpackets > 1, - &jumbo_bad); - if (ack != RXRPC_ACK_DUPLICATE) { - ack = RXRPC_ACK_DUPLICATE; - ack_serial = serial; - } - immediate_ack = true; - continue; - } - - 
if (after(seq, hard_ack + call->rx_winsize)) { - ack = RXRPC_ACK_EXCEEDS_WINDOW; - ack_serial = serial; - if (flags & RXRPC_JUMBO_PACKET) { - if (!jumbo_bad) { - call->nr_jumbo_bad++; - jumbo_bad = true; - } - } - - goto ack; - } - - if (flags & RXRPC_REQUEST_ACK && !ack) { - ack = RXRPC_ACK_REQUESTED; - ack_serial = serial; - } - - if (after(seq0, call->ackr_highest_seq)) - call->ackr_highest_seq = seq0; - - /* Queue the packet. We use a couple of memory barriers here as need - * to make sure that rx_top is perceived to be set after the buffer - * pointer and that the buffer pointer is set after the annotation and - * the skb data. - * - * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() - * and also rxrpc_fill_out_ack(). - */ - if (!terminal) - rxrpc_get_skb(skb, rxrpc_skb_got); - call->rxtx_annotations[ix] = annotation; - smp_wmb(); - call->rxtx_buffer[ix] = skb; - if (after(seq, call->rx_top)) { - smp_store_release(&call->rx_top, seq); - } else if (before(seq, call->rx_top)) { - /* Send an immediate ACK if we fill in a hole */ - if (!ack) { - ack = RXRPC_ACK_DELAY; - ack_serial = serial; - } - immediate_ack = true; - } - - if (terminal) { - /* From this point on, we're not allowed to touch the - * packet any longer as its ref now belongs to the Rx - * ring. 
- */ - skb = NULL; - sp = NULL; - } + goto out_notify; - nr_unacked++; - - if (last) { - set_bit(RXRPC_CALL_RX_LAST, &call->flags); - if (!ack) { - ack = RXRPC_ACK_DELAY; - ack_serial = serial; - } - trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); - } else { - trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq); - } - - if (after_eq(seq, call->rx_expect_next)) { - if (after(seq, call->rx_expect_next)) { - _net("OOS %u > %u", seq, call->rx_expect_next); - ack = RXRPC_ACK_OUT_OF_SEQUENCE; - ack_serial = serial; - } - call->rx_expect_next = seq + 1; - } - if (!ack) - ack_serial = serial; + if (!rxrpc_input_split_jumbo(call, skb)) { + rxrpc_proto_abort("VLD", call, sp->hdr.seq); + goto out_notify; } + skb = NULL; -ack: - if (atomic_add_return(nr_unacked, &call->ackr_nr_unacked) > 2 && !ack) - ack = RXRPC_ACK_IDLE; - - if (ack) - rxrpc_propose_ACK(call, ack, ack_serial, - immediate_ack, true, - rxrpc_propose_ack_input_data); - else - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, - false, true, - rxrpc_propose_ack_input_data); - +out_notify: trace_rxrpc_notify_socket(call->debug_id, serial); rxrpc_notify_socket(call); - -unlock: - spin_unlock(&call->input_lock); - rxrpc_free_skb(skb, rxrpc_skb_freed); _leave(" [queued]"); } @@ -671,55 +650,6 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, } /* - * Process the response to a ping that we sent to find out if we lost an ACK. - * - * If we got back a ping response that indicates a lower tx_top than what we - * had at the time of the ping transmission, we adjudge all the DATA packets - * sent between the response tx_top and the ping-time tx_top to have been lost. 
- */ -static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call) -{ - rxrpc_seq_t top, bottom, seq; - bool resend = false; - - spin_lock_bh(&call->lock); - - bottom = call->tx_hard_ack + 1; - top = call->acks_lost_top; - if (before(bottom, top)) { - for (seq = bottom; before_eq(seq, top); seq++) { - int ix = seq & RXRPC_RXTX_BUFF_MASK; - u8 annotation = call->rxtx_annotations[ix]; - u8 anno_type = annotation & RXRPC_TX_ANNO_MASK; - - if (anno_type != RXRPC_TX_ANNO_UNACK) - continue; - annotation &= ~RXRPC_TX_ANNO_MASK; - annotation |= RXRPC_TX_ANNO_RETRANS; - call->rxtx_annotations[ix] = annotation; - resend = true; - } - } - - spin_unlock_bh(&call->lock); - - if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_queue_call(call); -} - -/* - * Process a ping response. - */ -static void rxrpc_input_ping_response(struct rxrpc_call *call, - ktime_t resp_time, - rxrpc_serial_t acked_serial, - rxrpc_serial_t ack_serial) -{ - if (acked_serial == call->acks_lost_ping) - rxrpc_input_check_for_lost_ack(call); -} - -/* * Process the extra information that may be appended to an ACK packet */ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, @@ -731,13 +661,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, bool wake = false; u32 rwind = ntohl(ackinfo->rwind); - _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", - sp->hdr.serial, - ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), - rwind, ntohl(ackinfo->jumbo_max)); - - if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) - rwind = RXRPC_RXTX_BUFF_SIZE - 1; + if (rwind > RXRPC_TX_MAX_WINDOW) + rwind = RXRPC_TX_MAX_WINDOW; if (call->tx_winsize != rwind) { if (rwind > call->tx_winsize) wake = true; @@ -752,11 +677,10 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, peer = call->peer; if (mtu < peer->maxdata) { - spin_lock_bh(&peer->lock); + spin_lock(&peer->lock); peer->maxdata = mtu; peer->mtu = mtu + 
peer->hdrsize; - spin_unlock_bh(&peer->lock); - _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); + spin_unlock(&peer->lock); } if (wake) @@ -776,40 +700,19 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, rxrpc_seq_t seq, int nr_acks, struct rxrpc_ack_summary *summary) { - int ix; - u8 annotation, anno_type; - - for (; nr_acks > 0; nr_acks--, seq++) { - ix = seq & RXRPC_RXTX_BUFF_MASK; - annotation = call->rxtx_annotations[ix]; - anno_type = annotation & RXRPC_TX_ANNO_MASK; - annotation &= ~RXRPC_TX_ANNO_MASK; - switch (*acks++) { - case RXRPC_ACK_TYPE_ACK: + unsigned int i; + + for (i = 0; i < nr_acks; i++) { + if (acks[i] == RXRPC_ACK_TYPE_ACK) { summary->nr_acks++; - if (anno_type == RXRPC_TX_ANNO_ACK) - continue; summary->nr_new_acks++; - call->rxtx_annotations[ix] = - RXRPC_TX_ANNO_ACK | annotation; - break; - case RXRPC_ACK_TYPE_NACK: - if (!summary->nr_nacks && - call->acks_lowest_nak != seq) { - call->acks_lowest_nak = seq; + } else { + if (!summary->saw_nacks && + call->acks_lowest_nak != seq + i) { + call->acks_lowest_nak = seq + i; summary->new_low_nack = true; } - summary->nr_nacks++; - if (anno_type == RXRPC_TX_ANNO_NAK) - continue; - summary->nr_new_nacks++; - if (anno_type == RXRPC_TX_ANNO_RETRANS) - continue; - call->rxtx_annotations[ix] = - RXRPC_TX_ANNO_NAK | annotation; - break; - default: - return rxrpc_proto_abort("SFT", call, 0); + summary->saw_nacks = true; } } } @@ -851,12 +754,9 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call, static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_ack_summary summary = { 0 }; + struct rxrpc_ackpacket ack; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - union { - struct rxrpc_ackpacket ack; - struct rxrpc_ackinfo info; - u8 acks[RXRPC_MAXACKS]; - } buf; + struct rxrpc_ackinfo info; rxrpc_serial_t ack_serial, acked_serial; rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; @@ -864,29 +764,26 @@ static void 
rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) _enter(""); offset = sizeof(struct rxrpc_wire_header); - if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) { - _debug("extraction failure"); + if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0) return rxrpc_proto_abort("XAK", call, 0); - } - offset += sizeof(buf.ack); + offset += sizeof(ack); ack_serial = sp->hdr.serial; - acked_serial = ntohl(buf.ack.serial); - first_soft_ack = ntohl(buf.ack.firstPacket); - prev_pkt = ntohl(buf.ack.previousPacket); + acked_serial = ntohl(ack.serial); + first_soft_ack = ntohl(ack.firstPacket); + prev_pkt = ntohl(ack.previousPacket); hard_ack = first_soft_ack - 1; - nr_acks = buf.ack.nAcks; - summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? - buf.ack.reason : RXRPC_ACK__INVALID); + nr_acks = ack.nAcks; + summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ? + ack.reason : RXRPC_ACK__INVALID); trace_rxrpc_rx_ack(call, ack_serial, acked_serial, first_soft_ack, prev_pkt, summary.ack_reason, nr_acks); + rxrpc_inc_stat(call->rxnet, stat_rx_acks[ack.reason]); - switch (buf.ack.reason) { + switch (ack.reason) { case RXRPC_ACK_PING_RESPONSE: - rxrpc_input_ping_response(call, skb->tstamp, acked_serial, - ack_serial); rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, rxrpc_rtt_rx_ping_response); break; @@ -901,22 +798,19 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) break; } - if (buf.ack.reason == RXRPC_ACK_PING) { - _proto("Rx ACK %%%u PING Request", ack_serial); - rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - ack_serial, true, true, - rxrpc_propose_ack_respond_to_ping); + if (ack.reason == RXRPC_ACK_PING) { + rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial, + rxrpc_propose_ack_respond_to_ping); } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { - rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, - ack_serial, true, true, - rxrpc_propose_ack_respond_to_ack); + rxrpc_send_ACK(call, 
RXRPC_ACK_REQUESTED, ack_serial, + rxrpc_propose_ack_respond_to_ack); } /* If we get an EXCEEDS_WINDOW ACK from the server, it probably * indicates that the client address changed due to NAT. The server * lost the call because it switched to a different peer. */ - if (unlikely(buf.ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) && + if (unlikely(ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) && first_soft_ack == 1 && prev_pkt == 0 && rxrpc_is_client_call(call)) { @@ -929,10 +823,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) * indicate a change of address. However, we can retransmit the call * if we still have it buffered to the beginning. */ - if (unlikely(buf.ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) && + if (unlikely(ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) && first_soft_ack == 1 && prev_pkt == 0 && - call->tx_hard_ack == 0 && + call->acks_hard_ack == 0 && rxrpc_is_client_call(call)) { rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, 0, -ENETRESET); @@ -947,34 +841,34 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) return; } - buf.info.rxMTU = 0; + info.rxMTU = 0; ioffset = offset + nr_acks + 3; - if (skb->len >= ioffset + sizeof(buf.info) && - skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) + if (skb->len >= ioffset + sizeof(info) && + skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0) return rxrpc_proto_abort("XAI", call, 0); - spin_lock(&call->input_lock); + if (nr_acks > 0) + skb_condense(skb); - /* Discard any out-of-order or duplicate ACKs (inside lock). 
*/ - if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { - trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, - first_soft_ack, call->acks_first_seq, - prev_pkt, call->acks_prev_seq); - goto out; - } call->acks_latest_ts = skb->tstamp; - call->acks_first_seq = first_soft_ack; call->acks_prev_seq = prev_pkt; + switch (ack.reason) { + case RXRPC_ACK_PING: + break; + default: + if (after(acked_serial, call->acks_highest_serial)) + call->acks_highest_serial = acked_serial; + break; + } + /* Parse rwind and mtu sizes if provided. */ - if (buf.info.rxMTU) - rxrpc_input_ackinfo(call, skb, &buf.info); + if (info.rxMTU) + rxrpc_input_ackinfo(call, skb, &info); - if (first_soft_ack == 0) { - rxrpc_proto_abort("AK0", call, 0); - goto out; - } + if (first_soft_ack == 0) + return rxrpc_proto_abort("AK0", call, 0); /* Ignore ACKs unless we are or have just been transmitting. */ switch (READ_ONCE(call->state)) { @@ -984,46 +878,36 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_CALL_SERVER_AWAIT_ACK: break; default: - goto out; + return; } - if (before(hard_ack, call->tx_hard_ack) || - after(hard_ack, call->tx_top)) { - rxrpc_proto_abort("AKW", call, 0); - goto out; - } - if (nr_acks > call->tx_top - hard_ack) { - rxrpc_proto_abort("AKN", call, 0); - goto out; - } + if (before(hard_ack, call->acks_hard_ack) || + after(hard_ack, call->tx_top)) + return rxrpc_proto_abort("AKW", call, 0); + if (nr_acks > call->tx_top - hard_ack) + return rxrpc_proto_abort("AKN", call, 0); - if (after(hard_ack, call->tx_hard_ack)) { + if (after(hard_ack, call->acks_hard_ack)) { if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { rxrpc_end_tx_phase(call, false, "ETA"); - goto out; + return; } } if (nr_acks > 0) { - if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) { - rxrpc_proto_abort("XSA", call, 0); - goto out; - } - rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, - &summary); + if (offset > (int)skb->len - nr_acks) + return 
rxrpc_proto_abort("XSA", call, 0); + rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack, + nr_acks, &summary); } - if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & - RXRPC_TX_ANNO_LAST && + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && summary.nr_acks == call->tx_top - hard_ack && rxrpc_is_client_call(call)) - rxrpc_propose_ACK(call, RXRPC_ACK_PING, ack_serial, - false, true, - rxrpc_propose_ack_ping_for_lost_reply); + rxrpc_propose_ping(call, ack_serial, + rxrpc_propose_ack_ping_for_lost_reply); rxrpc_congestion_management(call, skb, &summary, acked_serial); -out: - spin_unlock(&call->input_lock); } /* @@ -1032,16 +916,9 @@ out: static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_ack_summary summary = { 0 }; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - _proto("Rx ACKALL %%%u", sp->hdr.serial); - - spin_lock(&call->input_lock); if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) rxrpc_end_tx_phase(call, false, "ETL"); - - spin_unlock(&call->input_lock); } /* @@ -1050,35 +927,30 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - __be32 wtmp; - u32 abort_code = RX_CALL_DEAD; - - _enter(""); - - if (skb->len >= 4 && - skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - &wtmp, sizeof(wtmp)) >= 0) - abort_code = ntohl(wtmp); - - trace_rxrpc_rx_abort(call, sp->hdr.serial, abort_code); - _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); + trace_rxrpc_rx_abort(call, sp->hdr.serial, skb->priority); rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, -ECONNABORTED); + skb->priority, -ECONNABORTED); } /* * Process an incoming call packet. 
*/ -static void rxrpc_input_call_packet(struct rxrpc_call *call, - struct sk_buff *skb) +void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned long timo; _enter("%p,%p", call, skb); + if (sp->hdr.serviceId != call->dest_srx.srx_service) + call->dest_srx.srx_service = sp->hdr.serviceId; + if ((int)sp->hdr.serial - (int)call->rx_serial > 0) + call->rx_serial = sp->hdr.serial; + if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags)) + set_bit(RXRPC_CALL_RX_HEARD, &call->flags); + timo = READ_ONCE(call->next_rx_timo); if (timo) { unsigned long now = jiffies, expect_rx_by; @@ -1092,15 +964,13 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: rxrpc_input_data(call, skb); - goto no_free; + break; case RXRPC_PACKET_TYPE_ACK: rxrpc_input_ack(call, skb); break; case RXRPC_PACKET_TYPE_BUSY: - _proto("Rx BUSY %%%u", sp->hdr.serial); - /* Just ignore BUSY packets from the server; the retry and * lifespan timers will take care of business. BUSY packets * from the client don't make sense. @@ -1118,10 +988,6 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, default: break; } - - rxrpc_free_skb(skb, rxrpc_skb_freed); -no_free: - _leave(""); } /* @@ -1130,10 +996,10 @@ no_free: * * TODO: If callNumber > call_id + 1, renegotiate security. 
*/ -static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, - struct rxrpc_connection *conn, - struct rxrpc_call *call) +void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb) { + struct rxrpc_connection *conn = call->conn; + switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); @@ -1141,362 +1007,15 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, case RXRPC_CALL_COMPLETE: break; default: - if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) { - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - rxrpc_queue_call(call); - } + if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) + rxrpc_send_abort_packet(call); trace_rxrpc_improper_term(call); break; } - spin_lock(&rx->incoming_lock); - __rxrpc_disconnect_call(conn, call); - spin_unlock(&rx->incoming_lock); -} - -/* - * post connection-level events to the connection - * - this includes challenges, responses, some aborts and call terminal packet - * retransmission. 
- */ -static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, - struct sk_buff *skb) -{ - _enter("%p,%p", conn, skb); - - skb_queue_tail(&conn->rx_queue, skb); - rxrpc_queue_conn(conn); -} - -/* - * post endpoint-level events to the local endpoint - * - this includes debug and version messages - */ -static void rxrpc_post_packet_to_local(struct rxrpc_local *local, - struct sk_buff *skb) -{ - _enter("%p,%p", local, skb); - - if (rxrpc_get_local_maybe(local)) { - skb_queue_tail(&local->event_queue, skb); - rxrpc_queue_local(local); - } else { - rxrpc_free_skb(skb, rxrpc_skb_freed); - } -} - -/* - * put a packet up for transport-level abort - */ -static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) -{ - if (rxrpc_get_local_maybe(local)) { - skb_queue_tail(&local->reject_queue, skb); - rxrpc_queue_local(local); - } else { - rxrpc_free_skb(skb, rxrpc_skb_freed); - } -} + rxrpc_input_call_event(call, skb); -/* - * Extract the wire header from a packet and translate the byte order. - */ -static noinline -int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) -{ - struct rxrpc_wire_header whdr; - - /* dig out the RxRPC connection details */ - if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) { - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_hdr")); - return -EBADMSG; - } - - memset(sp, 0, sizeof(*sp)); - sp->hdr.epoch = ntohl(whdr.epoch); - sp->hdr.cid = ntohl(whdr.cid); - sp->hdr.callNumber = ntohl(whdr.callNumber); - sp->hdr.seq = ntohl(whdr.seq); - sp->hdr.serial = ntohl(whdr.serial); - sp->hdr.flags = whdr.flags; - sp->hdr.type = whdr.type; - sp->hdr.userStatus = whdr.userStatus; - sp->hdr.securityIndex = whdr.securityIndex; - sp->hdr._rsvd = ntohs(whdr._rsvd); - sp->hdr.serviceId = ntohs(whdr.serviceId); - return 0; -} - -/* - * handle data received on the local endpoint - * - may be called in interrupt context - * - * [!] 
Note that as this is called from the encap_rcv hook, the socket is not - * held locked by the caller and nothing prevents sk_user_data on the UDP from - * being cleared in the middle of processing this function. - * - * Called with the RCU read lock held from the IP layer via UDP. - */ -int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) -{ - struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk); - struct rxrpc_connection *conn; - struct rxrpc_channel *chan; - struct rxrpc_call *call = NULL; - struct rxrpc_skb_priv *sp; - struct rxrpc_peer *peer = NULL; - struct rxrpc_sock *rx = NULL; - unsigned int channel; - - _enter("%p", udp_sk); - - if (unlikely(!local)) { - kfree_skb(skb); - return 0; - } - if (skb->tstamp == 0) - skb->tstamp = ktime_get_real(); - - rxrpc_new_skb(skb, rxrpc_skb_received); - - skb_pull(skb, sizeof(struct udphdr)); - - /* The UDP protocol already released all skb resources; - * we are free to add our own data there. - */ - sp = rxrpc_skb(skb); - - /* dig out the RxRPC connection details */ - if (rxrpc_extract_header(sp, skb) < 0) - goto bad_message; - - if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { - static int lose; - if ((lose++ & 7) == 7) { - trace_rxrpc_rx_lose(sp); - rxrpc_free_skb(skb, rxrpc_skb_lost); - return 0; - } - } - - if (skb->tstamp == 0) - skb->tstamp = ktime_get_real(); - trace_rxrpc_rx_packet(sp); - - switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_VERSION: - if (rxrpc_to_client(sp)) - goto discard; - rxrpc_post_packet_to_local(local, skb); - goto out; - - case RXRPC_PACKET_TYPE_BUSY: - if (rxrpc_to_server(sp)) - goto discard; - fallthrough; - case RXRPC_PACKET_TYPE_ACK: - case RXRPC_PACKET_TYPE_ACKALL: - if (sp->hdr.callNumber == 0) - goto bad_message; - fallthrough; - case RXRPC_PACKET_TYPE_ABORT: - break; - - case RXRPC_PACKET_TYPE_DATA: - if (sp->hdr.callNumber == 0 || - sp->hdr.seq == 0) - goto bad_message; - if (!rxrpc_validate_data(skb)) - goto bad_message; - - /* Unshare the packet so that it 
can be modified for in-place - * decryption. - */ - if (sp->hdr.securityIndex != 0) { - struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC); - if (!nskb) { - rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem); - goto out; - } - - if (nskb != skb) { - rxrpc_eaten_skb(skb, rxrpc_skb_received); - skb = nskb; - rxrpc_new_skb(skb, rxrpc_skb_unshared); - sp = rxrpc_skb(skb); - } - } - break; - - case RXRPC_PACKET_TYPE_CHALLENGE: - if (rxrpc_to_server(sp)) - goto discard; - break; - case RXRPC_PACKET_TYPE_RESPONSE: - if (rxrpc_to_client(sp)) - goto discard; - break; - - /* Packet types 9-11 should just be ignored. */ - case RXRPC_PACKET_TYPE_PARAMS: - case RXRPC_PACKET_TYPE_10: - case RXRPC_PACKET_TYPE_11: - goto discard; - - default: - _proto("Rx Bad Packet Type %u", sp->hdr.type); - goto bad_message; - } - - if (sp->hdr.serviceId == 0) - goto bad_message; - - if (rxrpc_to_server(sp)) { - /* Weed out packets to services we're not offering. Packets - * that would begin a call are explicitly rejected and the rest - * are just discarded. 
- */ - rx = rcu_dereference(local->service); - if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && - sp->hdr.serviceId != rx->second_service)) { - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - sp->hdr.seq == 1) - goto unsupported_service; - goto discard; - } - } - - conn = rxrpc_find_connection_rcu(local, skb, &peer); - if (conn) { - if (sp->hdr.securityIndex != conn->security_ix) - goto wrong_security; - - if (sp->hdr.serviceId != conn->service_id) { - int old_id; - - if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) - goto reupgrade; - old_id = cmpxchg(&conn->service_id, conn->params.service_id, - sp->hdr.serviceId); - - if (old_id != conn->params.service_id && - old_id != sp->hdr.serviceId) - goto reupgrade; - } - - if (sp->hdr.callNumber == 0) { - /* Connection-level packet */ - _debug("CONN %p {%d}", conn, conn->debug_id); - rxrpc_post_packet_to_conn(conn, skb); - goto out; - } - - if ((int)sp->hdr.serial - (int)conn->hi_serial > 0) - conn->hi_serial = sp->hdr.serial; - - /* Call-bound packets are routed by connection channel. */ - channel = sp->hdr.cid & RXRPC_CHANNELMASK; - chan = &conn->channels[channel]; - - /* Ignore really old calls */ - if (sp->hdr.callNumber < chan->last_call) - goto discard; - - if (sp->hdr.callNumber == chan->last_call) { - if (chan->call || - sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) - goto discard; - - /* For the previous service call, if completed - * successfully, we discard all further packets. - */ - if (rxrpc_conn_is_service(conn) && - chan->last_type == RXRPC_PACKET_TYPE_ACK) - goto discard; - - /* But otherwise we need to retransmit the final packet - * from data cached in the connection record. 
- */ - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) - trace_rxrpc_rx_data(chan->call_debug_id, - sp->hdr.seq, - sp->hdr.serial, - sp->hdr.flags, 0); - rxrpc_post_packet_to_conn(conn, skb); - goto out; - } - - call = rcu_dereference(chan->call); - - if (sp->hdr.callNumber > chan->call_id) { - if (rxrpc_to_client(sp)) - goto reject_packet; - if (call) - rxrpc_input_implicit_end_call(rx, conn, call); - call = NULL; - } - - if (call) { - if (sp->hdr.serviceId != call->service_id) - call->service_id = sp->hdr.serviceId; - if ((int)sp->hdr.serial - (int)call->rx_serial > 0) - call->rx_serial = sp->hdr.serial; - if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags)) - set_bit(RXRPC_CALL_RX_HEARD, &call->flags); - } - } - - if (!call || refcount_read(&call->ref) == 0) { - if (rxrpc_to_client(sp) || - sp->hdr.type != RXRPC_PACKET_TYPE_DATA) - goto bad_message; - if (sp->hdr.seq != 1) - goto discard; - call = rxrpc_new_incoming_call(local, rx, skb); - if (!call) - goto reject_packet; - } - - /* Process a call packet; this either discards or passes on the ref - * elsewhere. 
- */ - rxrpc_input_call_packet(call, skb); - goto out; - -discard: - rxrpc_free_skb(skb, rxrpc_skb_freed); -out: - trace_rxrpc_rx_done(0, 0); - return 0; - -wrong_security: - trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RXKADINCONSISTENCY, EBADMSG); - skb->priority = RXKADINCONSISTENCY; - goto post_abort; - -unsupported_service: - trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EOPNOTSUPP); - skb->priority = RX_INVALID_OPERATION; - goto post_abort; - -reupgrade: - trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); - goto protocol_error; - -bad_message: - trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); -protocol_error: - skb->priority = RX_PROTOCOL_ERROR; -post_abort: - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; -reject_packet: - trace_rxrpc_rx_done(skb->mark, skb->priority); - rxrpc_reject_packet(local, skb); - _leave(" [badmsg]"); - return 0; + spin_lock(&conn->bundle->channel_lock); + __rxrpc_disconnect_call(conn, call); + spin_unlock(&conn->bundle->channel_lock); } diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 9aae99d67833..0eb8471bfc53 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -25,16 +25,16 @@ static int none_how_much_data(struct rxrpc_call *call, size_t remain, return 0; } -static int none_secure_packet(struct rxrpc_call *call, struct sk_buff *skb, - size_t data_size) +static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { return 0; } -static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int offset, unsigned int len, - rxrpc_seq_t seq, u16 expected_cksum) +static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + sp->flags |= RXRPC_RX_VERIFIED; return 0; } @@ -42,11 +42,6 @@ static void 
none_free_call_crypto(struct rxrpc_call *call) { } -static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int *_offset, unsigned int *_len) -{ -} - static int none_respond_to_challenge(struct rxrpc_connection *conn, struct sk_buff *skb, u32 *_abort_code) @@ -95,7 +90,6 @@ const struct rxrpc_security rxrpc_no_security = { .how_much_data = none_how_much_data, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, - .locate_data = none_locate_data, .respond_to_challenge = none_respond_to_challenge, .verify_response = none_verify_response, .clear = none_clear, diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c new file mode 100644 index 000000000000..d83ae3193032 --- /dev/null +++ b/net/rxrpc/io_thread.c @@ -0,0 +1,496 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC packet reception + * + * Copyright (C) 2007, 2016, 2022 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "ar-internal.h" + +static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, + struct sockaddr_rxrpc *peer_srx, + struct sk_buff *skb); + +/* + * handle data received on the local endpoint + * - may be called in interrupt context + * + * [!] Note that as this is called from the encap_rcv hook, the socket is not + * held locked by the caller and nothing prevents sk_user_data on the UDP from + * being cleared in the middle of processing this function. + * + * Called with the RCU read lock held from the IP layer via UDP. 
+ */ +int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb) +{ + struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk); + + if (unlikely(!local)) { + kfree_skb(skb); + return 0; + } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); + + skb->mark = RXRPC_SKB_MARK_PACKET; + rxrpc_new_skb(skb, rxrpc_skb_new_encap_rcv); + skb_queue_tail(&local->rx_queue, skb); + rxrpc_wake_up_io_thread(local); + return 0; +} + +/* + * Handle an error received on the local endpoint. + */ +void rxrpc_error_report(struct sock *sk) +{ + struct rxrpc_local *local; + struct sk_buff *skb; + + rcu_read_lock(); + local = rcu_dereference_sk_user_data(sk); + if (unlikely(!local)) { + rcu_read_unlock(); + return; + } + + while ((skb = skb_dequeue(&sk->sk_error_queue))) { + skb->mark = RXRPC_SKB_MARK_ERROR; + rxrpc_new_skb(skb, rxrpc_skb_new_error_report); + skb_queue_tail(&local->rx_queue, skb); + } + + rxrpc_wake_up_io_thread(local); + rcu_read_unlock(); +} + +/* + * Process event packets targeted at a local endpoint. + */ +static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + char v; + + _enter(""); + + rxrpc_see_skb(skb, rxrpc_skb_see_version); + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &v, 1) >= 0) { + if (v == 0) + rxrpc_send_version_request(local, &sp->hdr, skb); + } +} + +/* + * Extract the wire header from a packet and translate the byte order. 
+ */ +static noinline +int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) +{ + struct rxrpc_wire_header whdr; + + /* dig out the RxRPC connection details */ + if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) { + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_hdr")); + return -EBADMSG; + } + + memset(sp, 0, sizeof(*sp)); + sp->hdr.epoch = ntohl(whdr.epoch); + sp->hdr.cid = ntohl(whdr.cid); + sp->hdr.callNumber = ntohl(whdr.callNumber); + sp->hdr.seq = ntohl(whdr.seq); + sp->hdr.serial = ntohl(whdr.serial); + sp->hdr.flags = whdr.flags; + sp->hdr.type = whdr.type; + sp->hdr.userStatus = whdr.userStatus; + sp->hdr.securityIndex = whdr.securityIndex; + sp->hdr._rsvd = ntohs(whdr._rsvd); + sp->hdr.serviceId = ntohs(whdr.serviceId); + return 0; +} + +/* + * Extract the abort code from an ABORT packet and stash it in skb->priority. + */ +static bool rxrpc_extract_abort(struct sk_buff *skb) +{ + __be32 wtmp; + + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) < 0) + return false; + skb->priority = ntohl(wtmp); + return true; +} + +/* + * Process packets received on the local endpoint + */ +static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) +{ + struct rxrpc_connection *conn; + struct sockaddr_rxrpc peer_srx; + struct rxrpc_skb_priv *sp; + struct rxrpc_peer *peer = NULL; + struct sk_buff *skb = *_skb; + int ret = 0; + + skb_pull(skb, sizeof(struct udphdr)); + + sp = rxrpc_skb(skb); + + /* dig out the RxRPC connection details */ + if (rxrpc_extract_header(sp, skb) < 0) + goto bad_message; + + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + trace_rxrpc_rx_lose(sp); + return 0; + } + } + + trace_rxrpc_rx_packet(sp); + + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_VERSION: + if (rxrpc_to_client(sp)) + return 0; + rxrpc_input_version(local, skb); + return 0; + + case RXRPC_PACKET_TYPE_BUSY: + if (rxrpc_to_server(sp)) + 
return 0; + fallthrough; + case RXRPC_PACKET_TYPE_ACK: + case RXRPC_PACKET_TYPE_ACKALL: + if (sp->hdr.callNumber == 0) + goto bad_message; + break; + case RXRPC_PACKET_TYPE_ABORT: + if (!rxrpc_extract_abort(skb)) + return 0; /* Just discard if malformed */ + break; + + case RXRPC_PACKET_TYPE_DATA: + if (sp->hdr.callNumber == 0 || + sp->hdr.seq == 0) + goto bad_message; + + /* Unshare the packet so that it can be modified for in-place + * decryption. + */ + if (sp->hdr.securityIndex != 0) { + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) { + rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem); + *_skb = NULL; + return 0; + } + + if (skb != *_skb) { + rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare); + *_skb = skb; + rxrpc_new_skb(skb, rxrpc_skb_new_unshared); + sp = rxrpc_skb(skb); + } + } + break; + + case RXRPC_PACKET_TYPE_CHALLENGE: + if (rxrpc_to_server(sp)) + return 0; + break; + case RXRPC_PACKET_TYPE_RESPONSE: + if (rxrpc_to_client(sp)) + return 0; + break; + + /* Packet types 9-11 should just be ignored. */ + case RXRPC_PACKET_TYPE_PARAMS: + case RXRPC_PACKET_TYPE_10: + case RXRPC_PACKET_TYPE_11: + return 0; + + default: + goto bad_message; + } + + if (sp->hdr.serviceId == 0) + goto bad_message; + + if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0)) + return 0; /* Unsupported address type - discard. */ + + if (peer_srx.transport.family != local->srx.transport.family && + (peer_srx.transport.family == AF_INET && + local->srx.transport.family != AF_INET6)) { + pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", + peer_srx.transport.family, + local->srx.transport.family); + return 0; /* Wrong address type - discard.
*/ + } + + if (rxrpc_to_client(sp)) { + rcu_read_lock(); + conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb); + conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input); + rcu_read_unlock(); + if (!conn) { + trace_rxrpc_abort(0, "NCC", sp->hdr.cid, + sp->hdr.callNumber, sp->hdr.seq, + RXKADINCONSISTENCY, EBADMSG); + goto protocol_error; + } + + ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb); + rxrpc_put_connection(conn, rxrpc_conn_put_call_input); + return ret; + } + + /* We need to look up service connections by the full protocol + * parameter set. We look up the peer first as an intermediate step + * and then the connection from the peer's tree. + */ + rcu_read_lock(); + + peer = rxrpc_lookup_peer_rcu(local, &peer_srx); + if (!peer) { + rcu_read_unlock(); + return rxrpc_new_incoming_call(local, NULL, NULL, &peer_srx, skb); + } + + conn = rxrpc_find_service_conn_rcu(peer, skb); + conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input); + if (conn) { + rcu_read_unlock(); + ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb); + rxrpc_put_connection(conn, rxrpc_conn_put_call_input); + return ret; + } + + peer = rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input); + rcu_read_unlock(); + + ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb); + rxrpc_put_peer(peer, rxrpc_peer_put_input); + if (ret < 0) + goto reject_packet; + return 0; + +bad_message: + trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); +protocol_error: + skb->priority = RX_PROTOCOL_ERROR; + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; +reject_packet: + rxrpc_reject_packet(local, skb); + return ret; +} + +/* + * Deal with a packet that's associated with an extant connection. 
+ */ +static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, + struct sockaddr_rxrpc *peer_srx, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_channel *chan; + struct rxrpc_call *call = NULL; + unsigned int channel; + + if (sp->hdr.securityIndex != conn->security_ix) + goto wrong_security; + + if (sp->hdr.serviceId != conn->service_id) { + int old_id; + + if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) + goto reupgrade; + old_id = cmpxchg(&conn->service_id, conn->orig_service_id, + sp->hdr.serviceId); + + if (old_id != conn->orig_service_id && + old_id != sp->hdr.serviceId) + goto reupgrade; + } + + if (after(sp->hdr.serial, conn->hi_serial)) + conn->hi_serial = sp->hdr.serial; + + /* It's a connection-level packet if the call number is 0. */ + if (sp->hdr.callNumber == 0) + return rxrpc_input_conn_packet(conn, skb); + + /* Call-bound packets are routed by connection channel. */ + channel = sp->hdr.cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; + + /* Ignore really old calls */ + if (sp->hdr.callNumber < chan->last_call) + return 0; + + if (sp->hdr.callNumber == chan->last_call) { + if (chan->call || + sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) + return 0; + + /* For the previous service call, if completed successfully, we + * discard all further packets. + */ + if (rxrpc_conn_is_service(conn) && + chan->last_type == RXRPC_PACKET_TYPE_ACK) + return 0; + + /* But otherwise we need to retransmit the final packet from + * data cached in the connection record. 
+ */ + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) + trace_rxrpc_rx_data(chan->call_debug_id, + sp->hdr.seq, + sp->hdr.serial, + sp->hdr.flags); + rxrpc_input_conn_packet(conn, skb); + return 0; + } + + rcu_read_lock(); + call = rxrpc_try_get_call(rcu_dereference(chan->call), + rxrpc_call_get_input); + rcu_read_unlock(); + + if (sp->hdr.callNumber > chan->call_id) { + if (rxrpc_to_client(sp)) { + rxrpc_put_call(call, rxrpc_call_put_input); + goto reject_packet; + } + + if (call) { + rxrpc_implicit_end_call(call, skb); + rxrpc_put_call(call, rxrpc_call_put_input); + call = NULL; + } + } + + if (!call) { + if (rxrpc_to_client(sp)) + goto bad_message; + if (rxrpc_new_incoming_call(conn->local, conn->peer, conn, + peer_srx, skb)) + return 0; + goto reject_packet; + } + + rxrpc_input_call_event(call, skb); + rxrpc_put_call(call, rxrpc_call_put_input); + return 0; + +wrong_security: + trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RXKADINCONSISTENCY, EBADMSG); + skb->priority = RXKADINCONSISTENCY; + goto post_abort; + +reupgrade: + trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); + goto protocol_error; + +bad_message: + trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); +protocol_error: + skb->priority = RX_PROTOCOL_ERROR; +post_abort: + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; +reject_packet: + rxrpc_reject_packet(conn->local, skb); + return 0; +} + +/* + * I/O and event handling thread. + */ +int rxrpc_io_thread(void *data) +{ + struct sk_buff_head rx_queue; + struct rxrpc_local *local = data; + struct rxrpc_call *call; + struct sk_buff *skb; + + skb_queue_head_init(&rx_queue); + + set_user_nice(current, MIN_NICE); + + for (;;) { + rxrpc_inc_stat(local->rxnet, stat_io_loop); + + /* Deal with calls that want immediate attention. 
*/ + if ((call = list_first_entry_or_null(&local->call_attend_q, + struct rxrpc_call, + attend_link))) { + spin_lock_bh(&local->lock); + list_del_init(&call->attend_link); + spin_unlock_bh(&local->lock); + + trace_rxrpc_call_poked(call); + rxrpc_input_call_event(call, NULL); + rxrpc_put_call(call, rxrpc_call_put_poke); + continue; + } + + /* Process received packets and errors. */ + if ((skb = __skb_dequeue(&rx_queue))) { + switch (skb->mark) { + case RXRPC_SKB_MARK_PACKET: + skb->priority = 0; + rxrpc_input_packet(local, &skb); + /* A failed unshare consumes the packet and NULLs skb. */ + if (!skb) + break; + trace_rxrpc_rx_done(skb->mark, skb->priority); + rxrpc_free_skb(skb, rxrpc_skb_put_input); + break; + case RXRPC_SKB_MARK_ERROR: + rxrpc_input_error(local, skb); + rxrpc_free_skb(skb, rxrpc_skb_put_error_report); + break; + default: + WARN_ON_ONCE(1); + rxrpc_free_skb(skb, rxrpc_skb_put_unknown); + break; + } + continue; + } + + if (!skb_queue_empty(&local->rx_queue)) { + spin_lock_irq(&local->rx_queue.lock); + skb_queue_splice_tail_init(&local->rx_queue, &rx_queue); + spin_unlock_irq(&local->rx_queue.lock); + continue; + } + + set_current_state(TASK_INTERRUPTIBLE); + if (!skb_queue_empty(&local->rx_queue) || + !list_empty(&local->call_attend_q)) { + __set_current_state(TASK_RUNNING); + continue; + } + + if (kthread_should_stop()) + break; + schedule(); + } + + __set_current_state(TASK_RUNNING); + rxrpc_see_local(local, rxrpc_local_stop); + rxrpc_destroy_local(local); + local->io_thread = NULL; + rxrpc_see_local(local, rxrpc_local_stopped); + return 0; +} diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 8d2073e0e3da..8d53aded09c4 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -513,7 +513,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, if (ret < 0) goto error; - conn->params.key = key; + conn->key = key; _leave(" = 0 [%d]", key_serial(key)); return 0; @@ -602,7 +602,8 @@ static long rxrpc_read(const struct key *key, } _debug("token[%u]: toksize=%u", ntoks, toksize); - ASSERTCMP(toksize, <=,
AFSTOKEN_LENGTH_MAX); + if (WARN_ON(toksize > AFSTOKEN_LENGTH_MAX)) + return -EIO; toksizes[ntoks++] = toksize; size += toksize + 4; /* each token has a length word */ @@ -679,8 +680,9 @@ static long rxrpc_read(const struct key *key, return -ENOPKG; } - ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==, - toksize); + if (WARN_ON((unsigned long)xdr - (unsigned long)oldxdr != + toksize)) + return -EIO; } #undef ENCODE_STR @@ -688,8 +690,10 @@ static long rxrpc_read(const struct key *key, #undef ENCODE64 #undef ENCODE - ASSERTCMP(tok, ==, ntoks); - ASSERTCMP((char __user *) xdr - buffer, ==, size); + if (WARN_ON(tok != ntoks)) + return -EIO; + if (WARN_ON((unsigned long)xdr - (unsigned long)buffer != size)) + return -EIO; _leave(" = %zu", size); return size; } diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 19e929c7c38b..5e69ea6b233d 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -21,9 +21,9 @@ static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC"; /* * Reply to a version request */ -static void rxrpc_send_version_request(struct rxrpc_local *local, - struct rxrpc_host_header *hdr, - struct sk_buff *skb) +void rxrpc_send_version_request(struct rxrpc_local *local, + struct rxrpc_host_header *hdr, + struct sk_buff *skb) { struct rxrpc_wire_header whdr; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -63,8 +63,6 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, len = iov[0].iov_len + iov[1].iov_len; - _proto("Tx VERSION (reply)"); - ret = kernel_sendmsg(local->socket, &msg, iov, 2, len); if (ret < 0) trace_rxrpc_tx_fail(local->debug_id, 0, ret, @@ -75,41 +73,3 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, _leave(""); } - -/* - * Process event packets targeted at a local endpoint.
- */ -void rxrpc_process_local_events(struct rxrpc_local *local) -{ - struct sk_buff *skb; - char v; - - _enter(""); - - skb = skb_dequeue(&local->event_queue); - if (skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - rxrpc_see_skb(skb, rxrpc_skb_seen); - _debug("{%d},{%u}", local->debug_id, sp->hdr.type); - - switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_VERSION: - if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - &v, 1) < 0) - return; - _proto("Rx VERSION { %02x }", v); - if (v == 0) - rxrpc_send_version_request(local, &sp->hdr, skb); - break; - - default: - /* Just ignore anything we don't understand */ - break; - } - - rxrpc_free_skb(skb, rxrpc_skb_freed); - } - - _leave(""); -} diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 38ea98ff426b..44222923c0d1 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -20,10 +20,23 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" -static void rxrpc_local_processor(struct work_struct *); static void rxrpc_local_rcu(struct rcu_head *); /* + * Handle an ICMP/ICMP6 error turning up at the tunnel. Push it through the + * usual mechanism so that it gets parsed and presented through the UDP + * socket's error_report(). + */ +static void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) +{ + if (ip_hdr(skb)->version == IPVERSION) + return ip_icmp_error(sk, skb, err, port, info, payload); + if (IS_ENABLED(CONFIG_AF_RXRPC_IPV6)) + return ipv6_icmp_error(sk, skb, err, port, info, payload); +} + +/* * Compare a local to an address. Return -ve, 0 or +ve to indicate less than, * same or greater than. 
* @@ -83,10 +96,9 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, atomic_set(&local->active_users, 1); local->rxnet = rxnet; INIT_HLIST_NODE(&local->link); - INIT_WORK(&local->processor, rxrpc_local_processor); init_rwsem(&local->defrag_sem); - skb_queue_head_init(&local->reject_queue); - skb_queue_head_init(&local->event_queue); + skb_queue_head_init(&local->rx_queue); + INIT_LIST_HEAD(&local->call_attend_q); local->client_bundles = RB_ROOT; spin_lock_init(&local->client_bundles_lock); spin_lock_init(&local->lock); @@ -94,7 +106,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, local->debug_id = atomic_inc_return(&rxrpc_debug_id); memcpy(&local->srx, srx, sizeof(*srx)); local->srx.srx_service = 0; - trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, NULL); + trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1); } _leave(" = %p", local); @@ -110,6 +122,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) struct udp_tunnel_sock_cfg tuncfg = {NULL}; struct sockaddr_rxrpc *srx = &local->srx; struct udp_port_cfg udp_conf = {0}; + struct task_struct *io_thread; struct sock *usk; int ret; @@ -136,7 +149,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } tuncfg.encap_type = UDP_ENCAP_RXRPC; - tuncfg.encap_rcv = rxrpc_input_packet; + tuncfg.encap_rcv = rxrpc_encap_rcv; tuncfg.encap_err_rcv = rxrpc_encap_err_rcv; tuncfg.sk_user_data = local; setup_udp_tunnel_sock(net, local->socket, &tuncfg); @@ -169,8 +182,23 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) BUG(); } + io_thread = kthread_run(rxrpc_io_thread, local, + "krxrpcio/%u", ntohs(udp_conf.local_udp_port)); + if (IS_ERR(io_thread)) { + ret = PTR_ERR(io_thread); + goto error_sock; + } + + local->io_thread = io_thread; _leave(" = 0"); return 0; + +error_sock: + kernel_sock_shutdown(local->socket, SHUT_RDWR); + local->socket->sk->sk_user_data = NULL; + sock_release(local->socket); 
+ local->socket = NULL; + return ret; } /* @@ -182,7 +210,6 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, struct rxrpc_local *local; struct rxrpc_net *rxnet = rxrpc_net(net); struct hlist_node *cursor; - const char *age; long diff; int ret; @@ -213,10 +240,9 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, * we're attempting to use a local address that the dying * object is still using. */ - if (!rxrpc_use_local(local)) + if (!rxrpc_use_local(local, rxrpc_local_use_lookup)) break; - age = "old"; goto found; } @@ -234,14 +260,9 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, } else { hlist_add_head_rcu(&local->link, &rxnet->local_endpoints); } - age = "new"; found: mutex_unlock(&rxnet->local_mutex); - - _net("LOCAL %s %d {%pISp}", - age, local->debug_id, &local->srx.transport); - _leave(" = %p", local); return local; @@ -263,64 +284,49 @@ addr_in_use: /* * Get a ref on a local endpoint. */ -struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) +struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local, + enum rxrpc_local_trace why) { - const void *here = __builtin_return_address(0); - int r; + int r, u; + u = atomic_read(&local->active_users); __refcount_inc(&local->ref, &r); - trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here); + trace_rxrpc_local(local->debug_id, why, r + 1, u); return local; } /* * Get a ref on a local endpoint unless its usage has already reached 0. 
*/ -struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) +struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local, + enum rxrpc_local_trace why) { - const void *here = __builtin_return_address(0); - int r; + int r, u; - if (local) { - if (__refcount_inc_not_zero(&local->ref, &r)) - trace_rxrpc_local(local->debug_id, rxrpc_local_got, - r + 1, here); - else - local = NULL; + if (local && __refcount_inc_not_zero(&local->ref, &r)) { + u = atomic_read(&local->active_users); + trace_rxrpc_local(local->debug_id, why, r + 1, u); + return local; } - return local; -} - -/* - * Queue a local endpoint and pass the caller's reference to the work item. - */ -void rxrpc_queue_local(struct rxrpc_local *local) -{ - const void *here = __builtin_return_address(0); - unsigned int debug_id = local->debug_id; - int r = refcount_read(&local->ref); - if (rxrpc_queue_work(&local->processor)) - trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here); - else - rxrpc_put_local(local); + return NULL; } /* * Drop a ref on a local endpoint. */ -void rxrpc_put_local(struct rxrpc_local *local) +void rxrpc_put_local(struct rxrpc_local *local, enum rxrpc_local_trace why) { - const void *here = __builtin_return_address(0); unsigned int debug_id; bool dead; - int r; + int r, u; if (local) { debug_id = local->debug_id; + u = atomic_read(&local->active_users); dead = __refcount_dec_and_test(&local->ref, &r); - trace_rxrpc_local(debug_id, rxrpc_local_put, r, here); + trace_rxrpc_local(debug_id, why, r, u); if (dead) call_rcu(&local->rcu, rxrpc_local_rcu); @@ -330,14 +336,15 @@ void rxrpc_put_local(struct rxrpc_local *local) /* * Start using a local endpoint. 
*/ -struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local, + enum rxrpc_local_trace why) { - local = rxrpc_get_local_maybe(local); + local = rxrpc_get_local_maybe(local, rxrpc_local_get_for_use); if (!local) return NULL; - if (!__rxrpc_use_local(local)) { - rxrpc_put_local(local); + if (!__rxrpc_use_local(local, why)) { + rxrpc_put_local(local, rxrpc_local_put_for_use); return NULL; } @@ -346,15 +353,19 @@ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) /* * Cease using a local endpoint. Once the number of active users reaches 0, we - * start the closure of the transport in the work processor. + * start the closure of the transport in the I/O thread.. */ -void rxrpc_unuse_local(struct rxrpc_local *local) +void rxrpc_unuse_local(struct rxrpc_local *local, enum rxrpc_local_trace why) { + unsigned int debug_id = local->debug_id; + int r, u; + if (local) { - if (__rxrpc_unuse_local(local)) { - rxrpc_get_local(local); - rxrpc_queue_local(local); - } + r = refcount_read(&local->ref); + u = atomic_dec_return(&local->active_users); + trace_rxrpc_local(debug_id, why, r, u); + if (u == 0) + kthread_stop(local->io_thread); } } @@ -365,7 +376,7 @@ void rxrpc_unuse_local(struct rxrpc_local *local) * Closing the socket cannot be done from bottom half context or RCU callback * context because it might sleep. */ -static void rxrpc_local_destroyer(struct rxrpc_local *local) +void rxrpc_destroy_local(struct rxrpc_local *local) { struct socket *socket = local->socket; struct rxrpc_net *rxnet = local->rxnet; @@ -392,47 +403,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) /* At this point, there should be no more packets coming in to the * local endpoint. */ - rxrpc_purge_queue(&local->reject_queue); - rxrpc_purge_queue(&local->event_queue); -} - -/* - * Process events on an endpoint. The work item carries a ref which - * we must release. 
- */ -static void rxrpc_local_processor(struct work_struct *work) -{ - struct rxrpc_local *local = - container_of(work, struct rxrpc_local, processor); - bool again; - - if (local->dead) - return; - - trace_rxrpc_local(local->debug_id, rxrpc_local_processing, - refcount_read(&local->ref), NULL); - - do { - again = false; - if (!__rxrpc_use_local(local)) { - rxrpc_local_destroyer(local); - break; - } - - if (!skb_queue_empty(&local->reject_queue)) { - rxrpc_reject_packets(local); - again = true; - } - - if (!skb_queue_empty(&local->event_queue)) { - rxrpc_process_local_events(local); - again = true; - } - - __rxrpc_unuse_local(local); - } while (again); - - rxrpc_put_local(local); + rxrpc_purge_queue(&local->rx_queue); } /* @@ -442,13 +413,8 @@ static void rxrpc_local_rcu(struct rcu_head *rcu) { struct rxrpc_local *local = container_of(rcu, struct rxrpc_local, rcu); - _enter("%d", local->debug_id); - - ASSERT(!work_pending(&local->processor)); - - _net("DESTROY LOCAL %d", local->debug_id); + rxrpc_see_local(local, rxrpc_local_free); kfree(local); - _leave(""); } /* diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index d4144fd86f84..056c428d8bf3 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -17,12 +17,6 @@ unsigned int rxrpc_max_backlog __read_mostly = 10; /* - * How long to wait before scheduling ACK generation after seeing a - * packet with RXRPC_REQUEST_ACK set (in jiffies). - */ -unsigned long rxrpc_requested_ack_delay = 1; - -/* * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). * * We use this when we've received new data packets. If those packets aren't @@ -46,10 +40,7 @@ unsigned long rxrpc_idle_ack_delay = HZ / 2; * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further * packets. 
*/ -unsigned int rxrpc_rx_window_size = RXRPC_INIT_RX_WINDOW_SIZE; -#if (RXRPC_RXTX_BUFF_SIZE - 1) < RXRPC_INIT_RX_WINDOW_SIZE -#error Need to reduce RXRPC_INIT_RX_WINDOW_SIZE -#endif +unsigned int rxrpc_rx_window_size = 255; /* * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet @@ -62,15 +53,3 @@ unsigned int rxrpc_rx_mtu = 5692; * sender that we're willing to handle. */ unsigned int rxrpc_rx_jumbo_max = 4; - -const s8 rxrpc_ack_priority[] = { - [0] = 0, - [RXRPC_ACK_DELAY] = 1, - [RXRPC_ACK_REQUESTED] = 2, - [RXRPC_ACK_IDLE] = 3, - [RXRPC_ACK_DUPLICATE] = 4, - [RXRPC_ACK_OUT_OF_SEQUENCE] = 5, - [RXRPC_ACK_EXCEEDS_WINDOW] = 6, - [RXRPC_ACK_NOSPACE] = 7, - [RXRPC_ACK_PING_RESPONSE] = 8, -}; diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index bb4c25d6df64..5905530e2f33 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -65,7 +65,7 @@ static __net_init int rxrpc_init_net(struct net *net) atomic_set(&rxnet->nr_client_conns, 0); rxnet->kill_all_client_conns = false; spin_lock_init(&rxnet->client_conn_cache_lock); - spin_lock_init(&rxnet->client_conn_discard_lock); + mutex_init(&rxnet->client_conn_discard_lock); INIT_LIST_HEAD(&rxnet->idle_client_conns); INIT_WORK(&rxnet->client_conn_reaper, rxrpc_discard_expired_client_conns); @@ -101,6 +101,8 @@ static __net_init int rxrpc_init_net(struct net *net) proc_create_net("locals", 0444, rxnet->proc_net, &rxrpc_local_seq_ops, sizeof(struct seq_net_private)); + proc_create_net_single_write("stats", S_IFREG | 0644, rxnet->proc_net, + rxrpc_stats_show, rxrpc_stats_clear, NULL); return 0; err_proc: diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 9683617db704..3d8c9f830ee0 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -13,15 +13,27 @@ #include <linux/export.h> #include <net/sock.h> #include <net/af_rxrpc.h> +#include <net/udp.h> #include "ar-internal.h" -struct rxrpc_ack_buffer { - struct rxrpc_wire_header whdr; - struct rxrpc_ackpacket ack; - u8 acks[255]; - u8 
pad[3]; - struct rxrpc_ackinfo ackinfo; -}; +extern int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); + +static ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len) +{ + struct sockaddr *sa = msg->msg_name; + struct sock *sk = socket->sk; + + if (IS_ENABLED(CONFIG_AF_RXRPC_IPV6)) { + if (sa->sa_family == AF_INET6) { + if (sk->sk_family != AF_INET6) { + pr_warn("AF_INET6 address on AF_INET socket\n"); + return -ENOPROTOOPT; + } + return udpv6_sendmsg(sk, msg, len); + } + } + return udp_sendmsg(sk, msg, len); +} struct rxrpc_abort_buffer { struct rxrpc_wire_header whdr; @@ -68,66 +80,83 @@ static void rxrpc_set_keepalive(struct rxrpc_call *call) */ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, struct rxrpc_call *call, - struct rxrpc_ack_buffer *pkt, - rxrpc_seq_t *_hard_ack, - rxrpc_seq_t *_top, - u8 reason) + struct rxrpc_txbuf *txb) { - rxrpc_serial_t serial; - unsigned int tmp; - rxrpc_seq_t hard_ack, top, seq; - int ix; + struct rxrpc_ackinfo ackinfo; + unsigned int qsize; + rxrpc_seq_t window, wtop, wrap_point, ix, first; + int rsize; + u64 wtmp; u32 mtu, jmax; - u8 *ackp = pkt->acks; + u8 *ackp = txb->acks; + u8 sack_buffer[sizeof(call->ackr_sack_table)] __aligned(8); - tmp = atomic_xchg(&call->ackr_nr_unacked, 0); - tmp |= atomic_xchg(&call->ackr_nr_consumed, 0); - if (!tmp && (reason == RXRPC_ACK_DELAY || - reason == RXRPC_ACK_IDLE)) - return 0; + atomic_set(&call->ackr_nr_unacked, 0); + atomic_set(&call->ackr_nr_consumed, 0); + rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill); /* Barrier against rxrpc_input_data(). 
*/ - serial = call->ackr_serial; - hard_ack = READ_ONCE(call->rx_hard_ack); - top = smp_load_acquire(&call->rx_top); - *_hard_ack = hard_ack; - *_top = top; - - pkt->ack.bufferSpace = htons(8); - pkt->ack.maxSkew = htons(0); - pkt->ack.firstPacket = htonl(hard_ack + 1); - pkt->ack.previousPacket = htonl(call->ackr_highest_seq); - pkt->ack.serial = htonl(serial); - pkt->ack.reason = reason; - pkt->ack.nAcks = top - hard_ack; - - if (reason == RXRPC_ACK_PING) - pkt->whdr.flags |= RXRPC_REQUEST_ACK; - - if (after(top, hard_ack)) { - seq = hard_ack + 1; - do { - ix = seq & RXRPC_RXTX_BUFF_MASK; - if (call->rxtx_buffer[ix]) - *ackp++ = RXRPC_ACK_TYPE_ACK; - else - *ackp++ = RXRPC_ACK_TYPE_NACK; - seq++; - } while (before_eq(seq, top)); +retry: + wtmp = atomic64_read_acquire(&call->ackr_window); + window = lower_32_bits(wtmp); + wtop = upper_32_bits(wtmp); + txb->ack.firstPacket = htonl(window); + txb->ack.nAcks = 0; + + if (after(wtop, window)) { + /* Try to copy the SACK ring locklessly. We can use the copy, + * only if the now-current top of the window didn't go past the + * previously read base - otherwise we can't know whether we + * have old data or new data. + */ + memcpy(sack_buffer, call->ackr_sack_table, sizeof(sack_buffer)); + wrap_point = window + RXRPC_SACK_SIZE - 1; + wtmp = atomic64_read_acquire(&call->ackr_window); + window = lower_32_bits(wtmp); + wtop = upper_32_bits(wtmp); + if (after(wtop, wrap_point)) { + cond_resched(); + goto retry; + } + + /* The buffer is maintained as a ring with an invariant mapping + * between bit position and sequence number, so we'll probably + * need to rotate it. 
+ */ + txb->ack.nAcks = wtop - window; + ix = window % RXRPC_SACK_SIZE; + first = sizeof(sack_buffer) - ix; + + if (ix + txb->ack.nAcks <= RXRPC_SACK_SIZE) { + memcpy(txb->acks, sack_buffer + ix, txb->ack.nAcks); + } else { + memcpy(txb->acks, sack_buffer + ix, first); + memcpy(txb->acks + first, sack_buffer, + txb->ack.nAcks - first); + } + + ackp += txb->ack.nAcks; + } else if (before(wtop, window)) { + pr_warn("ack window backward %x %x", window, wtop); + } else if (txb->ack.reason == RXRPC_ACK_DELAY) { + txb->ack.reason = RXRPC_ACK_IDLE; } - mtu = conn->params.peer->if_mtu; - mtu -= conn->params.peer->hdrsize; - jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; - pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); - pkt->ackinfo.maxMTU = htonl(mtu); - pkt->ackinfo.rwind = htonl(call->rx_winsize); - pkt->ackinfo.jumbo_max = htonl(jmax); + mtu = conn->peer->if_mtu; + mtu -= conn->peer->hdrsize; + jmax = rxrpc_rx_jumbo_max; + qsize = (window - 1) - call->rx_consumed; + rsize = max_t(int, call->rx_winsize - qsize, 0); + ackinfo.rxMTU = htonl(rxrpc_rx_mtu); + ackinfo.maxMTU = htonl(mtu); + ackinfo.rwind = htonl(rsize); + ackinfo.jumbo_max = htonl(jmax); *ackp++ = 0; *ackp++ = 0; *ackp++ = 0; - return top - hard_ack + 3; + memcpy(ackp, &ackinfo, sizeof(ackinfo)); + return txb->ack.nAcks + 3 + sizeof(ackinfo); } /* @@ -174,28 +203,20 @@ static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call, } /* - * Send an ACK call packet. + * Transmit an ACK packet. 
*/ -int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, - rxrpc_serial_t *_serial) +int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { struct rxrpc_connection *conn; - struct rxrpc_ack_buffer *pkt; struct msghdr msg; - struct kvec iov[2]; + struct kvec iov[1]; rxrpc_serial_t serial; - rxrpc_seq_t hard_ack, top; size_t len, n; int ret, rtt_slot = -1; - u8 reason; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; - pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) - return -ENOMEM; - conn = call->conn; msg.msg_name = &call->peer->srx.transport; @@ -204,79 +225,48 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, msg.msg_controllen = 0; msg.msg_flags = 0; - pkt->whdr.epoch = htonl(conn->proto.epoch); - pkt->whdr.cid = htonl(call->cid); - pkt->whdr.callNumber = htonl(call->call_id); - pkt->whdr.seq = 0; - pkt->whdr.type = RXRPC_PACKET_TYPE_ACK; - pkt->whdr.flags = RXRPC_SLOW_START_OK | conn->out_clientflag; - pkt->whdr.userStatus = 0; - pkt->whdr.securityIndex = call->security_ix; - pkt->whdr._rsvd = 0; - pkt->whdr.serviceId = htons(call->service_id); - - spin_lock_bh(&call->lock); - if (ping) { - reason = RXRPC_ACK_PING; - } else { - reason = call->ackr_reason; - if (!call->ackr_reason) { - spin_unlock_bh(&call->lock); - ret = 0; - goto out; - } - call->ackr_reason = 0; - } - n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason); + if (txb->ack.reason == RXRPC_ACK_PING) + txb->wire.flags |= RXRPC_REQUEST_ACK; - spin_unlock_bh(&call->lock); - if (n == 0) { - kfree(pkt); + n = rxrpc_fill_out_ack(conn, call, txb); + if (n == 0) return 0; - } - iov[0].iov_base = pkt; - iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n; - iov[1].iov_base = &pkt->ackinfo; - iov[1].iov_len = sizeof(pkt->ackinfo); - len = iov[0].iov_len + iov[1].iov_len; + iov[0].iov_base = &txb->wire; + iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n; + len = iov[0].iov_len; serial = 
atomic_inc_return(&conn->serial); - pkt->whdr.serial = htonl(serial); + txb->wire.serial = htonl(serial); trace_rxrpc_tx_ack(call->debug_id, serial, - ntohl(pkt->ack.firstPacket), - ntohl(pkt->ack.serial), - pkt->ack.reason, pkt->ack.nAcks); - if (_serial) - *_serial = serial; + ntohl(txb->ack.firstPacket), + ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks); - if (ping) + if (txb->ack.reason == RXRPC_ACK_PING) rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping); - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_seconds(); + rxrpc_inc_stat(call->rxnet, stat_tx_ack_send); + + /* Grab the highest received seq as late as possible */ + txb->ack.previousPacket = htonl(call->rx_highest_seq); + + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); + ret = do_udp_sendmsg(conn->local->socket, &msg, len); + call->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_ack); else - trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr, + trace_rxrpc_tx_packet(call->debug_id, &txb->wire, rxrpc_tx_point_call_ack); rxrpc_tx_backoff(call, ret); if (call->state < RXRPC_CALL_COMPLETE) { - if (ret < 0) { + if (ret < 0) rxrpc_cancel_rtt_probe(call, serial, rtt_slot); - rxrpc_propose_ACK(call, pkt->ack.reason, - ntohl(pkt->ack.serial), - false, true, - rxrpc_propose_ack_retry_tx); - } - rxrpc_set_keepalive(call); } -out: - kfree(pkt); return ret; } @@ -299,7 +289,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) * channel instead, thereby closing off this call. 
*/ if (rxrpc_is_client_call(call) && - test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags)) return 0; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) @@ -322,7 +312,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) pkt.whdr.userStatus = 0; pkt.whdr.securityIndex = call->security_ix; pkt.whdr._rsvd = 0; - pkt.whdr.serviceId = htons(call->service_id); + pkt.whdr.serviceId = htons(call->dest_srx.srx_service); pkt.abort_code = htonl(call->abort_code); iov[0].iov_base = &pkt; @@ -331,9 +321,9 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) serial = atomic_inc_return(&conn->serial); pkt.whdr.serial = htonl(serial); - ret = kernel_sendmsg(conn->params.local->socket, - &msg, iov, 1, sizeof(pkt)); - conn->params.peer->last_tx_at = ktime_get_seconds(); + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt)); + ret = do_udp_sendmsg(conn->local->socket, &msg, sizeof(pkt)); + conn->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_abort); @@ -347,50 +337,30 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) /* * send a packet through the transport endpoint */ -int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, - bool retrans) +int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { + enum rxrpc_req_ack_trace why; struct rxrpc_connection *conn = call->conn; - struct rxrpc_wire_header whdr; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct msghdr msg; - struct kvec iov[2]; + struct kvec iov[1]; rxrpc_serial_t serial; size_t len; int ret, rtt_slot = -1; - _enter(",{%d}", skb->len); - - if (hlist_unhashed(&call->error_link)) { - spin_lock_bh(&call->peer->lock); - hlist_add_head_rcu(&call->error_link, &call->peer->error_targets); - spin_unlock_bh(&call->peer->lock); - } + _enter("%x,{%d}", txb->seq, txb->len); /* Each transmission of a Tx packet needs a new serial number */ serial = 
atomic_inc_return(&conn->serial); - - whdr.epoch = htonl(conn->proto.epoch); - whdr.cid = htonl(call->cid); - whdr.callNumber = htonl(call->call_id); - whdr.seq = htonl(sp->hdr.seq); - whdr.serial = htonl(serial); - whdr.type = RXRPC_PACKET_TYPE_DATA; - whdr.flags = sp->hdr.flags; - whdr.userStatus = 0; - whdr.securityIndex = call->security_ix; - whdr._rsvd = htons(sp->hdr._rsvd); - whdr.serviceId = htons(call->service_id); + txb->wire.serial = htonl(serial); if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && - sp->hdr.seq == 1) - whdr.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; + txb->seq == 1) + txb->wire.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); - iov[1].iov_base = skb->head; - iov[1].iov_len = skb->len; - len = iov[0].iov_len + iov[1].iov_len; + iov[0].iov_base = &txb->wire; + iov[0].iov_len = sizeof(txb->wire) + txb->len; + len = iov[0].iov_len; + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -405,41 +375,64 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, * service call, lest OpenAFS incorrectly send us an ACK with some * soft-ACKs in it and then never follow up with a proper hard ACK. 
*/ - if ((!(sp->hdr.flags & RXRPC_LAST_PACKET) || - rxrpc_to_server(sp) - ) && - (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || - retrans || - call->cong_mode == RXRPC_CALL_SLOW_START || - (call->peer->rtt_count < 3 && sp->hdr.seq & 1) || - ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), - ktime_get_real()))) - whdr.flags |= RXRPC_REQUEST_ACK; + if (txb->wire.flags & RXRPC_REQUEST_ACK) + why = rxrpc_reqack_already_on; + else if (test_bit(RXRPC_TXBUF_LAST, &txb->flags) && rxrpc_sending_to_client(txb)) + why = rxrpc_reqack_no_srv_last; + else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) + why = rxrpc_reqack_ack_lost; + else if (test_bit(RXRPC_TXBUF_RESENT, &txb->flags)) + why = rxrpc_reqack_retrans; + else if (call->cong_mode == RXRPC_CALL_SLOW_START && call->cong_cwnd <= 2) + why = rxrpc_reqack_slow_start; + else if (call->tx_winsize <= 2) + why = rxrpc_reqack_small_txwin; + else if (call->peer->rtt_count < 3 && txb->seq & 1) + why = rxrpc_reqack_more_rtt; + else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) + why = rxrpc_reqack_old_rtt; + else + goto dont_set_request_ack; + + rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]); + trace_rxrpc_req_ack(call->debug_id, txb->seq, why); + if (why != rxrpc_reqack_no_srv_last) + txb->wire.flags |= RXRPC_REQUEST_ACK; +dont_set_request_ack: if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { ret = 0; - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, - whdr.flags, retrans, true); + trace_rxrpc_tx_data(call, txb->seq, serial, + txb->wire.flags, + test_bit(RXRPC_TXBUF_RESENT, &txb->flags), + true); goto done; } } - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans, - false); + trace_rxrpc_tx_data(call, txb->seq, serial, txb->wire.flags, + test_bit(RXRPC_TXBUF_RESENT, &txb->flags), false); + + /* Track what we've attempted to transmit at least once so that the + * retransmission algorithm doesn't 
try to resend what we haven't sent + * yet. However, this can race as we can receive an ACK before we get + * to this point. But, OTOH, if we won't get an ACK mentioning this + * packet unless the far side received it (though it could have + * discarded it anyway and NAK'd it). + */ + cmpxchg(&call->tx_transmitted, txb->seq - 1, txb->seq); /* send the packet with the don't fragment bit set if we currently * think it's small enough */ - if (iov[1].iov_len >= call->peer->maxdata) + if (txb->len >= call->peer->maxdata) goto send_fragmentable; - down_read(&conn->params.local->defrag_sem); + down_read(&conn->local->defrag_sem); - sp->hdr.serial = serial; - smp_wmb(); /* Set serial before timestamp */ - skb->tstamp = ktime_get_real(); - if (whdr.flags & RXRPC_REQUEST_ACK) + txb->last_sent = ktime_get_real(); + if (txb->wire.flags & RXRPC_REQUEST_ACK) rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data); /* send the packet by UDP @@ -448,16 +441,18 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, * - in which case, we'll have processed the ICMP error * message and update the peer record */ - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_seconds(); + rxrpc_inc_stat(call->rxnet, stat_tx_data_send); + ret = do_udp_sendmsg(conn->local->socket, &msg, len); + conn->peer->last_tx_at = ktime_get_seconds(); - up_read(&conn->params.local->defrag_sem); + up_read(&conn->local->defrag_sem); if (ret < 0) { + rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); rxrpc_cancel_rtt_probe(call, serial, rtt_slot); trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_data_nofrag); } else { - trace_rxrpc_tx_packet(call->debug_id, &whdr, + trace_rxrpc_tx_packet(call->debug_id, &txb->wire, rxrpc_tx_point_call_data_nofrag); } @@ -467,8 +462,9 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, done: if (ret >= 0) { - if (whdr.flags & RXRPC_REQUEST_ACK) 
{ - call->peer->rtt_last_req = skb->tstamp; + call->tx_last_sent = txb->last_sent; + if (txb->wire.flags & RXRPC_REQUEST_ACK) { + call->peer->rtt_last_req = txb->last_sent; if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; @@ -480,7 +476,7 @@ done: } } - if (sp->hdr.seq == 1 && + if (txb->seq == 1 && !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) { unsigned long nowj = jiffies, expect_rx_by; @@ -510,25 +506,23 @@ send_fragmentable: /* attempt to send this message with fragmentation enabled */ _debug("send fragment"); - down_write(&conn->params.local->defrag_sem); + down_write(&conn->local->defrag_sem); - sp->hdr.serial = serial; - smp_wmb(); /* Set serial before timestamp */ - skb->tstamp = ktime_get_real(); - if (whdr.flags & RXRPC_REQUEST_ACK) + txb->last_sent = ktime_get_real(); + if (txb->wire.flags & RXRPC_REQUEST_ACK) rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data); - switch (conn->params.local->srx.transport.family) { + switch (conn->local->srx.transport.family) { case AF_INET6: case AF_INET: - ip_sock_set_mtu_discover(conn->params.local->socket->sk, - IP_PMTUDISC_DONT); - ret = kernel_sendmsg(conn->params.local->socket, &msg, - iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_seconds(); - - ip_sock_set_mtu_discover(conn->params.local->socket->sk, - IP_PMTUDISC_DO); + ip_sock_set_mtu_discover(conn->local->socket->sk, + IP_PMTUDISC_DONT); + rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag); + ret = do_udp_sendmsg(conn->local->socket, &msg, len); + conn->peer->last_tx_at = ktime_get_seconds(); + + ip_sock_set_mtu_discover(conn->local->socket->sk, + IP_PMTUDISC_DO); break; default: @@ -536,35 +530,35 @@ send_fragmentable: } if (ret < 0) { + rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); rxrpc_cancel_rtt_probe(call, serial, rtt_slot); trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_data_frag); } else { - trace_rxrpc_tx_packet(call->debug_id, &whdr, + 
trace_rxrpc_tx_packet(call->debug_id, &txb->wire, rxrpc_tx_point_call_data_frag); } rxrpc_tx_backoff(call, ret); - up_write(&conn->params.local->defrag_sem); + up_write(&conn->local->defrag_sem); goto done; } /* - * reject packets through the local endpoint + * Reject a packet through the local endpoint. */ -void rxrpc_reject_packets(struct rxrpc_local *local) +void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) { - struct sockaddr_rxrpc srx; - struct rxrpc_skb_priv *sp; struct rxrpc_wire_header whdr; - struct sk_buff *skb; + struct sockaddr_rxrpc srx; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct msghdr msg; struct kvec iov[2]; size_t size; __be32 code; int ret, ioc; - _enter("%d", local->debug_id); + rxrpc_see_skb(skb, rxrpc_skb_see_reject); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); @@ -578,52 +572,42 @@ void rxrpc_reject_packets(struct rxrpc_local *local) memset(&whdr, 0, sizeof(whdr)); - while ((skb = skb_dequeue(&local->reject_queue))) { - rxrpc_see_skb(skb, rxrpc_skb_seen); - sp = rxrpc_skb(skb); - - switch (skb->mark) { - case RXRPC_SKB_MARK_REJECT_BUSY: - whdr.type = RXRPC_PACKET_TYPE_BUSY; - size = sizeof(whdr); - ioc = 1; - break; - case RXRPC_SKB_MARK_REJECT_ABORT: - whdr.type = RXRPC_PACKET_TYPE_ABORT; - code = htonl(skb->priority); - size = sizeof(whdr) + sizeof(code); - ioc = 2; - break; - default: - rxrpc_free_skb(skb, rxrpc_skb_freed); - continue; - } - - if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { - msg.msg_namelen = srx.transport_len; - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.serviceId = htons(sp->hdr.serviceId); - whdr.flags = sp->hdr.flags; - whdr.flags ^= RXRPC_CLIENT_INITIATED; - whdr.flags &= RXRPC_CLIENT_INITIATED; - - ret = kernel_sendmsg(local->socket, &msg, - iov, ioc, size); - if (ret < 0) - trace_rxrpc_tx_fail(local->debug_id, 0, ret, - rxrpc_tx_point_reject); - else - 
trace_rxrpc_tx_packet(local->debug_id, &whdr, - rxrpc_tx_point_reject); - } - - rxrpc_free_skb(skb, rxrpc_skb_freed); + switch (skb->mark) { + case RXRPC_SKB_MARK_REJECT_BUSY: + whdr.type = RXRPC_PACKET_TYPE_BUSY; + size = sizeof(whdr); + ioc = 1; + break; + case RXRPC_SKB_MARK_REJECT_ABORT: + whdr.type = RXRPC_PACKET_TYPE_ABORT; + code = htonl(skb->priority); + size = sizeof(whdr) + sizeof(code); + ioc = 2; + break; + default: + return; } - _leave(""); + if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { + msg.msg_namelen = srx.transport_len; + + whdr.epoch = htonl(sp->hdr.epoch); + whdr.cid = htonl(sp->hdr.cid); + whdr.callNumber = htonl(sp->hdr.callNumber); + whdr.serviceId = htons(sp->hdr.serviceId); + whdr.flags = sp->hdr.flags; + whdr.flags ^= RXRPC_CLIENT_INITIATED; + whdr.flags &= RXRPC_CLIENT_INITIATED; + + iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size); + ret = do_udp_sendmsg(local->socket, &msg, size); + if (ret < 0) + trace_rxrpc_tx_fail(local->debug_id, 0, ret, + rxrpc_tx_point_reject); + else + trace_rxrpc_tx_packet(local->debug_id, &whdr, + rxrpc_tx_point_reject); + } } /* @@ -664,9 +648,8 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) len = iov[0].iov_len + iov[1].iov_len; - _proto("Tx VERSION (keepalive)"); - - ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len); + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len); + ret = do_udp_sendmsg(peer->local->socket, &msg, len); if (ret < 0) trace_rxrpc_tx_fail(peer->debug_id, 0, ret, rxrpc_tx_point_version_keepalive); @@ -677,3 +660,43 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) peer->last_tx_at = ktime_get_seconds(); _leave(""); } + +/* + * Schedule an instant Tx resend. + */ +static inline void rxrpc_instant_resend(struct rxrpc_call *call, + struct rxrpc_txbuf *txb) +{ + if (call->state < RXRPC_CALL_COMPLETE) + kdebug("resend"); +} + +/* + * Transmit one packet. 
+ */ +void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +{ + int ret; + + ret = rxrpc_send_data_packet(call, txb); + if (ret < 0) { + switch (ret) { + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + 0, ret); + break; + default: + _debug("need instant resend %d", ret); + rxrpc_instant_resend(call, txb); + } + } else { + unsigned long now = jiffies; + unsigned long resend_at = now + call->peer->rto_j; + + WRITE_ONCE(call->resend_at, resend_at); + rxrpc_reduce_call_timer(call, resend_at, now, + rxrpc_timer_set_for_send); + } +} diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 32561e9567fe..6685bf917aa6 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -16,256 +16,11 @@ #include <net/sock.h> #include <net/af_rxrpc.h> #include <net/ip.h> -#include <net/icmp.h> #include "ar-internal.h" -static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int); -static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); -static void rxrpc_distribute_error(struct rxrpc_peer *, int, - enum rxrpc_call_completion); - -/* - * Find the peer associated with an ICMPv4 packet. 
- */ -static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, - struct sk_buff *skb, - unsigned int udp_offset, - unsigned int *info, - struct sockaddr_rxrpc *srx) -{ - struct iphdr *ip, *ip0 = ip_hdr(skb); - struct icmphdr *icmp = icmp_hdr(skb); - struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset); - - _enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code); - - switch (icmp->type) { - case ICMP_DEST_UNREACH: - *info = ntohs(icmp->un.frag.mtu); - fallthrough; - case ICMP_TIME_EXCEEDED: - case ICMP_PARAMETERPROB: - ip = (struct iphdr *)((void *)icmp + 8); - break; - default: - return NULL; - } - - memset(srx, 0, sizeof(*srx)); - srx->transport_type = local->srx.transport_type; - srx->transport_len = local->srx.transport_len; - srx->transport.family = local->srx.transport.family; - - /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice - * versa? - */ - switch (srx->transport.family) { - case AF_INET: - srx->transport_len = sizeof(srx->transport.sin); - srx->transport.family = AF_INET; - srx->transport.sin.sin_port = udp->dest; - memcpy(&srx->transport.sin.sin_addr, &ip->daddr, - sizeof(struct in_addr)); - break; - -#ifdef CONFIG_AF_RXRPC_IPV6 - case AF_INET6: - srx->transport_len = sizeof(srx->transport.sin); - srx->transport.family = AF_INET; - srx->transport.sin.sin_port = udp->dest; - memcpy(&srx->transport.sin.sin_addr, &ip->daddr, - sizeof(struct in_addr)); - break; -#endif - - default: - WARN_ON_ONCE(1); - return NULL; - } - - _net("ICMP {%pISp}", &srx->transport); - return rxrpc_lookup_peer_rcu(local, srx); -} - -#ifdef CONFIG_AF_RXRPC_IPV6 -/* - * Find the peer associated with an ICMPv6 packet. 
- */ -static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local, - struct sk_buff *skb, - unsigned int udp_offset, - unsigned int *info, - struct sockaddr_rxrpc *srx) -{ - struct icmp6hdr *icmp = icmp6_hdr(skb); - struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb); - struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset); - - _enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code); - - switch (icmp->icmp6_type) { - case ICMPV6_DEST_UNREACH: - *info = ntohl(icmp->icmp6_mtu); - fallthrough; - case ICMPV6_PKT_TOOBIG: - case ICMPV6_TIME_EXCEED: - case ICMPV6_PARAMPROB: - ip = (struct ipv6hdr *)((void *)icmp + 8); - break; - default: - return NULL; - } - - memset(srx, 0, sizeof(*srx)); - srx->transport_type = local->srx.transport_type; - srx->transport_len = local->srx.transport_len; - srx->transport.family = local->srx.transport.family; - - /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice - * versa? - */ - switch (srx->transport.family) { - case AF_INET: - _net("Rx ICMP6 on v4 sock"); - srx->transport_len = sizeof(srx->transport.sin); - srx->transport.family = AF_INET; - srx->transport.sin.sin_port = udp->dest; - memcpy(&srx->transport.sin.sin_addr, - &ip->daddr.s6_addr32[3], sizeof(struct in_addr)); - break; - case AF_INET6: - _net("Rx ICMP6"); - srx->transport.sin.sin_port = udp->dest; - memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr, - sizeof(struct in6_addr)); - break; - default: - WARN_ON_ONCE(1); - return NULL; - } - - _net("ICMP {%pISp}", &srx->transport); - return rxrpc_lookup_peer_rcu(local, srx); -} -#endif /* CONFIG_AF_RXRPC_IPV6 */ - -/* - * Handle an error received on the local endpoint as a tunnel. 
- */ -void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, - unsigned int udp_offset) -{ - struct sock_extended_err ee; - struct sockaddr_rxrpc srx; - struct rxrpc_local *local; - struct rxrpc_peer *peer; - unsigned int info = 0; - int err; - u8 version = ip_hdr(skb)->version; - u8 type = icmp_hdr(skb)->type; - u8 code = icmp_hdr(skb)->code; - - rcu_read_lock(); - local = rcu_dereference_sk_user_data(sk); - if (unlikely(!local)) { - rcu_read_unlock(); - return; - } - - rxrpc_new_skb(skb, rxrpc_skb_received); - - switch (ip_hdr(skb)->version) { - case IPVERSION: - peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset, - &info, &srx); - break; -#ifdef CONFIG_AF_RXRPC_IPV6 - case 6: - peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset, - &info, &srx); - break; -#endif - default: - rcu_read_unlock(); - return; - } - - if (peer && !rxrpc_get_peer_maybe(peer)) - peer = NULL; - if (!peer) { - rcu_read_unlock(); - return; - } - - memset(&ee, 0, sizeof(ee)); - - switch (version) { - case IPVERSION: - switch (type) { - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_FRAG_NEEDED: - rxrpc_adjust_mtu(peer, info); - rcu_read_unlock(); - rxrpc_put_peer(peer); - return; - default: - break; - } - - err = EHOSTUNREACH; - if (code <= NR_ICMP_UNREACH) { - /* Might want to do something different with - * non-fatal errors - */ - //harderr = icmp_err_convert[code].fatal; - err = icmp_err_convert[code].errno; - } - break; - - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - default: - err = EPROTO; - break; - } - - ee.ee_origin = SO_EE_ORIGIN_ICMP; - ee.ee_type = type; - ee.ee_code = code; - ee.ee_errno = err; - break; - -#ifdef CONFIG_AF_RXRPC_IPV6 - case 6: - switch (type) { - case ICMPV6_PKT_TOOBIG: - rxrpc_adjust_mtu(peer, info); - rcu_read_unlock(); - rxrpc_put_peer(peer); - return; - } - - icmpv6_err_convert(type, code, &err); - - if (err == EACCES) - err = EHOSTUNREACH; - - ee.ee_origin = SO_EE_ORIGIN_ICMP6; - ee.ee_type = type; - ee.ee_code = 
code; - ee.ee_errno = err; - break; -#endif - } - - trace_rxrpc_rx_icmp(peer, &ee, &srx); - - rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR); - rcu_read_unlock(); - rxrpc_put_peer(peer); -} +static void rxrpc_store_error(struct rxrpc_peer *, struct sk_buff *); +static void rxrpc_distribute_error(struct rxrpc_peer *, struct sk_buff *, + enum rxrpc_call_completion, int); /* * Find the peer associated with a local error. @@ -283,6 +38,9 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, srx->transport_len = local->srx.transport_len; srx->transport.family = local->srx.transport.family; + /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice + * versa? + */ switch (srx->transport.family) { case AF_INET: srx->transport_len = sizeof(srx->transport.sin); @@ -290,13 +48,11 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, srx->transport.sin.sin_port = serr->port; switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP: - _net("Rx ICMP"); memcpy(&srx->transport.sin.sin_addr, skb_network_header(skb) + serr->addr_offset, sizeof(struct in_addr)); break; case SO_EE_ORIGIN_ICMP6: - _net("Rx ICMP6 on v4 sock"); memcpy(&srx->transport.sin.sin_addr, skb_network_header(skb) + serr->addr_offset + 12, sizeof(struct in_addr)); @@ -312,14 +68,12 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, case AF_INET6: switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP6: - _net("Rx ICMP6"); srx->transport.sin6.sin6_port = serr->port; memcpy(&srx->transport.sin6.sin6_addr, skb_network_header(skb) + serr->addr_offset, sizeof(struct in6_addr)); break; case SO_EE_ORIGIN_ICMP: - _net("Rx ICMP on v6 sock"); srx->transport_len = sizeof(srx->transport.sin); srx->transport.family = AF_INET; srx->transport.sin.sin_port = serr->port; @@ -348,13 +102,9 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, */ static void rxrpc_adjust_mtu(struct rxrpc_peer 
*peer, unsigned int mtu) { - _net("Rx ICMP Fragmentation Needed (%d)", mtu); - /* wind down the local interface MTU */ - if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) { + if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) peer->if_mtu = mtu; - _net("I/F MTU %u", mtu); - } if (mtu == 0) { /* they didn't give us a size, estimate one */ @@ -371,121 +121,66 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) } if (mtu < peer->mtu) { - spin_lock_bh(&peer->lock); + spin_lock(&peer->lock); peer->mtu = mtu; peer->maxdata = peer->mtu - peer->hdrsize; - spin_unlock_bh(&peer->lock); - _net("Net MTU %u (maxdata %u)", - peer->mtu, peer->maxdata); + spin_unlock(&peer->lock); } } /* * Handle an error received on the local endpoint. */ -void rxrpc_error_report(struct sock *sk) +void rxrpc_input_error(struct rxrpc_local *local, struct sk_buff *skb) { - struct sock_exterr_skb *serr; + struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); struct sockaddr_rxrpc srx; - struct rxrpc_local *local; struct rxrpc_peer *peer = NULL; - struct sk_buff *skb; - rcu_read_lock(); - local = rcu_dereference_sk_user_data(sk); - if (unlikely(!local)) { - rcu_read_unlock(); + _enter("L=%x", local->debug_id); + + if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { + _leave("UDP empty message"); return; } - _enter("%p{%d}", sk, local->debug_id); - /* Clear the outstanding error value on the socket so that it doesn't - * cause kernel_sendmsg() to return it later. 
- */ - sock_error(sk); - - skb = sock_dequeue_err_skb(sk); - if (!skb) { - rcu_read_unlock(); - _leave("UDP socket errqueue empty"); + rcu_read_lock(); + peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx); + if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input_error)) + peer = NULL; + rcu_read_unlock(); + if (!peer) return; - } - rxrpc_new_skb(skb, rxrpc_skb_received); - serr = SKB_EXT_ERR(skb); - - if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) { - peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx); - if (peer && !rxrpc_get_peer_maybe(peer)) - peer = NULL; - if (peer) { - trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); - rxrpc_store_error(peer, serr); - } + + trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); + + if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP && + serr->ee.ee_type == ICMP_DEST_UNREACH && + serr->ee.ee_code == ICMP_FRAG_NEEDED)) { + rxrpc_adjust_mtu(peer, serr->ee.ee_info); + goto out; } - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); - rxrpc_put_peer(peer); - _leave(""); + rxrpc_store_error(peer, skb); +out: + rxrpc_put_peer(peer, rxrpc_peer_put_input_error); } /* * Map an error report to error codes on the peer record. 
*/ -static void rxrpc_store_error(struct rxrpc_peer *peer, - struct sock_exterr_skb *serr) +static void rxrpc_store_error(struct rxrpc_peer *peer, struct sk_buff *skb) { enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR; - struct sock_extended_err *ee; - int err; + struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + struct sock_extended_err *ee = &serr->ee; + int err = ee->ee_errno; _enter(""); - ee = &serr->ee; - - err = ee->ee_errno; - switch (ee->ee_origin) { - case SO_EE_ORIGIN_ICMP: - switch (ee->ee_type) { - case ICMP_DEST_UNREACH: - switch (ee->ee_code) { - case ICMP_NET_UNREACH: - _net("Rx Received ICMP Network Unreachable"); - break; - case ICMP_HOST_UNREACH: - _net("Rx Received ICMP Host Unreachable"); - break; - case ICMP_PORT_UNREACH: - _net("Rx Received ICMP Port Unreachable"); - break; - case ICMP_NET_UNKNOWN: - _net("Rx Received ICMP Unknown Network"); - break; - case ICMP_HOST_UNKNOWN: - _net("Rx Received ICMP Unknown Host"); - break; - default: - _net("Rx Received ICMP DestUnreach code=%u", - ee->ee_code); - break; - } - break; - - case ICMP_TIME_EXCEEDED: - _net("Rx Received ICMP TTL Exceeded"); - break; - - default: - _proto("Rx Received ICMP error { type=%u code=%u }", - ee->ee_type, ee->ee_code); - break; - } - break; - case SO_EE_ORIGIN_NONE: case SO_EE_ORIGIN_LOCAL: - _proto("Rx Received local error { error=%d }", err); compl = RXRPC_CALL_LOCAL_ERROR; break; @@ -493,26 +188,40 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, if (err == EACCES) err = EHOSTUNREACH; fallthrough; + case SO_EE_ORIGIN_ICMP: default: - _proto("Rx Received error report { orig=%u }", ee->ee_origin); break; } - rxrpc_distribute_error(peer, err, compl); + rxrpc_distribute_error(peer, skb, compl, err); } /* * Distribute an error that occurred on a peer. 
*/ -static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, - enum rxrpc_call_completion compl) +static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb, + enum rxrpc_call_completion compl, int err) { struct rxrpc_call *call; + HLIST_HEAD(error_targets); + + spin_lock(&peer->lock); + hlist_move_list(&peer->error_targets, &error_targets); + + while (!hlist_empty(&error_targets)) { + call = hlist_entry(error_targets.first, + struct rxrpc_call, error_link); + hlist_del_init(&call->error_link); + spin_unlock(&peer->lock); - hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) { - rxrpc_see_call(call); - rxrpc_set_call_completion(call, compl, 0, -error); + rxrpc_see_call(call, rxrpc_call_see_distribute_error); + rxrpc_set_call_completion(call, compl, 0, -err); + rxrpc_input_call_event(call, skb); + + spin_lock(&peer->lock); } + + spin_unlock(&peer->lock); } /* @@ -528,18 +237,18 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, time64_t keepalive_at; int slot; - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); while (!list_empty(collector)) { peer = list_entry(collector->next, struct rxrpc_peer, keepalive_link); list_del_init(&peer->keepalive_link); - if (!rxrpc_get_peer_maybe(peer)) + if (!rxrpc_get_peer_maybe(peer, rxrpc_peer_get_keepalive)) continue; - if (__rxrpc_use_local(peer->local)) { - spin_unlock_bh(&rxnet->peer_hash_lock); + if (__rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive)) { + spin_unlock(&rxnet->peer_hash_lock); keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; slot = keepalive_at - base; @@ -558,15 +267,15 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, */ slot += cursor; slot &= mask; - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive[slot & mask]); - rxrpc_unuse_local(peer->local); + rxrpc_unuse_local(peer->local, 
rxrpc_local_unuse_peer_keepalive); } - rxrpc_put_peer_locked(peer); + rxrpc_put_peer_locked(peer, rxrpc_peer_put_keepalive); } - spin_unlock_bh(&rxnet->peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); } /* @@ -596,7 +305,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work) * second; the bucket at cursor + 1 goes at now + 1s and so * on... */ - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); list_splice_init(&rxnet->peer_keepalive_new, &collector); stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive); @@ -608,7 +317,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work) } base = now; - spin_unlock_bh(&rxnet->peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); rxnet->peer_keepalive_base = base; rxnet->peer_keepalive_cursor = cursor; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 26d2ae9baaf2..608946dcc505 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -138,10 +138,8 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, unsigned long hash_key = rxrpc_peer_hash_key(local, srx); peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); - if (peer) { - _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); + if (peer) _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); - } return peer; } @@ -207,9 +205,9 @@ static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx, /* * Allocate a peer. 
*/ -struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) +struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp, + enum rxrpc_peer_trace why) { - const void *here = __builtin_return_address(0); struct rxrpc_peer *peer; _enter(""); @@ -217,7 +215,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) peer = kzalloc(sizeof(struct rxrpc_peer), gfp); if (peer) { refcount_set(&peer->ref, 1); - peer->local = rxrpc_get_local(local); + peer->local = rxrpc_get_local(local, rxrpc_local_get_peer); INIT_HLIST_HEAD(&peer->error_targets); peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); @@ -227,13 +225,8 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) rxrpc_peer_init_rtt(peer); - if (RXRPC_TX_SMSS > 2190) - peer->cong_cwnd = 2; - else if (RXRPC_TX_SMSS > 1095) - peer->cong_cwnd = 3; - else - peer->cong_cwnd = 4; - trace_rxrpc_peer(peer->debug_id, rxrpc_peer_new, 1, here); + peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW; + trace_rxrpc_peer(peer->debug_id, why, 1); } _leave(" = %p", peer); @@ -289,7 +282,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, _enter(""); - peer = rxrpc_alloc_peer(local, gfp); + peer = rxrpc_alloc_peer(local, gfp, rxrpc_peer_new_client); if (peer) { memcpy(&peer->srx, srx, sizeof(*srx)); rxrpc_init_peer(rx, peer, hash_key); @@ -301,7 +294,8 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, static void rxrpc_free_peer(struct rxrpc_peer *peer) { - rxrpc_put_local(peer->local); + trace_rxrpc_peer(peer->debug_id, 0, rxrpc_peer_free); + rxrpc_put_local(peer->local, rxrpc_local_put_peer); kfree_rcu(peer, rcu); } @@ -341,7 +335,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, /* search the peer list first */ rcu_read_lock(); peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); - if (peer && !rxrpc_get_peer_maybe(peer)) + if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_lookup_client)) 
peer = NULL; rcu_read_unlock(); @@ -355,11 +349,11 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, return NULL; } - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); - if (peer && !rxrpc_get_peer_maybe(peer)) + if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_lookup_client)) peer = NULL; if (!peer) { hash_add_rcu(rxnet->peer_hash, @@ -368,7 +362,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, &rxnet->peer_keepalive_new); } - spin_unlock_bh(&rxnet->peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); if (peer) rxrpc_free_peer(candidate); @@ -376,8 +370,6 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, peer = candidate; } - _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); - _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); return peer; } @@ -385,27 +377,26 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, /* * Get a ref on a peer record. */ -struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) +struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer, enum rxrpc_peer_trace why) { - const void *here = __builtin_return_address(0); int r; __refcount_inc(&peer->ref, &r); - trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); + trace_rxrpc_peer(peer->debug_id, why, r + 1); return peer; } /* * Get a ref on a peer record unless its usage has already reached 0. 
*/ -struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) +struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer, + enum rxrpc_peer_trace why) { - const void *here = __builtin_return_address(0); int r; if (peer) { if (__refcount_inc_not_zero(&peer->ref, &r)) - trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); + trace_rxrpc_peer(peer->debug_id, r + 1, why); else peer = NULL; } @@ -421,10 +412,10 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) ASSERT(hlist_empty(&peer->error_targets)); - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); - spin_unlock_bh(&rxnet->peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); rxrpc_free_peer(peer); } @@ -432,9 +423,8 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) /* * Drop a ref on a peer record. */ -void rxrpc_put_peer(struct rxrpc_peer *peer) +void rxrpc_put_peer(struct rxrpc_peer *peer, enum rxrpc_peer_trace why) { - const void *here = __builtin_return_address(0); unsigned int debug_id; bool dead; int r; @@ -442,7 +432,7 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) if (peer) { debug_id = peer->debug_id; dead = __refcount_dec_and_test(&peer->ref, &r); - trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); + trace_rxrpc_peer(debug_id, r - 1, why); if (dead) __rxrpc_put_peer(peer); } @@ -452,15 +442,14 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) * Drop a ref on a peer record where the caller already holds the * peer_hash_lock. 
*/ -void rxrpc_put_peer_locked(struct rxrpc_peer *peer) +void rxrpc_put_peer_locked(struct rxrpc_peer *peer, enum rxrpc_peer_trace why) { - const void *here = __builtin_return_address(0); unsigned int debug_id = peer->debug_id; bool dead; int r; dead = __refcount_dec_and_test(&peer->ref, &r); - trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); + trace_rxrpc_peer(debug_id, r - 1, why); if (dead) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 245418943e01..3a59591ec061 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -49,55 +49,45 @@ static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) static int rxrpc_call_seq_show(struct seq_file *seq, void *v) { struct rxrpc_local *local; - struct rxrpc_sock *rx; - struct rxrpc_peer *peer; struct rxrpc_call *call; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); unsigned long timeout = 0; - rxrpc_seq_t tx_hard_ack, rx_hard_ack; + rxrpc_seq_t acks_hard_ack; char lbuff[50], rbuff[50]; + u64 wtmp; if (v == &rxnet->calls) { seq_puts(seq, "Proto Local " " Remote " " SvID ConnID CallID End Use State Abort " - " DebugId TxSeq TW RxSeq RW RxSerial RxTimo\n"); + " DebugId TxSeq TW RxSeq RW RxSerial CW RxTimo\n"); return 0; } call = list_entry(v, struct rxrpc_call, link); - rx = rcu_dereference(call->socket); - if (rx) { - local = READ_ONCE(rx->local); - if (local) - sprintf(lbuff, "%pISpc", &local->srx.transport); - else - strcpy(lbuff, "no_local"); - } else { - strcpy(lbuff, "no_socket"); - } - - peer = call->peer; - if (peer) - sprintf(rbuff, "%pISpc", &peer->srx.transport); + local = call->local; + if (local) + sprintf(lbuff, "%pISpc", &local->srx.transport); else - strcpy(rbuff, "no_connection"); + strcpy(lbuff, "no_local"); + + sprintf(rbuff, "%pISpc", &call->dest_srx.transport); if (call->state != RXRPC_CALL_SERVER_PREALLOC) { timeout = READ_ONCE(call->expect_rx_by); timeout -= jiffies; } - tx_hard_ack = 
READ_ONCE(call->tx_hard_ack); - rx_hard_ack = READ_ONCE(call->rx_hard_ack); + acks_hard_ack = READ_ONCE(call->acks_hard_ack); + wtmp = atomic64_read_acquire(&call->ackr_window); seq_printf(seq, "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u" - " %-8.8s %08x %08x %08x %02x %08x %02x %08x %06lx\n", + " %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n", lbuff, rbuff, - call->service_id, + call->dest_srx.srx_service, call->cid, call->call_id, rxrpc_is_service_call(call) ? "Svc" : "Clt", @@ -105,9 +95,10 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) rxrpc_call_states[call->state], call->abort_code, call->debug_id, - tx_hard_ack, READ_ONCE(call->tx_top) - tx_hard_ack, - rx_hard_ack, READ_ONCE(call->rx_top) - rx_hard_ack, + acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack, + lower_32_bits(wtmp), upper_32_bits(wtmp) - lower_32_bits(wtmp), call->rx_serial, + call->cong_cwnd, timeout); return 0; @@ -158,7 +149,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "Proto Local " " Remote " - " SvID ConnID End Use State Key " + " SvID ConnID End Ref Act State Key " " Serial ISerial CallId0 CallId1 CallId2 CallId3\n" ); return 0; @@ -171,12 +162,12 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) goto print; } - sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport); + sprintf(lbuff, "%pISpc", &conn->local->srx.transport); - sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport); + sprintf(rbuff, "%pISpc", &conn->peer->srx.transport); print: seq_printf(seq, - "UDP %-47.47s %-47.47s %4x %08x %s %3u" + "UDP %-47.47s %-47.47s %4x %08x %s %3u %3d" " %s %08x %08x %08x %08x %08x %08x %08x\n", lbuff, rbuff, @@ -184,8 +175,9 @@ print: conn->proto.cid, rxrpc_conn_is_service(conn) ? 
"Svc" : "Clt", refcount_read(&conn->ref), + atomic_read(&conn->active), rxrpc_conn_states[conn->state], - key_serial(conn->params.key), + key_serial(conn->key), atomic_read(&conn->serial), conn->hi_serial, conn->channels[0].call_id, @@ -216,7 +208,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "Proto Local " " Remote " - " Use CW MTU LastUse RTT RTO\n" + " Use SST MTU LastUse RTT RTO\n" ); return 0; } @@ -234,7 +226,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) lbuff, rbuff, refcount_read(&peer->ref), - peer->cong_cwnd, + peer->cong_ssthresh, peer->mtu, now - peer->last_tx_at, peer->srtt_us >> 3, @@ -340,7 +332,7 @@ static int rxrpc_local_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) { seq_puts(seq, "Proto Local " - " Use Act\n"); + " Use Act RxQ\n"); return 0; } @@ -349,10 +341,11 @@ static int rxrpc_local_seq_show(struct seq_file *seq, void *v) sprintf(lbuff, "%pISpc", &local->srx.transport); seq_printf(seq, - "UDP %-47.47s %3u %3u\n", + "UDP %-47.47s %3u %3u %3u\n", lbuff, refcount_read(&local->ref), - atomic_read(&local->active_users)); + atomic_read(&local->active_users), + local->rx_queue.qlen); return 0; } @@ -397,3 +390,109 @@ const struct seq_operations rxrpc_local_seq_ops = { .stop = rxrpc_local_seq_stop, .show = rxrpc_local_seq_show, }; + +/* + * Display stats in /proc/net/rxrpc/stats + */ +int rxrpc_stats_show(struct seq_file *seq, void *v) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(seq)); + + seq_printf(seq, + "Data : send=%u sendf=%u fail=%u\n", + atomic_read(&rxnet->stat_tx_data_send), + atomic_read(&rxnet->stat_tx_data_send_frag), + atomic_read(&rxnet->stat_tx_data_send_fail)); + seq_printf(seq, + "Data-Tx : nr=%u retrans=%u uf=%u cwr=%u\n", + atomic_read(&rxnet->stat_tx_data), + atomic_read(&rxnet->stat_tx_data_retrans), + atomic_read(&rxnet->stat_tx_data_underflow), + atomic_read(&rxnet->stat_tx_data_cwnd_reset)); + seq_printf(seq, + "Data-Rx : nr=%u 
reqack=%u jumbo=%u\n", + atomic_read(&rxnet->stat_rx_data), + atomic_read(&rxnet->stat_rx_data_reqack), + atomic_read(&rxnet->stat_rx_data_jumbo)); + seq_printf(seq, + "Ack : fill=%u send=%u skip=%u\n", + atomic_read(&rxnet->stat_tx_ack_fill), + atomic_read(&rxnet->stat_tx_ack_send), + atomic_read(&rxnet->stat_tx_ack_skip)); + seq_printf(seq, + "Ack-Tx : req=%u dup=%u oos=%u exw=%u nos=%u png=%u prs=%u dly=%u idl=%u\n", + atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_REQUESTED]), + atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_DUPLICATE]), + atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_OUT_OF_SEQUENCE]), + atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_EXCEEDS_WINDOW]), + atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_NOSPACE]), + atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_PING]), + atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_PING_RESPONSE]), + atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_DELAY]), + atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_IDLE])); + seq_printf(seq, + "Ack-Rx : req=%u dup=%u oos=%u exw=%u nos=%u png=%u prs=%u dly=%u idl=%u\n", + atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_REQUESTED]), + atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DUPLICATE]), + atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_OUT_OF_SEQUENCE]), + atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_EXCEEDS_WINDOW]), + atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_NOSPACE]), + atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_PING]), + atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_PING_RESPONSE]), + atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DELAY]), + atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE])); + seq_printf(seq, + "Why-Req-A: acklost=%u already=%u mrtt=%u ortt=%u\n", + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_ack_lost]), + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_already_on]), + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_more_rtt]), + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt])); + seq_printf(seq, + "Why-Req-A: nolast=%u retx=%u slows=%u smtxw=%u\n", + 
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_no_srv_last]), + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_retrans]), + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_slow_start]), + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_small_txwin])); + seq_printf(seq, + "Buffers : txb=%u rxb=%u\n", + atomic_read(&rxrpc_nr_txbuf), + atomic_read(&rxrpc_n_rx_skbs)); + seq_printf(seq, + "IO-thread: loops=%u\n", + atomic_read(&rxnet->stat_io_loop)); + return 0; +} + +/* + * Clear stats if /proc/net/rxrpc/stats is written to. + */ +int rxrpc_stats_clear(struct file *file, char *buf, size_t size) +{ + struct seq_file *m = file->private_data; + struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(m)); + + if (size > 1 || (size == 1 && buf[0] != '\n')) + return -EINVAL; + + atomic_set(&rxnet->stat_tx_data, 0); + atomic_set(&rxnet->stat_tx_data_retrans, 0); + atomic_set(&rxnet->stat_tx_data_underflow, 0); + atomic_set(&rxnet->stat_tx_data_cwnd_reset, 0); + atomic_set(&rxnet->stat_tx_data_send, 0); + atomic_set(&rxnet->stat_tx_data_send_frag, 0); + atomic_set(&rxnet->stat_tx_data_send_fail, 0); + atomic_set(&rxnet->stat_rx_data, 0); + atomic_set(&rxnet->stat_rx_data_reqack, 0); + atomic_set(&rxnet->stat_rx_data_jumbo, 0); + + atomic_set(&rxnet->stat_tx_ack_fill, 0); + atomic_set(&rxnet->stat_tx_ack_send, 0); + atomic_set(&rxnet->stat_tx_ack_skip, 0); + memset(&rxnet->stat_tx_acks, 0, sizeof(rxnet->stat_tx_acks)); + memset(&rxnet->stat_rx_acks, 0, sizeof(rxnet->stat_rx_acks)); + + memset(&rxnet->stat_why_req_ack, 0, sizeof(rxnet->stat_why_req_ack)); + + atomic_set(&rxnet->stat_io_loop, 0); + return size; +} diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h index d2cf8e1d218f..6760cb99c6d6 100644 --- a/net/rxrpc/protocol.h +++ b/net/rxrpc/protocol.h @@ -84,7 +84,7 @@ struct rxrpc_jumbo_header { __be16 _rsvd; /* reserved */ __be16 cksum; /* kerberos security checksum */ }; -}; +} __packed; #define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data 
length */ #define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header)) @@ -132,13 +132,6 @@ struct rxrpc_ackpacket { } __packed; -/* Some ACKs refer to specific packets and some are general and can be updated. */ -#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED) | \ - (1 << RXRPC_ACK_PING_RESPONSE) | \ - (1 << RXRPC_ACK_DELAY) | \ - (1 << RXRPC_ACK_IDLE)) - - /* * ACK packets can have a further piece of information tagged on the end */ diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 7e39c262fd79..36b25d003cf0 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -36,16 +36,16 @@ void rxrpc_notify_socket(struct rxrpc_call *call) sk = &rx->sk; if (rx && sk->sk_state < RXRPC_CLOSE) { if (call->notify_rx) { - spin_lock_bh(&call->notify_lock); + spin_lock(&call->notify_lock); call->notify_rx(sk, call, call->user_call_ID); - spin_unlock_bh(&call->notify_lock); + spin_unlock(&call->notify_lock); } else { - write_lock_bh(&rx->recvmsg_lock); + write_lock(&rx->recvmsg_lock); if (list_empty(&call->recvmsg_link)) { - rxrpc_get_call(call, rxrpc_call_got); + rxrpc_get_call(call, rxrpc_call_get_notify_socket); list_add_tail(&call->recvmsg_link, &rx->recvmsg_q); } - write_unlock_bh(&rx->recvmsg_lock); + write_unlock(&rx->recvmsg_lock); if (!sock_flag(sk, SOCK_DEAD)) { _debug("call %ps", sk->sk_data_ready); @@ -87,9 +87,9 @@ bool rxrpc_set_call_completion(struct rxrpc_call *call, bool ret = false; if (call->state < RXRPC_CALL_COMPLETE) { - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); ret = __rxrpc_set_call_completion(call, compl, abort_code, error); - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); } return ret; } @@ -107,9 +107,9 @@ bool rxrpc_call_completed(struct rxrpc_call *call) bool ret = false; if (call->state < RXRPC_CALL_COMPLETE) { - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); ret = __rxrpc_call_completed(call); - write_unlock_bh(&call->state_lock); 
+ write_unlock(&call->state_lock); } return ret; } @@ -131,9 +131,9 @@ bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, { bool ret; - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); ret = __rxrpc_abort_call(why, call, seq, abort_code, error); - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); return ret; } @@ -173,8 +173,9 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) break; } - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_terminal, call->rx_hard_ack, - call->rx_pkt_offset, call->rx_pkt_len, ret); + trace_rxrpc_recvdata(call, rxrpc_recvmsg_terminal, + lower_32_bits(atomic64_read(&call->ackr_window)) - 1, + call->rx_pkt_offset, call->rx_pkt_len, ret); return ret; } @@ -183,35 +184,32 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) */ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) { + rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq); + _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); - trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); - ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); + trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh); - if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial, false, true, - rxrpc_propose_ack_terminal_ack); - //rxrpc_send_ack_packet(call, false, NULL); - } + if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) + rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack); - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); switch (call->state) { case RXRPC_CALL_CLIENT_RECV_REPLY: __rxrpc_call_completed(call); - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); break; case RXRPC_CALL_SERVER_RECV_REQUEST: - call->tx_phase = true; call->state = RXRPC_CALL_SERVER_ACK_REQUEST; call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; - write_unlock_bh(&call->state_lock); - 
rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false, true, - rxrpc_propose_ack_processing_op); + write_unlock(&call->state_lock); + rxrpc_propose_delay_ACK(call, serial, + rxrpc_propose_ack_processing_op); break; default: - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); break; } } @@ -224,126 +222,51 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_serial_t serial; - rxrpc_seq_t hard_ack, top; - bool last = false; - u8 subpacket; - int ix; + rxrpc_seq_t old_consumed = call->rx_consumed, tseq; + bool last; + int acked; _enter("%d", call->debug_id); - hard_ack = call->rx_hard_ack; - top = smp_load_acquire(&call->rx_top); - ASSERT(before(hard_ack, top)); + skb = skb_dequeue(&call->recvmsg_queue); + rxrpc_see_skb(skb, rxrpc_skb_see_rotate); - hard_ack++; - ix = hard_ack & RXRPC_RXTX_BUFF_MASK; - skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb, rxrpc_skb_rotated); sp = rxrpc_skb(skb); + tseq = sp->hdr.seq; + serial = sp->hdr.serial; + last = sp->hdr.flags & RXRPC_LAST_PACKET; - subpacket = call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET; - serial = sp->hdr.serial + subpacket; - - if (subpacket == sp->nr_subpackets - 1 && - sp->rx_flags & RXRPC_SKB_INCL_LAST) - last = true; - - call->rxtx_buffer[ix] = NULL; - call->rxtx_annotations[ix] = 0; /* Barrier against rxrpc_input_data(). */ - smp_store_release(&call->rx_hard_ack, hard_ack); + if (after(tseq, call->rx_consumed)) + smp_store_release(&call->rx_consumed, tseq); - rxrpc_free_skb(skb, rxrpc_skb_freed); + rxrpc_free_skb(skb, rxrpc_skb_put_rotate); - trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); + trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate, + serial, call->rx_consumed); if (last) { rxrpc_end_rx_phase(call, serial); - } else { - /* Check to see if there's an ACK that needs sending. 
*/ - if (atomic_inc_return(&call->ackr_nr_consumed) > 2) - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial, - true, false, - rxrpc_propose_ack_rotate_rx); - if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) - rxrpc_send_ack_packet(call, false, NULL); - } -} - -/* - * Decrypt and verify a (sub)packet. The packet's length may be changed due to - * padding, but if this is the case, the packet length will be resident in the - * socket buffer. Note that we can't modify the master skb info as the skb may - * be the home to multiple subpackets. - */ -static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, - u8 annotation, - unsigned int offset, unsigned int len) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - rxrpc_seq_t seq = sp->hdr.seq; - u16 cksum = sp->hdr.cksum; - u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET; - - _enter(""); - - /* For all but the head jumbo subpacket, the security checksum is in a - * jumbo header immediately prior to the data. - */ - if (subpacket > 0) { - __be16 tmp; - if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0) - BUG(); - cksum = ntohs(tmp); - seq += subpacket; + return; } - return call->security->verify_packet(call, skb, offset, len, - seq, cksum); + /* Check to see if there's an ACK that needs sending. */ + acked = atomic_add_return(call->rx_consumed - old_consumed, + &call->ackr_nr_consumed); + if (acked > 2 && + !test_and_set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) + rxrpc_poke_call(call, rxrpc_call_poke_idle); } /* - * Locate the data within a packet. This is complicated by: - * - * (1) An skb may contain a jumbo packet - so we have to find the appropriate - * subpacket. - * - * (2) The (sub)packets may be encrypted and, if so, the encrypted portion - * contains an extra header which includes the true length of the data, - * excluding any encrypted padding. + * Decrypt and verify a DATA packet. 
*/ -static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, - u8 *_annotation, - unsigned int *_offset, unsigned int *_len, - bool *_last) +static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int offset = sizeof(struct rxrpc_wire_header); - unsigned int len; - bool last = false; - int ret; - u8 annotation = *_annotation; - u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET; - - /* Locate the subpacket */ - offset += subpacket * RXRPC_JUMBO_SUBPKTLEN; - len = skb->len - offset; - if (subpacket < sp->nr_subpackets - 1) - len = RXRPC_JUMBO_DATALEN; - else if (sp->rx_flags & RXRPC_SKB_INCL_LAST) - last = true; - - if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) { - ret = rxrpc_verify_packet(call, skb, annotation, offset, len); - if (ret < 0) - return ret; - *_annotation |= RXRPC_RX_ANNO_VERIFIED; - } - *_offset = offset; - *_len = len; - *_last = last; - call->security->locate_data(call, skb, _offset, _len); - return 0; + if (sp->flags & RXRPC_RX_VERIFIED) + return 0; + return call->security->verify_packet(call, skb); } /* @@ -357,69 +280,49 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, { struct rxrpc_skb_priv *sp; struct sk_buff *skb; - rxrpc_serial_t serial; - rxrpc_seq_t hard_ack, top, seq; + rxrpc_seq_t seq = 0; size_t remain; - bool rx_pkt_last; unsigned int rx_pkt_offset, rx_pkt_len; - int ix, copy, ret = -EAGAIN, ret2; - - if (test_and_clear_bit(RXRPC_CALL_RX_UNDERRUN, &call->flags) && - call->ackr_reason) - rxrpc_send_ack_packet(call, false, NULL); + int copy, ret = -EAGAIN, ret2; rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; - rx_pkt_last = call->rx_pkt_last; if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { - seq = call->rx_hard_ack; + seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1; ret = 1; goto done; } - /* Barriers against rxrpc_input_data(). 
*/ - hard_ack = call->rx_hard_ack; - seq = hard_ack + 1; - - while (top = smp_load_acquire(&call->rx_top), - before_eq(seq, top) - ) { - ix = seq & RXRPC_RXTX_BUFF_MASK; - skb = call->rxtx_buffer[ix]; - if (!skb) { - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_hole, seq, - rx_pkt_offset, rx_pkt_len, 0); - break; - } - smp_rmb(); - rxrpc_see_skb(skb, rxrpc_skb_seen); + /* No one else can be removing stuff from the queue, so we shouldn't + * need the Rx lock to walk it. + */ + skb = skb_peek(&call->recvmsg_queue); + while (skb) { + rxrpc_see_skb(skb, rxrpc_skb_see_recvmsg); sp = rxrpc_skb(skb); + seq = sp->hdr.seq; - if (!(flags & MSG_PEEK)) { - serial = sp->hdr.serial; - serial += call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET; + if (!(flags & MSG_PEEK)) trace_rxrpc_receive(call, rxrpc_receive_front, - serial, seq); - } + sp->hdr.serial, seq); if (msg) sock_recv_timestamp(msg, sock->sk, skb); if (rx_pkt_offset == 0) { - ret2 = rxrpc_locate_data(call, skb, - &call->rxtx_annotations[ix], - &rx_pkt_offset, &rx_pkt_len, - &rx_pkt_last); - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq, - rx_pkt_offset, rx_pkt_len, ret2); + ret2 = rxrpc_verify_data(call, skb); + rx_pkt_offset = sp->offset; + rx_pkt_len = sp->len; + trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq, + rx_pkt_offset, rx_pkt_len, ret2); if (ret2 < 0) { ret = ret2; goto out; } } else { - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq, - rx_pkt_offset, rx_pkt_len, 0); + trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq, + rx_pkt_offset, rx_pkt_len, 0); } /* We have to handle short, empty and used-up DATA packets. */ @@ -442,39 +345,35 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, } if (rx_pkt_len > 0) { - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq, - rx_pkt_offset, rx_pkt_len, 0); + trace_rxrpc_recvdata(call, rxrpc_recvmsg_full, seq, + rx_pkt_offset, rx_pkt_len, 0); ASSERTCMP(*_offset, ==, len); ret = 0; break; } /* The whole packet has been transferred. 
*/ - if (!(flags & MSG_PEEK)) - rxrpc_rotate_rx_window(call); + if (sp->hdr.flags & RXRPC_LAST_PACKET) + ret = 1; rx_pkt_offset = 0; rx_pkt_len = 0; - if (rx_pkt_last) { - ASSERTCMP(seq, ==, READ_ONCE(call->rx_top)); - ret = 1; - goto out; - } + skb = skb_peek_next(skb, &call->recvmsg_queue); - seq++; + if (!(flags & MSG_PEEK)) + rxrpc_rotate_rx_window(call); } out: if (!(flags & MSG_PEEK)) { call->rx_pkt_offset = rx_pkt_offset; call->rx_pkt_len = rx_pkt_len; - call->rx_pkt_last = rx_pkt_last; } done: - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, - rx_pkt_offset, rx_pkt_len, ret); + trace_rxrpc_recvdata(call, rxrpc_recvmsg_data_return, seq, + rx_pkt_offset, rx_pkt_len, ret); if (ret == -EAGAIN) - set_bit(RXRPC_CALL_RX_UNDERRUN, &call->flags); + set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); return ret; } @@ -495,7 +394,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, DEFINE_WAIT(wait); - trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0, 0, 0, 0); + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0); if (flags & (MSG_OOB | MSG_TRUNC)) return -EOPNOTSUPP; @@ -532,8 +431,7 @@ try_again: if (list_empty(&rx->recvmsg_q)) { if (signal_pending(current)) goto wait_interrupted; - trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, - 0, 0, 0, 0); + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, 0); timeo = schedule_timeout(timeo); } finish_wait(sk_sleep(&rx->sk), &wait); @@ -543,16 +441,16 @@ try_again: /* Find the next call and dequeue it if we're not just peeking. If we * do dequeue it, that comes with a ref that we will need to release. 
*/ - write_lock_bh(&rx->recvmsg_lock); + write_lock(&rx->recvmsg_lock); l = rx->recvmsg_q.next; call = list_entry(l, struct rxrpc_call, recvmsg_link); if (!(flags & MSG_PEEK)) list_del_init(&call->recvmsg_link); else - rxrpc_get_call(call, rxrpc_call_got); - write_unlock_bh(&rx->recvmsg_lock); + rxrpc_get_call(call, rxrpc_call_get_recvmsg); + write_unlock(&rx->recvmsg_lock); - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0); /* We're going to drop the socket lock, so we need to lock the call * against interference by sendmsg. @@ -588,11 +486,9 @@ try_again: } if (msg->msg_name && call->peer) { - struct sockaddr_rxrpc *srx = msg->msg_name; - size_t len = sizeof(call->peer->srx); + size_t len = sizeof(call->dest_srx); - memcpy(msg->msg_name, &call->peer->srx, len); - srx->srx_service = call->service_id; + memcpy(msg->msg_name, &call->dest_srx, len); msg->msg_namelen = len; } @@ -605,8 +501,7 @@ try_again: if (ret == -EAGAIN) ret = 0; - if (after(call->rx_top, call->rx_hard_ack) && - call->rxtx_buffer[(call->rx_hard_ack + 1) & RXRPC_RXTX_BUFF_MASK]) + if (!skb_queue_empty(&call->recvmsg_queue)) rxrpc_notify_socket(call); break; default: @@ -635,23 +530,23 @@ try_again: error_unlock_call: mutex_unlock(&call->user_mutex); - rxrpc_put_call(call, rxrpc_call_put); - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); + rxrpc_put_call(call, rxrpc_call_put_recvmsg); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret); return ret; error_requeue_call: if (!(flags & MSG_PEEK)) { - write_lock_bh(&rx->recvmsg_lock); + write_lock(&rx->recvmsg_lock); list_add(&call->recvmsg_link, &rx->recvmsg_q); - write_unlock_bh(&rx->recvmsg_lock); - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0, 0, 0, 0); + write_unlock(&rx->recvmsg_lock); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0); } else { - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_recvmsg); } error_no_call: 
release_sock(&rx->sk); error_trace: - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret); return ret; wait_interrupted: @@ -735,19 +630,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, read_phase_complete: ret = 1; out: - switch (call->ackr_reason) { - case RXRPC_ACK_IDLE: - break; - case RXRPC_ACK_DELAY: - if (ret != -EAGAIN) - break; - fallthrough; - default: - rxrpc_send_ack_packet(call, false, NULL); - } - if (_service) - *_service = call->service_id; + *_service = call->dest_srx.srx_service; mutex_unlock(&call->user_mutex); _leave(" = %d [%zu,%d]", ret, iov_iter_count(iter), *_abort); return ret; diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 78fa0524156f..d1233720e05f 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -103,7 +103,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn, struct crypto_sync_skcipher *ci; int ret; - _enter("{%d},{%x}", conn->debug_id, key_serial(conn->params.key)); + _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key)); conn->security_ix = token->security_index; @@ -118,7 +118,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn, sizeof(token->kad->session_key)) < 0) BUG(); - switch (conn->params.security_level) { + switch (conn->security_level) { case RXRPC_SECURITY_PLAIN: case RXRPC_SECURITY_AUTH: case RXRPC_SECURITY_ENCRYPT: @@ -150,7 +150,7 @@ static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain, { size_t shdr, buf_size, chunk; - switch (call->conn->params.security_level) { + switch (call->conn->security_level) { default: buf_size = chunk = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); shdr = 0; @@ -192,7 +192,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn, _enter(""); - if (!conn->params.key) + if (!conn->key) return 0; tmpbuf = kmalloc(tmpsize, GFP_KERNEL); @@ -205,7 +205,7 @@ static int rxkad_prime_packet_security(struct 
rxrpc_connection *conn, return -ENOMEM; } - token = conn->params.key->payload.data[0]; + token = conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); tmpbuf[0] = htonl(conn->proto.epoch); @@ -233,16 +233,8 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn, static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call) { struct crypto_skcipher *tfm = &call->conn->rxkad.cipher->base; - struct skcipher_request *cipher_req = call->cipher_req; - if (!cipher_req) { - cipher_req = skcipher_request_alloc(tfm, GFP_NOFS); - if (!cipher_req) - return NULL; - call->cipher_req = cipher_req; - } - - return cipher_req; + return skcipher_request_alloc(tfm, GFP_NOFS); } /* @@ -250,20 +242,16 @@ static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call) */ static void rxkad_free_call_crypto(struct rxrpc_call *call) { - if (call->cipher_req) - skcipher_request_free(call->cipher_req); - call->cipher_req = NULL; } /* * partially encrypt a packet (level 1 security) */ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, - struct sk_buff *skb, u32 data_size, + struct rxrpc_txbuf *txb, struct skcipher_request *req) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxkad_level1_hdr hdr; + struct rxkad_level1_hdr *hdr = (void *)txb->data; struct rxrpc_crypt iv; struct scatterlist sg; size_t pad; @@ -271,22 +259,22 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, _enter(""); - check = sp->hdr.seq ^ call->call_id; - data_size |= (u32)check << 16; + check = txb->seq ^ ntohl(txb->wire.callNumber); + hdr->data_size = htonl((u32)check << 16 | txb->len); - hdr.data_size = htonl(data_size); - memcpy(skb->head, &hdr, sizeof(hdr)); - - pad = sizeof(struct rxkad_level1_hdr) + data_size; + txb->len += sizeof(struct rxkad_level1_hdr); + pad = txb->len; pad = RXKAD_ALIGN - pad; pad &= RXKAD_ALIGN - 1; - if (pad) - skb_put_zero(skb, pad); + if (pad) { + memset(txb->data + 
txb->offset, 0, pad); + txb->len += pad; + } /* start the encryption afresh */ memset(&iv, 0, sizeof(iv)); - sg_init_one(&sg, skb->head, 8); + sg_init_one(&sg, txb->data, 8); skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); @@ -301,92 +289,68 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, * wholly encrypt a packet (level 2 security) */ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 data_size, + struct rxrpc_txbuf *txb, struct skcipher_request *req) { const struct rxrpc_key_token *token; - struct rxkad_level2_hdr rxkhdr; - struct rxrpc_skb_priv *sp; + struct rxkad_level2_hdr *rxkhdr = (void *)txb->data; struct rxrpc_crypt iv; - struct scatterlist sg[16]; - unsigned int len; + struct scatterlist sg; size_t pad; u16 check; - int err; - - sp = rxrpc_skb(skb); + int ret; _enter(""); - check = sp->hdr.seq ^ call->call_id; + check = txb->seq ^ ntohl(txb->wire.callNumber); - rxkhdr.data_size = htonl(data_size | (u32)check << 16); - rxkhdr.checksum = 0; - memcpy(skb->head, &rxkhdr, sizeof(rxkhdr)); + rxkhdr->data_size = htonl(txb->len | (u32)check << 16); + rxkhdr->checksum = 0; - pad = sizeof(struct rxkad_level2_hdr) + data_size; + txb->len += sizeof(struct rxkad_level2_hdr); + pad = txb->len; pad = RXKAD_ALIGN - pad; pad &= RXKAD_ALIGN - 1; - if (pad) - skb_put_zero(skb, pad); + if (pad) { + memset(txb->data + txb->offset, 0, pad); + txb->len += pad; + } /* encrypt from the session key */ - token = call->conn->params.key->payload.data[0]; + token = call->conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); - sg_init_one(&sg[0], skb->head, sizeof(rxkhdr)); + sg_init_one(&sg, txb->data, txb->len); skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, &sg[0], &sg[0], 
sizeof(rxkhdr), iv.x); - crypto_skcipher_encrypt(req); - - /* we want to encrypt the skbuff in-place */ - err = -EMSGSIZE; - if (skb_shinfo(skb)->nr_frags > 16) - goto out; - - len = round_up(data_size, RXKAD_ALIGN); - - sg_init_table(sg, ARRAY_SIZE(sg)); - err = skb_to_sgvec(skb, sg, 8, len); - if (unlikely(err < 0)) - goto out; - skcipher_request_set_crypt(req, sg, sg, len, iv.x); - crypto_skcipher_encrypt(req); - - _leave(" = 0"); - err = 0; - -out: + skcipher_request_set_crypt(req, &sg, &sg, txb->len, iv.x); + ret = crypto_skcipher_encrypt(req); skcipher_request_zero(req); - return err; + return ret; } /* * checksum an RxRPC packet header */ -static int rxkad_secure_packet(struct rxrpc_call *call, - struct sk_buff *skb, - size_t data_size) +static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { - struct rxrpc_skb_priv *sp; struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg; + union { + __be32 buf[2]; + } crypto __aligned(8); u32 x, y; int ret; - sp = rxrpc_skb(skb); - - _enter("{%d{%x}},{#%u},%zu,", - call->debug_id, key_serial(call->conn->params.key), - sp->hdr.seq, data_size); + _enter("{%d{%x}},{#%u},%u,", + call->debug_id, key_serial(call->conn->key), + txb->seq, txb->len); if (!call->conn->rxkad.cipher) return 0; - ret = key_validate(call->conn->params.key); + ret = key_validate(call->conn->key); if (ret < 0) return ret; @@ -398,39 +362,40 @@ static int rxkad_secure_packet(struct rxrpc_call *call, memcpy(&iv, call->conn->rxkad.csum_iv.x, sizeof(iv)); /* calculate the security checksum */ - x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); - x |= sp->hdr.seq & 0x3fffffff; - call->crypto_buf[0] = htonl(call->call_id); - call->crypto_buf[1] = htonl(x); + x = (ntohl(txb->wire.cid) & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); + x |= txb->seq & 0x3fffffff; + crypto.buf[0] = txb->wire.callNumber; + crypto.buf[1] = htonl(x); - sg_init_one(&sg, call->crypto_buf, 8); + sg_init_one(&sg, crypto.buf, 8); 
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); crypto_skcipher_encrypt(req); skcipher_request_zero(req); - y = ntohl(call->crypto_buf[1]); + y = ntohl(crypto.buf[1]); y = (y >> 16) & 0xffff; if (y == 0) y = 1; /* zero checksums are not permitted */ - sp->hdr.cksum = y; + txb->wire.cksum = htons(y); - switch (call->conn->params.security_level) { + switch (call->conn->security_level) { case RXRPC_SECURITY_PLAIN: ret = 0; break; case RXRPC_SECURITY_AUTH: - ret = rxkad_secure_packet_auth(call, skb, data_size, req); + ret = rxkad_secure_packet_auth(call, txb, req); break; case RXRPC_SECURITY_ENCRYPT: - ret = rxkad_secure_packet_encrypt(call, skb, data_size, req); + ret = rxkad_secure_packet_encrypt(call, txb, req); break; default: ret = -EPERM; break; } + skcipher_request_free(req); _leave(" = %d [set %x]", ret, y); return ret; } @@ -439,11 +404,11 @@ static int rxkad_secure_packet(struct rxrpc_call *call, * decrypt partial encryption on a packet (level 1 security) */ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int offset, unsigned int len, rxrpc_seq_t seq, struct skcipher_request *req) { struct rxkad_level1_hdr sechdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv; struct scatterlist sg[16]; bool aborted; @@ -453,9 +418,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, _enter(""); - if (len < 8) { + if (sp->len < 8) { aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H", - RXKADSEALEDINCON); + RXKADSEALEDINCON); goto protocol_error; } @@ -463,7 +428,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, * directly into the target buffer. 
*/ sg_init_table(sg, ARRAY_SIZE(sg)); - ret = skb_to_sgvec(skb, sg, offset, 8); + ret = skb_to_sgvec(skb, sg, sp->offset, 8); if (unlikely(ret < 0)) return ret; @@ -477,12 +442,13 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_zero(req); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { + if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) { aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1", RXKADDATALEN); goto protocol_error; } - len -= sizeof(sechdr); + sp->offset += sizeof(sechdr); + sp->len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); data_size = buf & 0xffff; @@ -496,11 +462,12 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, goto protocol_error; } - if (data_size > len) { + if (data_size > sp->len) { aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L", RXKADDATALEN); goto protocol_error; } + sp->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; @@ -515,12 +482,12 @@ protocol_error: * wholly decrypt a packet (level 2 security) */ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int offset, unsigned int len, rxrpc_seq_t seq, struct skcipher_request *req) { const struct rxrpc_key_token *token; struct rxkad_level2_hdr sechdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; bool aborted; @@ -528,9 +495,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, u16 check; int nsg, ret; - _enter(",{%d}", skb->len); + _enter(",{%d}", sp->len); - if (len < 8) { + if (sp->len < 8) { aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H", RXKADSEALEDINCON); goto protocol_error; @@ -550,7 +517,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } sg_init_table(sg, nsg); - ret = skb_to_sgvec(skb, sg, offset, len); + ret = 
skb_to_sgvec(skb, sg, sp->offset, sp->len); if (unlikely(ret < 0)) { if (sg != _sg) kfree(sg); @@ -558,24 +525,25 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } /* decrypt from the session key */ - token = call->conn->params.key->payload.data[0]; + token = call->conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg, sg, len, iv.x); + skcipher_request_set_crypt(req, sg, sg, sp->len, iv.x); crypto_skcipher_decrypt(req); skcipher_request_zero(req); if (sg != _sg) kfree(sg); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { + if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) { aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2", RXKADDATALEN); goto protocol_error; } - len -= sizeof(sechdr); + sp->offset += sizeof(sechdr); + sp->len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); data_size = buf & 0xffff; @@ -589,12 +557,13 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, goto protocol_error; } - if (data_size > len) { + if (data_size > sp->len) { aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L", RXKADDATALEN); goto protocol_error; } + sp->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; @@ -609,22 +578,25 @@ nomem: } /* - * Verify the security on a received packet or subpacket (if part of a - * jumbo packet). + * Verify the security on a received packet and the subpackets therein. 
*/ -static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int offset, unsigned int len, - rxrpc_seq_t seq, u16 expected_cksum) +static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg; + union { + __be32 buf[2]; + } crypto __aligned(8); + rxrpc_seq_t seq = sp->hdr.seq; bool aborted; + int ret; u16 cksum; u32 x, y; _enter("{%d{%x}},{#%u}", - call->debug_id, key_serial(call->conn->params.key), seq); + call->debug_id, key_serial(call->conn->key), seq); if (!call->conn->rxkad.cipher) return 0; @@ -639,38 +611,45 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, /* validate the security checksum */ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); x |= seq & 0x3fffffff; - call->crypto_buf[0] = htonl(call->call_id); - call->crypto_buf[1] = htonl(x); + crypto.buf[0] = htonl(call->call_id); + crypto.buf[1] = htonl(x); - sg_init_one(&sg, call->crypto_buf, 8); + sg_init_one(&sg, crypto.buf, 8); skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); crypto_skcipher_encrypt(req); skcipher_request_zero(req); - y = ntohl(call->crypto_buf[1]); + y = ntohl(crypto.buf[1]); cksum = (y >> 16) & 0xffff; if (cksum == 0) cksum = 1; /* zero checksums are not permitted */ - if (cksum != expected_cksum) { + if (cksum != sp->hdr.cksum) { aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK", RXKADSEALEDINCON); goto protocol_error; } - switch (call->conn->params.security_level) { + switch (call->conn->security_level) { case RXRPC_SECURITY_PLAIN: - return 0; + ret = 0; + break; case RXRPC_SECURITY_AUTH: - return rxkad_verify_packet_1(call, skb, offset, len, seq, req); + ret = rxkad_verify_packet_1(call, skb, seq, req); + break; case RXRPC_SECURITY_ENCRYPT: - 
return rxkad_verify_packet_2(call, skb, offset, len, seq, req); + ret = rxkad_verify_packet_2(call, skb, seq, req); + break; default: - return -ENOANO; + ret = -ENOANO; + break; } + skcipher_request_free(req); + return ret; + protocol_error: if (aborted) rxrpc_send_abort_packet(call); @@ -678,52 +657,6 @@ protocol_error: } /* - * Locate the data contained in a packet that was partially encrypted. - */ -static void rxkad_locate_data_1(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int *_offset, unsigned int *_len) -{ - struct rxkad_level1_hdr sechdr; - - if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) - BUG(); - *_offset += sizeof(sechdr); - *_len = ntohl(sechdr.data_size) & 0xffff; -} - -/* - * Locate the data contained in a packet that was completely encrypted. - */ -static void rxkad_locate_data_2(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int *_offset, unsigned int *_len) -{ - struct rxkad_level2_hdr sechdr; - - if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) - BUG(); - *_offset += sizeof(sechdr); - *_len = ntohl(sechdr.data_size) & 0xffff; -} - -/* - * Locate the data contained in an already decrypted packet. 
- */ -static void rxkad_locate_data(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int *_offset, unsigned int *_len) -{ - switch (call->conn->params.security_level) { - case RXRPC_SECURITY_AUTH: - rxkad_locate_data_1(call, skb, _offset, _len); - return; - case RXRPC_SECURITY_ENCRYPT: - rxkad_locate_data_2(call, skb, _offset, _len); - return; - default: - return; - } -} - -/* * issue a challenge */ static int rxkad_issue_challenge(struct rxrpc_connection *conn) @@ -745,8 +678,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) challenge.min_level = htonl(0); challenge.__padding = 0; - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -771,16 +704,15 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) serial = atomic_inc_return(&conn->serial); whdr.serial = htonl(serial); - _proto("Tx CHALLENGE %%%u", serial); - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); + ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len); if (ret < 0) { trace_rxrpc_tx_fail(conn->debug_id, serial, ret, rxrpc_tx_point_rxkad_challenge); return -EAGAIN; } - conn->params.peer->last_tx_at = ktime_get_seconds(); + conn->peer->last_tx_at = ktime_get_seconds(); trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_rxkad_challenge); _leave(" = 0"); @@ -804,8 +736,8 @@ static int rxkad_send_response(struct rxrpc_connection *conn, _enter(""); - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -829,16 +761,15 @@ static int rxkad_send_response(struct rxrpc_connection *conn, serial = 
atomic_inc_return(&conn->serial); whdr.serial = htonl(serial); - _proto("Tx RESPONSE %%%u", serial); - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 3, len); + ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len); if (ret < 0) { trace_rxrpc_tx_fail(conn->debug_id, serial, ret, rxrpc_tx_point_rxkad_response); return -EAGAIN; } - conn->params.peer->last_tx_at = ktime_get_seconds(); + conn->peer->last_tx_at = ktime_get_seconds(); _leave(" = 0"); return 0; } @@ -901,15 +832,15 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, u32 version, nonce, min_level, abort_code; int ret; - _enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key)); + _enter("{%d,%x}", conn->debug_id, key_serial(conn->key)); eproto = tracepoint_string("chall_no_key"); abort_code = RX_PROTOCOL_ERROR; - if (!conn->params.key) + if (!conn->key) goto protocol_error; abort_code = RXKADEXPIRED; - ret = key_validate(conn->params.key); + ret = key_validate(conn->key); if (ret < 0) goto other_error; @@ -923,8 +854,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, nonce = ntohl(challenge.nonce); min_level = ntohl(challenge.min_level); - _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }", - sp->hdr.serial, version, nonce, min_level); + trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level); eproto = tracepoint_string("chall_ver"); abort_code = RXKADINCONSISTENCY; @@ -933,10 +863,10 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, abort_code = RXKADLEVELFAIL; ret = -EACCES; - if (conn->params.security_level < min_level) + if (conn->security_level < min_level) goto other_error; - token = conn->params.key->payload.data[0]; + token = conn->key->payload.data[0]; /* build the response packet */ resp = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); @@ -948,7 +878,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, resp->encrypted.cid = htonl(conn->proto.cid); 
resp->encrypted.securityIndex = htonl(conn->security_ix); resp->encrypted.inc_nonce = htonl(nonce + 1); - resp->encrypted.level = htonl(conn->params.security_level); + resp->encrypted.level = htonl(conn->security_level); resp->kvno = htonl(token->kad->kvno); resp->ticket_len = htonl(token->kad->ticket_len); resp->encrypted.call_id[0] = htonl(conn->channels[0].call_counter); @@ -1206,8 +1136,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, version = ntohl(response->version); ticket_len = ntohl(response->ticket_len); kvno = ntohl(response->kvno); - _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", - sp->hdr.serial, version, kvno, ticket_len); + + trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len); eproto = tracepoint_string("rxkad_rsp_ver"); abort_code = RXKADINCONSISTENCY; @@ -1232,9 +1162,10 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, eproto = tracepoint_string("rxkad_tkt_short"); abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), - ticket, ticket_len) < 0) - goto protocol_error_free; + ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), + ticket, ticket_len); + if (ret < 0) + goto temporary_error_free_ticket; ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len, &session_key, &expiry, _abort_code); @@ -1295,7 +1226,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, level = ntohl(response->encrypted.level); if (level > RXRPC_SECURITY_ENCRYPT) goto protocol_error_free; - conn->params.security_level = level; + conn->security_level = level; /* create a key to hold the security data and expiration time - after * this the connection security can be handled in exactly the same way @@ -1397,7 +1328,6 @@ const struct rxrpc_security rxkad = { .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, .free_call_crypto = rxkad_free_call_crypto, - .locate_data = 
rxkad_locate_data, .issue_challenge = rxkad_issue_challenge, .respond_to_challenge = rxkad_respond_to_challenge, .verify_response = rxkad_verify_response, diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c new file mode 100644 index 000000000000..66f5eea291ff --- /dev/null +++ b/net/rxrpc/rxperf.c @@ -0,0 +1,619 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* In-kernel rxperf server for testing purposes. + * + * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) "rxperf: " fmt +#include <linux/module.h> +#include <linux/slab.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> + +MODULE_DESCRIPTION("rxperf test server (afs)"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +#define RXPERF_PORT 7009 +#define RX_PERF_SERVICE 147 +#define RX_PERF_VERSION 3 +#define RX_PERF_SEND 0 +#define RX_PERF_RECV 1 +#define RX_PERF_RPC 3 +#define RX_PERF_FILE 4 +#define RX_PERF_MAGIC_COOKIE 0x4711 + +struct rxperf_proto_params { + __be32 version; + __be32 type; + __be32 rsize; + __be32 wsize; +} __packed; + +static const u8 rxperf_magic_cookie[] = { 0x00, 0x00, 0x47, 0x11 }; +static const u8 secret[8] = { 0xa7, 0x83, 0x8a, 0xcb, 0xc7, 0x83, 0xec, 0x94 }; + +enum rxperf_call_state { + RXPERF_CALL_SV_AWAIT_PARAMS, /* Server: Awaiting parameter block */ + RXPERF_CALL_SV_AWAIT_REQUEST, /* Server: Awaiting request data */ + RXPERF_CALL_SV_REPLYING, /* Server: Replying */ + RXPERF_CALL_SV_AWAIT_ACK, /* Server: Awaiting final ACK */ + RXPERF_CALL_COMPLETE, /* Completed or failed */ +}; + +struct rxperf_call { + struct rxrpc_call *rxcall; + struct iov_iter iter; + struct kvec kvec[1]; + struct work_struct work; + const char *type; + size_t iov_len; + size_t req_len; /* Size of request blob */ + size_t reply_len; /* Size of reply blob */ + unsigned int debug_id; + unsigned int operation_id; + struct rxperf_proto_params params; + __be32 tmp[2]; + s32 abort_code; + enum rxperf_call_state 
state; + short error; + unsigned short unmarshal; + u16 service_id; + int (*deliver)(struct rxperf_call *call); + void (*processor)(struct work_struct *work); +}; + +static struct socket *rxperf_socket; +static struct key *rxperf_sec_keyring; /* Ring of security/crypto keys */ +static struct workqueue_struct *rxperf_workqueue; + +static void rxperf_deliver_to_call(struct work_struct *work); +static int rxperf_deliver_param_block(struct rxperf_call *call); +static int rxperf_deliver_request(struct rxperf_call *call); +static int rxperf_process_call(struct rxperf_call *call); +static void rxperf_charge_preallocation(struct work_struct *work); + +static DECLARE_WORK(rxperf_charge_preallocation_work, + rxperf_charge_preallocation); + +static inline void rxperf_set_call_state(struct rxperf_call *call, + enum rxperf_call_state to) +{ + call->state = to; +} + +static inline void rxperf_set_call_complete(struct rxperf_call *call, + int error, s32 remote_abort) +{ + if (call->state != RXPERF_CALL_COMPLETE) { + call->abort_code = remote_abort; + call->error = error; + call->state = RXPERF_CALL_COMPLETE; + } +} + +static void rxperf_rx_discard_new_call(struct rxrpc_call *rxcall, + unsigned long user_call_ID) +{ + kfree((struct rxperf_call *)user_call_ID); +} + +static void rxperf_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long user_call_ID) +{ + queue_work(rxperf_workqueue, &rxperf_charge_preallocation_work); +} + +static void rxperf_queue_call_work(struct rxperf_call *call) +{ + queue_work(rxperf_workqueue, &call->work); +} + +static void rxperf_notify_rx(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) +{ + struct rxperf_call *call = (struct rxperf_call *)call_user_ID; + + if (call->state != RXPERF_CALL_COMPLETE) + rxperf_queue_call_work(call); +} + +static void rxperf_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) +{ + struct rxperf_call *call = (struct rxperf_call *)user_call_ID; + + call->rxcall = rxcall; 
+} + +static void rxperf_notify_end_reply_tx(struct sock *sock, + struct rxrpc_call *rxcall, + unsigned long call_user_ID) +{ + rxperf_set_call_state((struct rxperf_call *)call_user_ID, + RXPERF_CALL_SV_AWAIT_ACK); +} + +/* + * Charge the incoming call preallocation. + */ +static void rxperf_charge_preallocation(struct work_struct *work) +{ + struct rxperf_call *call; + + for (;;) { + call = kzalloc(sizeof(*call), GFP_KERNEL); + if (!call) + break; + + call->type = "unset"; + call->debug_id = atomic_inc_return(&rxrpc_debug_id); + call->deliver = rxperf_deliver_param_block; + call->state = RXPERF_CALL_SV_AWAIT_PARAMS; + call->service_id = RX_PERF_SERVICE; + call->iov_len = sizeof(call->params); + call->kvec[0].iov_len = sizeof(call->params); + call->kvec[0].iov_base = &call->params; + iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len); + INIT_WORK(&call->work, rxperf_deliver_to_call); + + if (rxrpc_kernel_charge_accept(rxperf_socket, + rxperf_notify_rx, + rxperf_rx_attach, + (unsigned long)call, + GFP_KERNEL, + call->debug_id) < 0) + break; + call = NULL; + } + + kfree(call); +} + +/* + * Open an rxrpc socket and bind it to be a server for callback notifications + * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT + */ +static int rxperf_open_socket(void) +{ + struct sockaddr_rxrpc srx; + struct socket *socket; + int ret; + + ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6, + &socket); + if (ret < 0) + goto error_1; + + socket->sk->sk_allocation = GFP_NOFS; + + /* bind the callback manager's address to make this a server socket */ + memset(&srx, 0, sizeof(srx)); + srx.srx_family = AF_RXRPC; + srx.srx_service = RX_PERF_SERVICE; + srx.transport_type = SOCK_DGRAM; + srx.transport_len = sizeof(srx.transport.sin6); + srx.transport.sin6.sin6_family = AF_INET6; + srx.transport.sin6.sin6_port = htons(RXPERF_PORT); + + ret = rxrpc_sock_set_min_security_level(socket->sk, + RXRPC_SECURITY_ENCRYPT); + if (ret < 0) + goto 
error_2; + + ret = rxrpc_sock_set_security_keyring(socket->sk, rxperf_sec_keyring); + + ret = kernel_bind(socket, (struct sockaddr *)&srx, sizeof(srx)); + if (ret < 0) + goto error_2; + + rxrpc_kernel_new_call_notification(socket, rxperf_rx_new_call, + rxperf_rx_discard_new_call); + + ret = kernel_listen(socket, INT_MAX); + if (ret < 0) + goto error_2; + + rxperf_socket = socket; + rxperf_charge_preallocation(&rxperf_charge_preallocation_work); + return 0; + +error_2: + sock_release(socket); +error_1: + pr_err("Can't set up rxperf socket: %d\n", ret); + return ret; +} + +/* + * close the rxrpc socket rxperf was using + */ +static void rxperf_close_socket(void) +{ + kernel_listen(rxperf_socket, 0); + kernel_sock_shutdown(rxperf_socket, SHUT_RDWR); + flush_workqueue(rxperf_workqueue); + sock_release(rxperf_socket); +} + +/* + * Log remote abort codes that indicate that we have a protocol disagreement + * with the server. + */ +static void rxperf_log_error(struct rxperf_call *call, s32 remote_abort) +{ + static int max = 0; + const char *msg; + int m; + + switch (remote_abort) { + case RX_EOF: msg = "unexpected EOF"; break; + case RXGEN_CC_MARSHAL: msg = "client marshalling"; break; + case RXGEN_CC_UNMARSHAL: msg = "client unmarshalling"; break; + case RXGEN_SS_MARSHAL: msg = "server marshalling"; break; + case RXGEN_SS_UNMARSHAL: msg = "server unmarshalling"; break; + case RXGEN_DECODE: msg = "opcode decode"; break; + case RXGEN_SS_XDRFREE: msg = "server XDR cleanup"; break; + case RXGEN_CC_XDRFREE: msg = "client XDR cleanup"; break; + case -32: msg = "insufficient data"; break; + default: + return; + } + + m = max; + if (m < 3) { + max = m + 1; + pr_info("Peer reported %s failure on %s\n", msg, call->type); + } +} + +/* + * deliver messages to a call + */ +static void rxperf_deliver_to_call(struct work_struct *work) +{ + struct rxperf_call *call = container_of(work, struct rxperf_call, work); + enum rxperf_call_state state; + u32 abort_code, remote_abort = 0; + int 
ret; + + if (call->state == RXPERF_CALL_COMPLETE) + return; + + while (state = call->state, + state == RXPERF_CALL_SV_AWAIT_PARAMS || + state == RXPERF_CALL_SV_AWAIT_REQUEST || + state == RXPERF_CALL_SV_AWAIT_ACK + ) { + if (state == RXPERF_CALL_SV_AWAIT_ACK) { + if (!rxrpc_kernel_check_life(rxperf_socket, call->rxcall)) + goto call_complete; + return; + } + + ret = call->deliver(call); + if (ret == 0) + ret = rxperf_process_call(call); + + switch (ret) { + case 0: + continue; + case -EINPROGRESS: + case -EAGAIN: + return; + case -ECONNABORTED: + rxperf_log_error(call, call->abort_code); + goto call_complete; + case -EOPNOTSUPP: + abort_code = RXGEN_OPCODE; + rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, + abort_code, ret, "GOP"); + goto call_complete; + case -ENOTSUPP: + abort_code = RX_USER_ABORT; + rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, + abort_code, ret, "GUA"); + goto call_complete; + case -EIO: + pr_err("Call %u in bad state %u\n", + call->debug_id, call->state); + fallthrough; + case -ENODATA: + case -EBADMSG: + case -EMSGSIZE: + case -ENOMEM: + case -EFAULT: + rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, + RXGEN_SS_UNMARSHAL, ret, "GUM"); + goto call_complete; + default: + rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, + RX_CALL_DEAD, ret, "GER"); + goto call_complete; + } + } + +call_complete: + rxperf_set_call_complete(call, ret, remote_abort); + /* The call may have been requeued */ + rxrpc_kernel_end_call(rxperf_socket, call->rxcall); + cancel_work(&call->work); + kfree(call); +} + +/* + * Extract a piece of data from the received data socket buffers. 
+ */ +static int rxperf_extract_data(struct rxperf_call *call, bool want_more) +{ + u32 remote_abort = 0; + int ret; + + ret = rxrpc_kernel_recv_data(rxperf_socket, call->rxcall, &call->iter, + &call->iov_len, want_more, &remote_abort, + &call->service_id); + pr_debug("Extract i=%zu l=%zu m=%u ret=%d\n", + iov_iter_count(&call->iter), call->iov_len, want_more, ret); + if (ret == 0 || ret == -EAGAIN) + return ret; + + if (ret == 1) { + switch (call->state) { + case RXPERF_CALL_SV_AWAIT_REQUEST: + rxperf_set_call_state(call, RXPERF_CALL_SV_REPLYING); + break; + case RXPERF_CALL_COMPLETE: + pr_debug("premature completion %d", call->error); + return call->error; + default: + break; + } + return 0; + } + + rxperf_set_call_complete(call, ret, remote_abort); + return ret; +} + +/* + * Grab the operation ID from an incoming manager call. + */ +static int rxperf_deliver_param_block(struct rxperf_call *call) +{ + u32 version; + int ret; + + /* Extract the parameter block */ + ret = rxperf_extract_data(call, true); + if (ret < 0) + return ret; + + version = ntohl(call->params.version); + call->operation_id = ntohl(call->params.type); + call->deliver = rxperf_deliver_request; + + if (version != RX_PERF_VERSION) { + pr_info("Version mismatch %x\n", version); + return -ENOTSUPP; + } + + switch (call->operation_id) { + case RX_PERF_SEND: + call->type = "send"; + call->reply_len = 0; + call->iov_len = 4; /* Expect req size */ + break; + case RX_PERF_RECV: + call->type = "recv"; + call->req_len = 0; + call->iov_len = 4; /* Expect reply size */ + break; + case RX_PERF_RPC: + call->type = "rpc"; + call->iov_len = 8; /* Expect req size and reply size */ + break; + case RX_PERF_FILE: + call->type = "file"; + fallthrough; + default: + return -EOPNOTSUPP; + } + + rxperf_set_call_state(call, RXPERF_CALL_SV_AWAIT_REQUEST); + return call->deliver(call); +} + +/* + * Deliver the request data. 
+ */ +static int rxperf_deliver_request(struct rxperf_call *call) +{ + int ret; + + switch (call->unmarshal) { + case 0: + call->kvec[0].iov_len = call->iov_len; + call->kvec[0].iov_base = call->tmp; + iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len); + call->unmarshal++; + fallthrough; + case 1: + ret = rxperf_extract_data(call, true); + if (ret < 0) + return ret; + + switch (call->operation_id) { + case RX_PERF_SEND: + call->type = "send"; + call->req_len = ntohl(call->tmp[0]); + call->reply_len = 0; + break; + case RX_PERF_RECV: + call->type = "recv"; + call->req_len = 0; + call->reply_len = ntohl(call->tmp[0]); + break; + case RX_PERF_RPC: + call->type = "rpc"; + call->req_len = ntohl(call->tmp[0]); + call->reply_len = ntohl(call->tmp[1]); + break; + default: + pr_info("Can't parse extra params\n"); + return -EIO; + } + + pr_debug("CALL op=%s rq=%zx rp=%zx\n", + call->type, call->req_len, call->reply_len); + + call->iov_len = call->req_len; + iov_iter_discard(&call->iter, READ, call->req_len); + call->unmarshal++; + fallthrough; + case 2: + ret = rxperf_extract_data(call, false); + if (ret < 0) + return ret; + call->unmarshal++; + fallthrough; + default: + return 0; + } +} + +/* + * Process a call for which we've received the request. 
+ */ +static int rxperf_process_call(struct rxperf_call *call) +{ + struct msghdr msg = {}; + struct bio_vec bv[1]; + struct kvec iov[1]; + ssize_t n; + size_t reply_len = call->reply_len, len; + + rxrpc_kernel_set_tx_length(rxperf_socket, call->rxcall, + reply_len + sizeof(rxperf_magic_cookie)); + + while (reply_len > 0) { + len = min_t(size_t, reply_len, PAGE_SIZE); + bv[0].bv_page = ZERO_PAGE(0); + bv[0].bv_offset = 0; + bv[0].bv_len = len; + iov_iter_bvec(&msg.msg_iter, WRITE, bv, 1, len); + msg.msg_flags = MSG_MORE; + n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg, + len, rxperf_notify_end_reply_tx); + if (n < 0) + return n; + if (n == 0) + return -EIO; + reply_len -= n; + } + + len = sizeof(rxperf_magic_cookie); + iov[0].iov_base = (void *)rxperf_magic_cookie; + iov[0].iov_len = len; + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); + msg.msg_flags = 0; + n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg, len, + rxperf_notify_end_reply_tx); + if (n >= 0) + return 0; /* Success */ + + if (n == -ENOMEM) + rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, + RXGEN_SS_MARSHAL, -ENOMEM, "GOM"); + return n; +} + +/* + * Add a key to the security keyring. + */ +static int rxperf_add_key(struct key *keyring) +{ + key_ref_t kref; + int ret; + + kref = key_create_or_update(make_key_ref(keyring, true), + "rxrpc_s", + __stringify(RX_PERF_SERVICE) ":2", + secret, + sizeof(secret), + KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH + | KEY_USR_VIEW, + KEY_ALLOC_NOT_IN_QUOTA); + + if (IS_ERR(kref)) { + pr_err("Can't allocate rxperf server key: %ld\n", PTR_ERR(kref)); + return PTR_ERR(kref); + } + + ret = key_link(keyring, key_ref_to_ptr(kref)); + if (ret < 0) + pr_err("Can't link rxperf server key: %d\n", ret); + key_ref_put(kref); + return ret; +} + +/* + * Initialise the rxperf server. 
+ */ +static int __init rxperf_init(void) +{ + struct key *keyring; + int ret = -ENOMEM; + + pr_info("Server registering\n"); + + rxperf_workqueue = alloc_workqueue("rxperf", 0, 0); + if (!rxperf_workqueue) + goto error_workqueue; + + keyring = keyring_alloc("rxperf_server", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(), + KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | + KEY_POS_WRITE | + KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH | + KEY_USR_WRITE | + KEY_OTH_VIEW | KEY_OTH_READ | KEY_OTH_SEARCH, + KEY_ALLOC_NOT_IN_QUOTA, + NULL, NULL); + if (IS_ERR(keyring)) { + pr_err("Can't allocate rxperf server keyring: %ld\n", + PTR_ERR(keyring)); + goto error_keyring; + } + rxperf_sec_keyring = keyring; + ret = rxperf_add_key(keyring); + if (ret < 0) + goto error_key; + + ret = rxperf_open_socket(); + if (ret < 0) + goto error_socket; + return 0; + +error_socket: +error_key: + key_put(rxperf_sec_keyring); +error_keyring: + destroy_workqueue(rxperf_workqueue); + rcu_barrier(); +error_workqueue: + pr_err("Failed to register: %d\n", ret); + return ret; +} +late_initcall(rxperf_init); /* Must be called after net/ to create socket */ + +static void __exit rxperf_exit(void) +{ + pr_info("Server unregistering.\n"); + + rxperf_close_socket(); + key_put(rxperf_sec_keyring); + destroy_workqueue(rxperf_workqueue); + rcu_barrier(); +} +module_exit(rxperf_exit); + diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 50cb5f1ee0c0..209f2c25a0da 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -63,13 +63,43 @@ const struct rxrpc_security *rxrpc_security_lookup(u8 security_index) } /* + * Initialise the security on a client call. 
+ */ +int rxrpc_init_client_call_security(struct rxrpc_call *call) +{ + const struct rxrpc_security *sec; + struct rxrpc_key_token *token; + struct key *key = call->key; + int ret; + + if (!key) + return 0; + + ret = key_validate(key); + if (ret < 0) + return ret; + + for (token = key->payload.data[0]; token; token = token->next) { + sec = rxrpc_security_lookup(token->security_index); + if (sec) + goto found; + } + return -EKEYREJECTED; + +found: + call->security = sec; + _leave(" = 0"); + return 0; +} + +/* * initialise the security on a client connection */ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) { const struct rxrpc_security *sec; struct rxrpc_key_token *token; - struct key *key = conn->params.key; + struct key *key = conn->key; int ret; _enter("{%d},{%x}", conn->debug_id, key_serial(key)); @@ -163,7 +193,7 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn, rcu_read_lock(); - rx = rcu_dereference(conn->params.local->service); + rx = rcu_dereference(conn->local->service); if (!rx) goto out; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 3c3a626459de..9fa7e37f7155 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -22,14 +22,9 @@ */ static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win) { - unsigned int win_size = - min_t(unsigned int, call->tx_winsize, - call->cong_cwnd + call->cong_extra); - rxrpc_seq_t tx_win = READ_ONCE(call->tx_hard_ack); - if (_tx_win) - *_tx_win = tx_win; - return call->tx_top - tx_win < win_size; + *_tx_win = call->tx_bottom; + return call->tx_prepared - call->tx_bottom < 256; } /* @@ -50,7 +45,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, if (signal_pending(current)) return sock_intr_errno(*timeo); - trace_rxrpc_transmit(call, rxrpc_transmit_wait); + trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); *timeo = schedule_timeout(*timeo); } } @@ -71,12 +66,11 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock 
*rx, rtt = 2; timeout = rtt; - tx_start = READ_ONCE(call->tx_hard_ack); + tx_start = smp_load_acquire(&call->acks_hard_ack); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); - tx_win = READ_ONCE(call->tx_hard_ack); if (rxrpc_check_tx_space(call, &tx_win)) return 0; @@ -92,7 +86,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, tx_start = tx_win; } - trace_rxrpc_transmit(call, rxrpc_transmit_wait); + trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); timeout = schedule_timeout(timeout); } } @@ -112,7 +106,7 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, if (call->state >= RXRPC_CALL_COMPLETE) return call->error; - trace_rxrpc_transmit(call, rxrpc_transmit_wait); + trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); *timeo = schedule_timeout(*timeo); } } @@ -129,8 +123,8 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, DECLARE_WAITQUEUE(myself, current); int ret; - _enter(",{%u,%u,%u}", - call->tx_hard_ack, call->tx_top, call->tx_winsize); + _enter(",{%u,%u,%u,%u}", + call->tx_bottom, call->acks_hard_ack, call->tx_top, call->tx_winsize); add_wait_queue(&call->waitq, &myself); @@ -155,24 +149,6 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, } /* - * Schedule an instant Tx resend. - */ -static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix) -{ - spin_lock_bh(&call->lock); - - if (call->state < RXRPC_CALL_COMPLETE) { - call->rxtx_annotations[ix] = - (call->rxtx_annotations[ix] & RXRPC_TX_ANNO_LAST) | - RXRPC_TX_ANNO_RETRANS; - if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_queue_call(call); - } - - spin_unlock_bh(&call->lock); -} - -/* * Notify the owner of the call that the transmit phase is ended and the last * packet has been queued. */ @@ -188,42 +164,38 @@ static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call, * the packet immediately. Returns the error from rxrpc_send_data_packet() * in case the caller wants to do something with it. 
*/ -static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, - struct sk_buff *skb, bool last, - rxrpc_notify_end_tx_t notify_end_tx) +static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, + struct rxrpc_txbuf *txb, + rxrpc_notify_end_tx_t notify_end_tx) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned long now; - rxrpc_seq_t seq = sp->hdr.seq; - int ret, ix; - u8 annotation = RXRPC_TX_ANNO_UNACK; - - _net("queue skb %p [%d]", skb, seq); + rxrpc_seq_t seq = txb->seq; + bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke; - ASSERTCMP(seq, ==, call->tx_top + 1); + rxrpc_inc_stat(call->rxnet, stat_tx_data); - if (last) - annotation |= RXRPC_TX_ANNO_LAST; + ASSERTCMP(txb->seq, ==, call->tx_prepared + 1); /* We have to set the timestamp before queueing as the retransmit * algorithm can see the packet as soon as we queue it. */ - skb->tstamp = ktime_get_real(); - - ix = seq & RXRPC_RXTX_BUFF_MASK; - rxrpc_get_skb(skb, rxrpc_skb_got); - call->rxtx_annotations[ix] = annotation; - smp_wmb(); - call->rxtx_buffer[ix] = skb; - call->tx_top = seq; + txb->last_sent = ktime_get_real(); + if (last) - trace_rxrpc_transmit(call, rxrpc_transmit_queue_last); + trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last); else - trace_rxrpc_transmit(call, rxrpc_transmit_queue); + trace_rxrpc_txqueue(call, rxrpc_txqueue_queue); + + /* Add the packet to the call's output buffer */ + spin_lock(&call->tx_lock); + poke = list_empty(&call->tx_sendmsg); + list_add_tail(&txb->call_link, &call->tx_sendmsg); + call->tx_prepared = seq; + spin_unlock(&call->tx_lock); if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { _debug("________awaiting reply/ACK__________"); - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); switch (call->state) { case RXRPC_CALL_CLIENT_SEND_REQUEST: call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; @@ -232,7 +204,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, case 
RXRPC_CALL_SERVER_ACK_REQUEST: call->state = RXRPC_CALL_SERVER_SEND_REPLY; now = jiffies; - WRITE_ONCE(call->ack_at, now + MAX_JIFFY_OFFSET); + WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET); if (call->ackr_reason == RXRPC_ACK_DELAY) call->ackr_reason = 0; trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); @@ -246,37 +218,11 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, default: break; } - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); } - if (seq == 1 && rxrpc_is_client_call(call)) - rxrpc_expose_client_call(call); - - ret = rxrpc_send_data_packet(call, skb, false); - if (ret < 0) { - switch (ret) { - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - 0, ret); - goto out; - } - _debug("need instant resend %d", ret); - rxrpc_instant_resend(call, ix); - } else { - unsigned long now = jiffies; - unsigned long resend_at = now + call->peer->rto_j; - - WRITE_ONCE(call->resend_at, resend_at); - rxrpc_reduce_call_timer(call, resend_at, now, - rxrpc_timer_set_for_send); - } - -out: - rxrpc_free_skb(skb, rxrpc_skb_freed); - _leave(" = %d", ret); - return ret; + if (poke) + rxrpc_poke_call(call, rxrpc_call_poke_start); } /* @@ -290,8 +236,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, rxrpc_notify_end_tx_t notify_end_tx, bool *_dropped_lock) { - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; + struct rxrpc_txbuf *txb; struct sock *sk = &rx->sk; enum rxrpc_call_state state; long timeo; @@ -325,16 +270,13 @@ reload: goto maybe_error; } - skb = call->tx_pending; + txb = call->tx_pending; call->tx_pending = NULL; - rxrpc_see_skb(skb, rxrpc_skb_seen); + if (txb) + rxrpc_see_txbuf(txb, rxrpc_txbuf_see_send_more); do { - /* Check to see if there's a ping ACK to reply to. 
*/ - if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_send_ack_packet(call, false, NULL); - - if (!skb) { + if (!txb) { size_t remain, bufsize, chunk, offset; _debug("alloc"); @@ -355,53 +297,31 @@ reload: _debug("SIZE: %zu/%zu @%zu", chunk, bufsize, offset); /* create a buffer that we can retain until it's ACK'd */ - skb = sock_alloc_send_skb( - sk, bufsize, msg->msg_flags & MSG_DONTWAIT, &ret); - if (!skb) + ret = -ENOMEM; + txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_DATA, + GFP_KERNEL); + if (!txb) goto maybe_error; - sp = rxrpc_skb(skb); - sp->rx_flags |= RXRPC_SKB_TX_BUFFER; - rxrpc_new_skb(skb, rxrpc_skb_new); - - _debug("ALLOC SEND %p", skb); - - ASSERTCMP(skb->mark, ==, 0); - - __skb_put(skb, offset); - - sp->remain = chunk; - if (sp->remain > skb_tailroom(skb)) - sp->remain = skb_tailroom(skb); - - _net("skb: hr %d, tr %d, hl %d, rm %d", - skb_headroom(skb), - skb_tailroom(skb), - skb_headlen(skb), - sp->remain); - - skb->ip_summed = CHECKSUM_UNNECESSARY; + txb->offset = offset; + txb->space -= offset; + txb->space = min_t(size_t, chunk, txb->space); } _debug("append"); - sp = rxrpc_skb(skb); /* append next segment of data to the current buffer */ if (msg_data_left(msg) > 0) { - int copy = skb_tailroom(skb); - ASSERTCMP(copy, >, 0); - if (copy > msg_data_left(msg)) - copy = msg_data_left(msg); - if (copy > sp->remain) - copy = sp->remain; - - _debug("add"); - ret = skb_add_data(skb, &msg->msg_iter, copy); - _debug("added"); - if (ret < 0) + size_t copy = min_t(size_t, txb->space, msg_data_left(msg)); + + _debug("add %zu", copy); + if (!copy_from_iter_full(txb->data + txb->offset, copy, + &msg->msg_iter)) goto efault; - sp->remain -= copy; - skb->mark += copy; + _debug("added"); + txb->space -= copy; + txb->len += copy; + txb->offset += copy; copied += copy; if (call->tx_total_len != -1) call->tx_total_len -= copy; @@ -413,50 +333,40 @@ reload: goto call_terminated; /* add the packet to the send queue if it's now full */ - if (sp->remain <= 0 
|| + if (!txb->space || (msg_data_left(msg) == 0 && !more)) { - struct rxrpc_connection *conn = call->conn; - uint32_t seq; - - seq = call->tx_top + 1; - - sp->hdr.seq = seq; - sp->hdr._rsvd = 0; - sp->hdr.flags = conn->out_clientflag; - - if (msg_data_left(msg) == 0 && !more) - sp->hdr.flags |= RXRPC_LAST_PACKET; - else if (call->tx_top - call->tx_hard_ack < + if (msg_data_left(msg) == 0 && !more) { + txb->wire.flags |= RXRPC_LAST_PACKET; + __set_bit(RXRPC_TXBUF_LAST, &txb->flags); + } + else if (call->tx_top - call->acks_hard_ack < call->tx_winsize) - sp->hdr.flags |= RXRPC_MORE_PACKETS; + txb->wire.flags |= RXRPC_MORE_PACKETS; - ret = call->security->secure_packet(call, skb, skb->mark); + ret = call->security->secure_packet(call, txb); if (ret < 0) goto out; - ret = rxrpc_queue_packet(rx, call, skb, - !msg_data_left(msg) && !more, - notify_end_tx); - /* Should check for failure here */ - skb = NULL; + rxrpc_queue_packet(rx, call, txb, notify_end_tx); + txb = NULL; } } while (msg_data_left(msg) > 0); success: ret = copied; if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) { - read_lock_bh(&call->state_lock); + read_lock(&call->state_lock); if (call->error < 0) ret = call->error; - read_unlock_bh(&call->state_lock); + read_unlock(&call->state_lock); } out: - call->tx_pending = skb; + call->tx_pending = txb; _leave(" = %d", ret); return ret; call_terminated: - rxrpc_free_skb(skb, rxrpc_skb_freed); + rxrpc_put_txbuf(txb, rxrpc_txbuf_put_send_aborted); _leave(" = %d", call->error); return call->error; @@ -633,7 +543,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, atomic_inc_return(&rxrpc_debug_id)); /* The socket is now unlocked */ - rxrpc_put_peer(cp.peer); + rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp); _leave(" = %p\n", call); return call; } @@ -645,7 +555,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, */ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) 
__releases(&rx->sk.sk_lock.slock) - __releases(&call->user_mutex) { enum rxrpc_call_state state; struct rxrpc_call *call; @@ -697,7 +606,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) case RXRPC_CALL_CLIENT_AWAIT_CONN: case RXRPC_CALL_SERVER_PREALLOC: case RXRPC_CALL_SERVER_SECURING: - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_sendmsg); ret = -EBUSY; goto error_release_sock; default: @@ -767,7 +676,7 @@ out_put_unlock: if (!dropped_lock) mutex_unlock(&call->user_mutex); error_put: - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_sendmsg); _leave(" = %d", ret); return ret; @@ -814,9 +723,9 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, notify_end_tx, &dropped_lock); break; case RXRPC_CALL_COMPLETE: - read_lock_bh(&call->state_lock); + read_lock(&call->state_lock); ret = call->error; - read_unlock_bh(&call->state_lock); + read_unlock(&call->state_lock); break; default: /* Request phase complete for this client call */ diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c index ee269e0e6ee8..e51940589ee5 100644 --- a/net/rxrpc/server_key.c +++ b/net/rxrpc/server_key.c @@ -144,3 +144,28 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen) _leave(" = 0 [key %x]", key->serial); return 0; } + +/** + * rxrpc_sock_set_security_keyring - Set the security keyring for a kernel service + * @sk: The socket to set the keyring on + * @keyring: The keyring to set + * + * Set the server security keyring on an rxrpc socket. This is used to provide + * the encryption keys for a kernel service. 
+ */ +int rxrpc_sock_set_security_keyring(struct sock *sk, struct key *keyring) +{ + struct rxrpc_sock *rx = rxrpc_sk(sk); + int ret = 0; + + lock_sock(sk); + if (rx->securities) + ret = -EINVAL; + else if (rx->sk.sk_state != RXRPC_UNBOUND) + ret = -EISCONN; + else + rx->securities = key_get(keyring); + release_sock(sk); + return ret; +} +EXPORT_SYMBOL(rxrpc_sock_set_security_keyring); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 580a5acffee7..ebe0c75e7b07 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/* ar-skbuff.c: socket buffer destruction handling +/* Socket buffer accounting * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) @@ -14,66 +14,55 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" -#define is_tx_skb(skb) (rxrpc_skb(skb)->rx_flags & RXRPC_SKB_TX_BUFFER) -#define select_skb_count(skb) (is_tx_skb(skb) ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs) +#define select_skb_count(skb) (&rxrpc_n_rx_skbs) /* * Note the allocation or reception of a socket buffer. */ -void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) { - const void *here = __builtin_return_address(0); int n = atomic_inc_return(select_skb_count(skb)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, - rxrpc_skb(skb)->rx_flags, here); + trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why); } /* * Note the re-emergence of a socket buffer from a queue or buffer. 
*/ -void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) { - const void *here = __builtin_return_address(0); if (skb) { int n = atomic_read(select_skb_count(skb)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, - rxrpc_skb(skb)->rx_flags, here); + trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why); } } /* * Note the addition of a ref on a socket buffer. */ -void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) { - const void *here = __builtin_return_address(0); int n = atomic_inc_return(select_skb_count(skb)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, - rxrpc_skb(skb)->rx_flags, here); + trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why); skb_get(skb); } /* * Note the dropping of a ref on a socket buffer by the core. */ -void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) { - const void *here = __builtin_return_address(0); int n = atomic_inc_return(&rxrpc_n_rx_skbs); - trace_rxrpc_skb(skb, op, 0, n, 0, here); + trace_rxrpc_skb(skb, 0, n, why); } /* * Note the destruction of a socket buffer. 
*/ -void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) { - const void *here = __builtin_return_address(0); if (skb) { - int n; - n = atomic_dec_return(select_skb_count(skb)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, - rxrpc_skb(skb)->rx_flags, here); + int n = atomic_dec_return(select_skb_count(skb)); + trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why); kfree_skb(skb); } } @@ -83,13 +72,12 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) */ void rxrpc_purge_queue(struct sk_buff_head *list) { - const void *here = __builtin_return_address(0); struct sk_buff *skb; + while ((skb = skb_dequeue((list))) != NULL) { int n = atomic_dec_return(select_skb_count(skb)); - trace_rxrpc_skb(skb, rxrpc_skb_purged, - refcount_read(&skb->users), n, - rxrpc_skb(skb)->rx_flags, here); + trace_rxrpc_skb(skb, refcount_read(&skb->users), n, + rxrpc_skb_put_purge); kfree_skb(skb); } } diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 555e0910786b..cde3224a5cd2 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -14,7 +14,7 @@ static struct ctl_table_header *rxrpc_sysctl_reg_table; static const unsigned int four = 4; static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1; static const unsigned int n_65535 = 65535; -static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; +static const unsigned int n_max_acks = 255; static const unsigned long one_jiffy = 1; static const unsigned long max_jiffies = MAX_JIFFY_OFFSET; @@ -27,15 +27,6 @@ static const unsigned long max_jiffies = MAX_JIFFY_OFFSET; static struct ctl_table rxrpc_sysctl_table[] = { /* Values measured in milliseconds but used in jiffies */ { - .procname = "req_ack_delay", - .data = &rxrpc_requested_ack_delay, - .maxlen = sizeof(unsigned long), - .mode = 0644, - .proc_handler = proc_doulongvec_ms_jiffies_minmax, - .extra1 = (void *)&one_jiffy, - .extra2 = (void *)&max_jiffies, - 
}, - { .procname = "soft_ack_delay", .data = &rxrpc_soft_ack_delay, .maxlen = sizeof(unsigned long), diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c new file mode 100644 index 000000000000..d2cf2aac3adb --- /dev/null +++ b/net/rxrpc/txbuf.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC Tx data buffering. + * + * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/slab.h> +#include "ar-internal.h" + +static atomic_t rxrpc_txbuf_debug_ids; +atomic_t rxrpc_nr_txbuf; + +/* + * Allocate and partially initialise an I/O request structure. + */ +struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, + gfp_t gfp) +{ + struct rxrpc_txbuf *txb; + + txb = kmalloc(sizeof(*txb), gfp); + if (txb) { + INIT_LIST_HEAD(&txb->call_link); + INIT_LIST_HEAD(&txb->tx_link); + refcount_set(&txb->ref, 1); + txb->call_debug_id = call->debug_id; + txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); + txb->space = sizeof(txb->data); + txb->len = 0; + txb->offset = 0; + txb->flags = 0; + txb->ack_why = 0; + txb->seq = call->tx_prepared + 1; + txb->wire.epoch = htonl(call->conn->proto.epoch); + txb->wire.cid = htonl(call->cid); + txb->wire.callNumber = htonl(call->call_id); + txb->wire.seq = htonl(txb->seq); + txb->wire.type = packet_type; + txb->wire.flags = call->conn->out_clientflag; + txb->wire.userStatus = 0; + txb->wire.securityIndex = call->security_ix; + txb->wire._rsvd = 0; + txb->wire.serviceId = htons(call->dest_srx.srx_service); + + trace_rxrpc_txbuf(txb->debug_id, + txb->call_debug_id, txb->seq, 1, + packet_type == RXRPC_PACKET_TYPE_DATA ? 
+ rxrpc_txbuf_alloc_data : + rxrpc_txbuf_alloc_ack); + atomic_inc(&rxrpc_nr_txbuf); + } + + return txb; +} + +void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) +{ + int r; + + __refcount_inc(&txb->ref, &r); + trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r + 1, what); +} + +void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) +{ + int r = refcount_read(&txb->ref); + + trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r, what); +} + +static void rxrpc_free_txbuf(struct rcu_head *rcu) +{ + struct rxrpc_txbuf *txb = container_of(rcu, struct rxrpc_txbuf, rcu); + + trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0, + rxrpc_txbuf_free); + kfree(txb); + atomic_dec(&rxrpc_nr_txbuf); +} + +void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) +{ + unsigned int debug_id, call_debug_id; + rxrpc_seq_t seq; + bool dead; + int r; + + if (txb) { + debug_id = txb->debug_id; + call_debug_id = txb->call_debug_id; + seq = txb->seq; + dead = __refcount_dec_and_test(&txb->ref, &r); + trace_rxrpc_txbuf(debug_id, call_debug_id, seq, r - 1, what); + if (dead) + call_rcu(&txb->rcu, rxrpc_free_txbuf); + } +} + +/* + * Shrink the transmit buffer. 
+ */ +void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call) +{ + struct rxrpc_txbuf *txb; + rxrpc_seq_t hard_ack = smp_load_acquire(&call->acks_hard_ack); + bool wake = false; + + _enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top); + + for (;;) { + spin_lock(&call->tx_lock); + txb = list_first_entry_or_null(&call->tx_buffer, + struct rxrpc_txbuf, call_link); + if (!txb) + break; + hard_ack = smp_load_acquire(&call->acks_hard_ack); + if (before(hard_ack, txb->seq)) + break; + + if (txb->seq != call->tx_bottom + 1) + rxrpc_see_txbuf(txb, rxrpc_txbuf_see_out_of_step); + ASSERTCMP(txb->seq, ==, call->tx_bottom + 1); + smp_store_release(&call->tx_bottom, call->tx_bottom + 1); + list_del_rcu(&txb->call_link); + + trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue); + + spin_unlock(&call->tx_lock); + + rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated); + if (after(call->acks_hard_ack, call->tx_bottom + 128)) + wake = true; + } + + spin_unlock(&call->tx_lock); + + if (wake) + wake_up(&call->waitq); +} |