diff options
-rw-r--r-- | Documentation/networking/tls.rst | 18 | ||||
-rw-r--r-- | include/linux/sockptr.h | 8 | ||||
-rw-r--r-- | include/net/tls.h | 3 | ||||
-rw-r--r-- | include/uapi/linux/snmp.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/tls.h | 2 | ||||
-rw-r--r-- | net/core/sock.c | 1 | ||||
-rw-r--r-- | net/tls/tls_main.c | 75 | ||||
-rw-r--r-- | net/tls/tls_proc.c | 1 | ||||
-rw-r--r-- | net/tls/tls_sw.c | 84 | ||||
-rw-r--r-- | tools/testing/selftests/net/tls.c | 15 |
10 files changed, 191 insertions, 17 deletions
diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst index be8e10c14b05..7a6643836e42 100644 --- a/Documentation/networking/tls.rst +++ b/Documentation/networking/tls.rst @@ -239,6 +239,19 @@ for the original TCP transmission and TCP retransmissions. To the receiver this will look like TLS records had been tampered with and will result in record authentication failures. +TLS_RX_EXPECT_NO_PAD +~~~~~~~~~~~~~~~~~~~~ + +TLS 1.3 only. Expect the sender to not pad records. This allows the data +to be decrypted directly into user space buffers with TLS 1.3. + +This optimization is safe to enable only if the remote end is trusted, +otherwise it is an attack vector for doubling the TLS processing cost. + +If the decrypted record turns out to have been padded or is not a data +record it will be decrypted again into a kernel buffer without zero copy. +Such events are counted in the ``TlsDecryptRetry`` statistic. + Statistics ========== @@ -264,3 +277,8 @@ TLS implementation exposes the following per-namespace statistics - ``TlsDeviceRxResync`` - number of RX resyncs sent to NICs handling cryptography + +- ``TlsDecryptRetry`` - + number of RX records which had to be re-decrypted due to + ``TLS_RX_EXPECT_NO_PAD`` mis-prediction. Note that this counter will + also increment for non-data records. 
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index ea193414298b..d45902fb4cad 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -102,4 +102,12 @@ static inline long strncpy_from_sockptr(char *dst, sockptr_t src, size_t count) return strncpy_from_user(dst, src.user, count); } +static inline int check_zeroed_sockptr(sockptr_t src, size_t offset, + size_t size) +{ + if (!sockptr_is_kernel(src)) + return check_zeroed_user(src.user + offset, size); + return memchr_inv(src.kernel + offset, 0, size) == NULL; +} + #endif /* _LINUX_SOCKPTR_H */ diff --git a/include/net/tls.h b/include/net/tls.h index 8017f1703447..4fc16ca5f469 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -149,6 +149,7 @@ struct tls_sw_context_rx { struct sk_buff *recv_pkt; u8 async_capable:1; + u8 zc_capable:1; atomic_t decrypt_pending; /* protect crypto_wait with decrypt_pending*/ spinlock_t decrypt_compl_lock; @@ -239,6 +240,7 @@ struct tls_context { u8 tx_conf:3; u8 rx_conf:3; u8 zerocopy_sendfile:1; + u8 rx_no_pad:1; int (*push_pending_record)(struct sock *sk, int flags); void (*sk_write_space)(struct sock *sk); @@ -358,6 +360,7 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval, void tls_err_abort(struct sock *sk, int err); int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); +void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 904909d020e2..1c9152add663 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -344,6 +344,7 @@ enum LINUX_MIB_TLSRXDEVICE, /* TlsRxDevice */ LINUX_MIB_TLSDECRYPTERROR, /* TlsDecryptError */ LINUX_MIB_TLSRXDEVICERESYNC, /* TlsRxDeviceResync */ + LINUX_MIN_TLSDECRYPTRETRY, /* TlsDecryptRetry */ 
__LINUX_MIB_TLSMAX }; diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index bb8f80812b0b..f1157d8f4acd 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -40,6 +40,7 @@ #define TLS_TX 1 /* Set transmit parameters */ #define TLS_RX 2 /* Set receive parameters */ #define TLS_TX_ZEROCOPY_RO 3 /* TX zerocopy (only sendfile now) */ +#define TLS_RX_EXPECT_NO_PAD 4 /* Attempt opportunistic zero-copy */ /* Supported versions */ #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) @@ -162,6 +163,7 @@ enum { TLS_INFO_TXCONF, TLS_INFO_RXCONF, TLS_INFO_ZC_RO_TX, + TLS_INFO_RX_NO_PAD, __TLS_INFO_MAX, }; #define TLS_INFO_MAX (__TLS_INFO_MAX - 1) diff --git a/net/core/sock.c b/net/core/sock.c index 92a0296ccb18..4cb957d934a2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2870,6 +2870,7 @@ void __sk_flush_backlog(struct sock *sk) __release_sock(sk); spin_unlock_bh(&sk->sk_lock.slock); } +EXPORT_SYMBOL_GPL(__sk_flush_backlog); /** * sk_wait_data - wait for data to arrive at sk_receive_queue diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 2ffede463e4a..1b3efc96db0b 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -533,6 +533,37 @@ static int do_tls_getsockopt_tx_zc(struct sock *sk, char __user *optval, return 0; } +static int do_tls_getsockopt_no_pad(struct sock *sk, char __user *optval, + int __user *optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + int value; + int len; + + if (ctx->prot_info.version != TLS_1_3_VERSION) + return -EINVAL; + + if (get_user(len, optlen)) + return -EFAULT; + if (len < sizeof(value)) + return -EINVAL; + + lock_sock(sk); + value = -EINVAL; /* stays negative unless RX is TLS_SW or TLS_HW */ + if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW) + value = ctx->rx_no_pad; + release_sock(sk); + if (value < 0) + return value; + + if (put_user(sizeof(value), optlen)) + return -EFAULT; + if (copy_to_user(optval, &value, sizeof(value))) + return -EFAULT; + + return 0; +} + static int do_tls_getsockopt(struct sock *sk, int optname, char 
__user *optval, int __user *optlen) { @@ -547,6 +578,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname, case TLS_TX_ZEROCOPY_RO: rc = do_tls_getsockopt_tx_zc(sk, optval, optlen); break; + case TLS_RX_EXPECT_NO_PAD: + rc = do_tls_getsockopt_no_pad(sk, optval, optlen); + break; default: rc = -ENOPROTOOPT; break; @@ -718,6 +752,38 @@ static int do_tls_setsockopt_tx_zc(struct sock *sk, sockptr_t optval, return 0; } +static int do_tls_setsockopt_no_pad(struct sock *sk, sockptr_t optval, + unsigned int optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + u32 val; + int rc; + + if (ctx->prot_info.version != TLS_1_3_VERSION || + sockptr_is_null(optval) || optlen < sizeof(val)) + return -EINVAL; + + rc = copy_from_sockptr(&val, optval, sizeof(val)); + if (rc) + return -EFAULT; + if (val > 1) + return -EINVAL; + rc = check_zeroed_sockptr(optval, sizeof(val), optlen - sizeof(val)); + if (rc < 1) + return rc == 0 ? -EINVAL : rc; + + lock_sock(sk); + rc = -EINVAL; + if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW) { + ctx->rx_no_pad = val; + tls_update_rx_zc_capable(ctx); + rc = 0; + } + release_sock(sk); + + return rc; +} + static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { @@ -736,6 +802,9 @@ static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, rc = do_tls_setsockopt_tx_zc(sk, optval, optlen); release_sock(sk); break; + case TLS_RX_EXPECT_NO_PAD: + rc = do_tls_setsockopt_no_pad(sk, optval, optlen); + break; default: rc = -ENOPROTOOPT; break; @@ -976,6 +1045,11 @@ static int tls_get_info(const struct sock *sk, struct sk_buff *skb) if (err) goto nla_failure; } + if (ctx->rx_no_pad) { + err = nla_put_flag(skb, TLS_INFO_RX_NO_PAD); + if (err) + goto nla_failure; + } rcu_read_unlock(); nla_nest_end(skb, start); @@ -997,6 +1071,7 @@ static size_t tls_get_info_size(const struct sock *sk) nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */ nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF 
*/ nla_total_size(0) + /* TLS_INFO_ZC_RO_TX */ + nla_total_size(0) + /* TLS_INFO_RX_NO_PAD */ 0; return size; diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c index feeceb0e4cb4..0c200000cc45 100644 --- a/net/tls/tls_proc.c +++ b/net/tls/tls_proc.c @@ -18,6 +18,7 @@ static const struct snmp_mib tls_mib_list[] = { SNMP_MIB_ITEM("TlsRxDevice", LINUX_MIB_TLSRXDEVICE), SNMP_MIB_ITEM("TlsDecryptError", LINUX_MIB_TLSDECRYPTERROR), SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC), + SNMP_MIB_ITEM("TlsDecryptRetry", LINUX_MIN_TLSDECRYPTRETRY), SNMP_MIB_SENTINEL }; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 0513f82b8537..79043bc3da39 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -47,6 +47,7 @@ struct tls_decrypt_arg { bool zc; bool async; + u8 tail; }; noinline void tls_err_abort(struct sock *sk, int err) @@ -133,7 +134,8 @@ static int skb_nsg(struct sk_buff *skb, int offset, int len) return __skb_nsg(skb, offset, len, 0); } -static int padding_length(struct tls_prot_info *prot, struct sk_buff *skb) +static int tls_padding_length(struct tls_prot_info *prot, struct sk_buff *skb, + struct tls_decrypt_arg *darg) { struct strp_msg *rxm = strp_msg(skb); struct tls_msg *tlm = tls_msg(skb); @@ -142,7 +144,7 @@ static int padding_length(struct tls_prot_info *prot, struct sk_buff *skb) /* Determine zero-padding length */ if (prot->version == TLS_1_3_VERSION) { int offset = rxm->full_len - TLS_TAG_SIZE - 1; - char content_type = 0; + char content_type = darg->zc ? 
darg->tail : 0; int err; while (content_type == 0) { @@ -1418,18 +1420,18 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, struct strp_msg *rxm = strp_msg(skb); struct tls_msg *tlm = tls_msg(skb); int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0; + u8 *aad, *iv, *tail, *mem = NULL; struct aead_request *aead_req; struct sk_buff *unused; - u8 *aad, *iv, *mem = NULL; struct scatterlist *sgin = NULL; struct scatterlist *sgout = NULL; - const int data_len = rxm->full_len - prot->overhead_size + - prot->tail_size; + const int data_len = rxm->full_len - prot->overhead_size; + int tail_pages = !!prot->tail_size; int iv_offset = 0; if (darg->zc && (out_iov || out_sg)) { if (out_iov) - n_sgout = 1 + + n_sgout = 1 + tail_pages + iov_iter_npages_cap(out_iov, INT_MAX, data_len); else n_sgout = sg_nents(out_sg); @@ -1453,9 +1455,10 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, mem_size = aead_size + (nsg * sizeof(struct scatterlist)); mem_size = mem_size + prot->aad_size; mem_size = mem_size + MAX_IV_SIZE; + mem_size = mem_size + prot->tail_size; /* Allocate a single block of memory which contains - * aead_req || sgin[] || sgout[] || aad || iv. + * aead_req || sgin[] || sgout[] || aad || iv || tail. * This order achieves correct alignment for aead_req, sgin, sgout. 
*/ mem = kmalloc(mem_size, sk->sk_allocation); @@ -1468,6 +1471,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, sgout = sgin + n_sgin; aad = (u8 *)(sgout + n_sgout); iv = aad + prot->aad_size; + tail = iv + MAX_IV_SIZE; /* For CCM based ciphers, first byte of nonce+iv is a constant */ switch (prot->cipher_type) { @@ -1521,9 +1525,16 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, err = tls_setup_from_iter(out_iov, data_len, &pages, &sgout[1], - (n_sgout - 1)); + (n_sgout - 1 - tail_pages)); if (err < 0) goto fallback_to_reg_recv; + + if (prot->tail_size) { + sg_unmark_end(&sgout[pages]); + sg_set_buf(&sgout[pages + 1], tail, + prot->tail_size); + sg_mark_end(&sgout[pages + 1]); + } } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } else { @@ -1538,10 +1549,13 @@ fallback_to_reg_recv: /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, skb, sgin, sgout, iv, - data_len, aead_req, darg); + data_len + prot->tail_size, aead_req, darg); if (darg->async) return 0; + if (prot->tail_size) + darg->tail = *tail; + /* Release the pages in case iov was mapped to pages */ for (; pages > 0; pages--) put_page(sg_page(&sgout[pages])); @@ -1583,9 +1597,16 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, return err; if (darg->async) goto decrypt_next; + /* If opportunistic TLS 1.3 ZC failed retry without ZC */ + if (unlikely(darg->zc && prot->version == TLS_1_3_VERSION && + darg->tail != TLS_RECORD_TYPE_DATA)) { + darg->zc = false; + TLS_INC_STATS(sock_net(sk), LINUX_MIN_TLSDECRYPTRETRY); + return decrypt_skb_update(sk, skb, dest, darg); + } decrypt_done: - pad = padding_length(prot, skb); + pad = tls_padding_length(prot, skb, darg); if (pad < 0) return pad; @@ -1717,6 +1738,24 @@ out: return copied ? 
: err; } +static void +tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot, + size_t len_left, size_t decrypted, ssize_t done, + size_t *flushed_at) +{ + size_t max_rec; + + if (len_left <= decrypted) + return; + + max_rec = prot->overhead_size - prot->tail_size + TLS_MAX_PAYLOAD_SIZE; + if (done - *flushed_at < SZ_128K && tcp_inq(sk) > max_rec) + return; + + *flushed_at = done; + sk_flush_backlog(sk); +} + int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, @@ -1729,6 +1768,7 @@ int tls_sw_recvmsg(struct sock *sk, struct sk_psock *psock; unsigned char control = 0; ssize_t decrypted = 0; + size_t flushed_at = 0; struct strp_msg *rxm; struct tls_msg *tlm; struct sk_buff *skb; @@ -1767,7 +1807,7 @@ int tls_sw_recvmsg(struct sock *sk, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek && - prot->version != TLS_1_3_VERSION; + ctx->zc_capable; decrypted = 0; while (len && (decrypted + copied < target || ctx->recv_pkt)) { struct tls_decrypt_arg darg = {}; @@ -1818,6 +1858,10 @@ int tls_sw_recvmsg(struct sock *sk, if (err <= 0) goto recv_end; + /* periodically flush backlog, and feed strparser */ + tls_read_flush_backlog(sk, prot, len, to_decrypt, + decrypted + copied, &flushed_at); + ctx->recv_pkt = NULL; __strp_unpause(&ctx->strp); __skb_queue_tail(&ctx->rx_list, skb); @@ -2249,6 +2293,14 @@ void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx) strp_check_rcv(&rx_ctx->strp); } +void tls_update_rx_zc_capable(struct tls_context *tls_ctx) +{ + struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx); + + rx_ctx->zc_capable = tls_ctx->rx_no_pad || + tls_ctx->prot_info.version != TLS_1_3_VERSION; +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2484,12 +2536,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) if (sw_ctx_rx) { tfm = 
crypto_aead_tfm(sw_ctx_rx->aead_recv); - if (crypto_info->version == TLS_1_3_VERSION) - sw_ctx_rx->async_capable = 0; - else - sw_ctx_rx->async_capable = - !!(tfm->__crt_alg->cra_flags & - CRYPTO_ALG_ASYNC); + tls_update_rx_zc_capable(ctx); + sw_ctx_rx->async_capable = + crypto_info->version != TLS_1_3_VERSION && + !!(tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC); /* Set up strparser */ memset(&cb, 0, sizeof(cb)); diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5d70b04c482c..e71ec5846be9 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -235,6 +235,7 @@ FIXTURE_VARIANT(tls) { uint16_t tls_version; uint16_t cipher_type; + bool nopad; }; FIXTURE_VARIANT_ADD(tls, 12_aes_gcm) @@ -297,9 +298,17 @@ FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256) .cipher_type = TLS_CIPHER_AES_GCM_256, }; +FIXTURE_VARIANT_ADD(tls, 13_nopad) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, + .nopad = true, +}; + FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; + int one = 1; int ret; tls_crypto_info_init(variant->tls_version, variant->cipher_type, @@ -315,6 +324,12 @@ FIXTURE_SETUP(tls) ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); ASSERT_EQ(ret, 0); + + if (variant->nopad) { + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&one, sizeof(one)); + ASSERT_EQ(ret, 0); + } } FIXTURE_TEARDOWN(tls) |