summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/tls.rst18
-rw-r--r--include/linux/sockptr.h8
-rw-r--r--include/net/tls.h3
-rw-r--r--include/uapi/linux/snmp.h1
-rw-r--r--include/uapi/linux/tls.h2
-rw-r--r--net/core/sock.c1
-rw-r--r--net/tls/tls_main.c75
-rw-r--r--net/tls/tls_proc.c1
-rw-r--r--net/tls/tls_sw.c84
-rw-r--r--tools/testing/selftests/net/tls.c15
10 files changed, 191 insertions, 17 deletions
diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst
index be8e10c14b05..7a6643836e42 100644
--- a/Documentation/networking/tls.rst
+++ b/Documentation/networking/tls.rst
@@ -239,6 +239,19 @@ for the original TCP transmission and TCP retransmissions. To the receiver
this will look like TLS records had been tampered with and will result
in record authentication failures.
+TLS_RX_EXPECT_NO_PAD
+~~~~~~~~~~~~~~~~~~~~
+
+TLS 1.3 only. Expect the sender to not pad records. This allows the data
+to be decrypted directly into user space buffers with TLS 1.3.
+
+This optimization is safe to enable only if the remote end is trusted,
+otherwise it is an attack vector to doubling the TLS processing cost.
+
+If the record decrypted turns out to had been padded or is not a data
+record it will be decrypted again into a kernel buffer without zero copy.
+Such events are counted in the ``TlsDecryptRetry`` statistic.
+
Statistics
==========
@@ -264,3 +277,8 @@ TLS implementation exposes the following per-namespace statistics
- ``TlsDeviceRxResync`` -
number of RX resyncs sent to NICs handling cryptography
+
+- ``TlsDecryptRetry`` -
+ number of RX records which had to be re-decrypted due to
+ ``TLS_RX_EXPECT_NO_PAD`` mis-prediction. Note that this counter will
+ also increment for non-data records.
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index ea193414298b..d45902fb4cad 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -102,4 +102,12 @@ static inline long strncpy_from_sockptr(char *dst, sockptr_t src, size_t count)
return strncpy_from_user(dst, src.user, count);
}
+static inline int check_zeroed_sockptr(sockptr_t src, size_t offset,
+ size_t size)
+{
+ if (!sockptr_is_kernel(src))
+ return check_zeroed_user(src.user + offset, size);
+ return memchr_inv(src.kernel + offset, 0, size) == NULL;
+}
+
#endif /* _LINUX_SOCKPTR_H */
diff --git a/include/net/tls.h b/include/net/tls.h
index 8017f1703447..4fc16ca5f469 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -149,6 +149,7 @@ struct tls_sw_context_rx {
struct sk_buff *recv_pkt;
u8 async_capable:1;
+ u8 zc_capable:1;
atomic_t decrypt_pending;
/* protect crypto_wait with decrypt_pending*/
spinlock_t decrypt_compl_lock;
@@ -239,6 +240,7 @@ struct tls_context {
u8 tx_conf:3;
u8 rx_conf:3;
u8 zerocopy_sendfile:1;
+ u8 rx_no_pad:1;
int (*push_pending_record)(struct sock *sk, int flags);
void (*sk_write_space)(struct sock *sk);
@@ -358,6 +360,7 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
void tls_err_abort(struct sock *sk, int err);
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
+void tls_update_rx_zc_capable(struct tls_context *tls_ctx);
void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
void tls_sw_strparser_done(struct tls_context *tls_ctx);
int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 904909d020e2..1c9152add663 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -344,6 +344,7 @@ enum
LINUX_MIB_TLSRXDEVICE, /* TlsRxDevice */
LINUX_MIB_TLSDECRYPTERROR, /* TlsDecryptError */
LINUX_MIB_TLSRXDEVICERESYNC, /* TlsRxDeviceResync */
+ LINUX_MIN_TLSDECRYPTRETRY, /* TlsDecryptRetry */
__LINUX_MIB_TLSMAX
};
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index bb8f80812b0b..f1157d8f4acd 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -40,6 +40,7 @@
#define TLS_TX 1 /* Set transmit parameters */
#define TLS_RX 2 /* Set receive parameters */
#define TLS_TX_ZEROCOPY_RO 3 /* TX zerocopy (only sendfile now) */
+#define TLS_RX_EXPECT_NO_PAD 4 /* Attempt opportunistic zero-copy */
/* Supported versions */
#define TLS_VERSION_MINOR(ver) ((ver) & 0xFF)
@@ -162,6 +163,7 @@ enum {
TLS_INFO_TXCONF,
TLS_INFO_RXCONF,
TLS_INFO_ZC_RO_TX,
+ TLS_INFO_RX_NO_PAD,
__TLS_INFO_MAX,
};
#define TLS_INFO_MAX (__TLS_INFO_MAX - 1)
diff --git a/net/core/sock.c b/net/core/sock.c
index 92a0296ccb18..4cb957d934a2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2870,6 +2870,7 @@ void __sk_flush_backlog(struct sock *sk)
__release_sock(sk);
spin_unlock_bh(&sk->sk_lock.slock);
}
+EXPORT_SYMBOL_GPL(__sk_flush_backlog);
/**
* sk_wait_data - wait for data to arrive at sk_receive_queue
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 2ffede463e4a..1b3efc96db0b 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -533,6 +533,37 @@ static int do_tls_getsockopt_tx_zc(struct sock *sk, char __user *optval,
return 0;
}
+static int do_tls_getsockopt_no_pad(struct sock *sk, char __user *optval,
+ int __user *optlen)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ unsigned int value;
+ int err, len;
+
+ if (ctx->prot_info.version != TLS_1_3_VERSION)
+ return -EINVAL;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+ if (len < sizeof(value))
+ return -EINVAL;
+
+ lock_sock(sk);
+ err = -EINVAL;
+ if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW)
+ value = ctx->rx_no_pad;
+ release_sock(sk);
+ if (err)
+ return err;
+
+ if (put_user(sizeof(value), optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &value, sizeof(value)))
+ return -EFAULT;
+
+ return 0;
+}
+
static int do_tls_getsockopt(struct sock *sk, int optname,
char __user *optval, int __user *optlen)
{
@@ -547,6 +578,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
case TLS_TX_ZEROCOPY_RO:
rc = do_tls_getsockopt_tx_zc(sk, optval, optlen);
break;
+ case TLS_RX_EXPECT_NO_PAD:
+ rc = do_tls_getsockopt_no_pad(sk, optval, optlen);
+ break;
default:
rc = -ENOPROTOOPT;
break;
@@ -718,6 +752,38 @@ static int do_tls_setsockopt_tx_zc(struct sock *sk, sockptr_t optval,
return 0;
}
+static int do_tls_setsockopt_no_pad(struct sock *sk, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ u32 val;
+ int rc;
+
+ if (ctx->prot_info.version != TLS_1_3_VERSION ||
+ sockptr_is_null(optval) || optlen < sizeof(val))
+ return -EINVAL;
+
+ rc = copy_from_sockptr(&val, optval, sizeof(val));
+ if (rc)
+ return -EFAULT;
+ if (val > 1)
+ return -EINVAL;
+ rc = check_zeroed_sockptr(optval, sizeof(val), optlen - sizeof(val));
+ if (rc < 1)
+ return rc == 0 ? -EINVAL : rc;
+
+ lock_sock(sk);
+ rc = -EINVAL;
+ if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW) {
+ ctx->rx_no_pad = val;
+ tls_update_rx_zc_capable(ctx);
+ rc = 0;
+ }
+ release_sock(sk);
+
+ return rc;
+}
+
static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval,
unsigned int optlen)
{
@@ -736,6 +802,9 @@ static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval,
rc = do_tls_setsockopt_tx_zc(sk, optval, optlen);
release_sock(sk);
break;
+ case TLS_RX_EXPECT_NO_PAD:
+ rc = do_tls_setsockopt_no_pad(sk, optval, optlen);
+ break;
default:
rc = -ENOPROTOOPT;
break;
@@ -976,6 +1045,11 @@ static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
if (err)
goto nla_failure;
}
+ if (ctx->rx_no_pad) {
+ err = nla_put_flag(skb, TLS_INFO_RX_NO_PAD);
+ if (err)
+ goto nla_failure;
+ }
rcu_read_unlock();
nla_nest_end(skb, start);
@@ -997,6 +1071,7 @@ static size_t tls_get_info_size(const struct sock *sk)
nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */
nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */
nla_total_size(0) + /* TLS_INFO_ZC_RO_TX */
+ nla_total_size(0) + /* TLS_INFO_RX_NO_PAD */
0;
return size;
diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c
index feeceb0e4cb4..0c200000cc45 100644
--- a/net/tls/tls_proc.c
+++ b/net/tls/tls_proc.c
@@ -18,6 +18,7 @@ static const struct snmp_mib tls_mib_list[] = {
SNMP_MIB_ITEM("TlsRxDevice", LINUX_MIB_TLSRXDEVICE),
SNMP_MIB_ITEM("TlsDecryptError", LINUX_MIB_TLSDECRYPTERROR),
SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC),
+ SNMP_MIB_ITEM("TlsDecryptRetry", LINUX_MIN_TLSDECRYPTRETRY),
SNMP_MIB_SENTINEL
};
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 0513f82b8537..79043bc3da39 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -47,6 +47,7 @@
struct tls_decrypt_arg {
bool zc;
bool async;
+ u8 tail;
};
noinline void tls_err_abort(struct sock *sk, int err)
@@ -133,7 +134,8 @@ static int skb_nsg(struct sk_buff *skb, int offset, int len)
return __skb_nsg(skb, offset, len, 0);
}
-static int padding_length(struct tls_prot_info *prot, struct sk_buff *skb)
+static int tls_padding_length(struct tls_prot_info *prot, struct sk_buff *skb,
+ struct tls_decrypt_arg *darg)
{
struct strp_msg *rxm = strp_msg(skb);
struct tls_msg *tlm = tls_msg(skb);
@@ -142,7 +144,7 @@ static int padding_length(struct tls_prot_info *prot, struct sk_buff *skb)
/* Determine zero-padding length */
if (prot->version == TLS_1_3_VERSION) {
int offset = rxm->full_len - TLS_TAG_SIZE - 1;
- char content_type = 0;
+ char content_type = darg->zc ? darg->tail : 0;
int err;
while (content_type == 0) {
@@ -1418,18 +1420,18 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
struct strp_msg *rxm = strp_msg(skb);
struct tls_msg *tlm = tls_msg(skb);
int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0;
+ u8 *aad, *iv, *tail, *mem = NULL;
struct aead_request *aead_req;
struct sk_buff *unused;
- u8 *aad, *iv, *mem = NULL;
struct scatterlist *sgin = NULL;
struct scatterlist *sgout = NULL;
- const int data_len = rxm->full_len - prot->overhead_size +
- prot->tail_size;
+ const int data_len = rxm->full_len - prot->overhead_size;
+ int tail_pages = !!prot->tail_size;
int iv_offset = 0;
if (darg->zc && (out_iov || out_sg)) {
if (out_iov)
- n_sgout = 1 +
+ n_sgout = 1 + tail_pages +
iov_iter_npages_cap(out_iov, INT_MAX, data_len);
else
n_sgout = sg_nents(out_sg);
@@ -1453,9 +1455,10 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
mem_size = aead_size + (nsg * sizeof(struct scatterlist));
mem_size = mem_size + prot->aad_size;
mem_size = mem_size + MAX_IV_SIZE;
+ mem_size = mem_size + prot->tail_size;
/* Allocate a single block of memory which contains
- * aead_req || sgin[] || sgout[] || aad || iv.
+ * aead_req || sgin[] || sgout[] || aad || iv || tail.
* This order achieves correct alignment for aead_req, sgin, sgout.
*/
mem = kmalloc(mem_size, sk->sk_allocation);
@@ -1468,6 +1471,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
sgout = sgin + n_sgin;
aad = (u8 *)(sgout + n_sgout);
iv = aad + prot->aad_size;
+ tail = iv + MAX_IV_SIZE;
/* For CCM based ciphers, first byte of nonce+iv is a constant */
switch (prot->cipher_type) {
@@ -1521,9 +1525,16 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
err = tls_setup_from_iter(out_iov, data_len,
&pages, &sgout[1],
- (n_sgout - 1));
+ (n_sgout - 1 - tail_pages));
if (err < 0)
goto fallback_to_reg_recv;
+
+ if (prot->tail_size) {
+ sg_unmark_end(&sgout[pages]);
+ sg_set_buf(&sgout[pages + 1], tail,
+ prot->tail_size);
+ sg_mark_end(&sgout[pages + 1]);
+ }
} else if (out_sg) {
memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
} else {
@@ -1538,10 +1549,13 @@ fallback_to_reg_recv:
/* Prepare and submit AEAD request */
err = tls_do_decryption(sk, skb, sgin, sgout, iv,
- data_len, aead_req, darg);
+ data_len + prot->tail_size, aead_req, darg);
if (darg->async)
return 0;
+ if (prot->tail_size)
+ darg->tail = *tail;
+
/* Release the pages in case iov was mapped to pages */
for (; pages > 0; pages--)
put_page(sg_page(&sgout[pages]));
@@ -1583,9 +1597,16 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
return err;
if (darg->async)
goto decrypt_next;
+ /* If opportunistic TLS 1.3 ZC failed retry without ZC */
+ if (unlikely(darg->zc && prot->version == TLS_1_3_VERSION &&
+ darg->tail != TLS_RECORD_TYPE_DATA)) {
+ darg->zc = false;
+ TLS_INC_STATS(sock_net(sk), LINUX_MIN_TLSDECRYPTRETRY);
+ return decrypt_skb_update(sk, skb, dest, darg);
+ }
decrypt_done:
- pad = padding_length(prot, skb);
+ pad = tls_padding_length(prot, skb, darg);
if (pad < 0)
return pad;
@@ -1717,6 +1738,24 @@ out:
return copied ? : err;
}
+static void
+tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
+ size_t len_left, size_t decrypted, ssize_t done,
+ size_t *flushed_at)
+{
+ size_t max_rec;
+
+ if (len_left <= decrypted)
+ return;
+
+ max_rec = prot->overhead_size - prot->tail_size + TLS_MAX_PAYLOAD_SIZE;
+ if (done - *flushed_at < SZ_128K && tcp_inq(sk) > max_rec)
+ return;
+
+ *flushed_at = done;
+ sk_flush_backlog(sk);
+}
+
int tls_sw_recvmsg(struct sock *sk,
struct msghdr *msg,
size_t len,
@@ -1729,6 +1768,7 @@ int tls_sw_recvmsg(struct sock *sk,
struct sk_psock *psock;
unsigned char control = 0;
ssize_t decrypted = 0;
+ size_t flushed_at = 0;
struct strp_msg *rxm;
struct tls_msg *tlm;
struct sk_buff *skb;
@@ -1767,7 +1807,7 @@ int tls_sw_recvmsg(struct sock *sk,
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek &&
- prot->version != TLS_1_3_VERSION;
+ ctx->zc_capable;
decrypted = 0;
while (len && (decrypted + copied < target || ctx->recv_pkt)) {
struct tls_decrypt_arg darg = {};
@@ -1818,6 +1858,10 @@ int tls_sw_recvmsg(struct sock *sk,
if (err <= 0)
goto recv_end;
+ /* periodically flush backlog, and feed strparser */
+ tls_read_flush_backlog(sk, prot, len, to_decrypt,
+ decrypted + copied, &flushed_at);
+
ctx->recv_pkt = NULL;
__strp_unpause(&ctx->strp);
__skb_queue_tail(&ctx->rx_list, skb);
@@ -2249,6 +2293,14 @@ void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)
strp_check_rcv(&rx_ctx->strp);
}
+void tls_update_rx_zc_capable(struct tls_context *tls_ctx)
+{
+ struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx);
+
+ rx_ctx->zc_capable = tls_ctx->rx_no_pad ||
+ tls_ctx->prot_info.version != TLS_1_3_VERSION;
+}
+
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -2484,12 +2536,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
if (sw_ctx_rx) {
tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);
- if (crypto_info->version == TLS_1_3_VERSION)
- sw_ctx_rx->async_capable = 0;
- else
- sw_ctx_rx->async_capable =
- !!(tfm->__crt_alg->cra_flags &
- CRYPTO_ALG_ASYNC);
+ tls_update_rx_zc_capable(ctx);
+ sw_ctx_rx->async_capable =
+ crypto_info->version != TLS_1_3_VERSION &&
+ !!(tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC);
/* Set up strparser */
memset(&cb, 0, sizeof(cb));
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 5d70b04c482c..e71ec5846be9 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -235,6 +235,7 @@ FIXTURE_VARIANT(tls)
{
uint16_t tls_version;
uint16_t cipher_type;
+ bool nopad;
};
FIXTURE_VARIANT_ADD(tls, 12_aes_gcm)
@@ -297,9 +298,17 @@ FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256)
.cipher_type = TLS_CIPHER_AES_GCM_256,
};
+FIXTURE_VARIANT_ADD(tls, 13_nopad)
+{
+ .tls_version = TLS_1_3_VERSION,
+ .cipher_type = TLS_CIPHER_AES_GCM_128,
+ .nopad = true,
+};
+
FIXTURE_SETUP(tls)
{
struct tls_crypto_info_keys tls12;
+ int one = 1;
int ret;
tls_crypto_info_init(variant->tls_version, variant->cipher_type,
@@ -315,6 +324,12 @@ FIXTURE_SETUP(tls)
ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len);
ASSERT_EQ(ret, 0);
+
+ if (variant->nopad) {
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+ (void *)&one, sizeof(one));
+ ASSERT_EQ(ret, 0);
+ }
}
FIXTURE_TEARDOWN(tls)