From 52db6685932e326ed607644ab7ebdae8c194adda Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 10 Jul 2019 16:59:55 +0900 Subject: ASoC: simple_card_utils.h: care NULL dai at asoc_simple_debug_dai() props->xxx_dai might be NULL when DPCM. This patch cares it for debug. Fixes: commit 0580dde59438 ("ASoC: simple-card-utils: add asoc_simple_debug_info()") Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87o922gw4u.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/simple_card_utils.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h index 954563ee2277..985a5f583de4 100644 --- a/include/sound/simple_card_utils.h +++ b/include/sound/simple_card_utils.h @@ -141,6 +141,10 @@ inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv, { struct device *dev = simple_priv_to_dev(priv); + /* dai might be NULL */ + if (!dai) + return; + if (dai->name) dev_dbg(dev, "%s dai name = %s\n", name, dai->name); -- cgit v1.2.3 From 62ec3d13601bd626ca7a0edef6d45dbb753d94e8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 21 Jul 2019 23:23:08 +0900 Subject: ASoC: SOF: use __u32 instead of uint32_t in uapi headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_UAPI_HEADER_TEST=y, exported headers are compile-tested to make sure they can be included from user-space. Currently, header.h and fw.h are excluded from the test coverage. To make them join the compile-test, we need to fix the build errors attached below. For a case like this, we decided to use __u{8,16,32,64} variable types in this discussion: https://lkml.org/lkml/2019/6/5/18 Build log: CC usr/include/sound/sof/header.h.s CC usr/include/sound/sof/fw.h.s In file included from :32:0: ./usr/include/sound/sof/header.h:19:2: error: unknown type name ‘uint32_t’ uint32_t magic; /**< 'S', 'O', 'F', '\0' */ ^~~~~~~~ ./usr/include/sound/sof/header.h:20:2: error: unknown type name ‘uint32_t’ uint32_t type; /**< component specific type */ ^~~~~~~~ ./usr/include/sound/sof/header.h:21:2: error: unknown type name ‘uint32_t’ uint32_t size; /**< size in bytes of data excl. this struct */ ^~~~~~~~ ./usr/include/sound/sof/header.h:22:2: error: unknown type name ‘uint32_t’ uint32_t abi; /**< SOF ABI version */ ^~~~~~~~ ./usr/include/sound/sof/header.h:23:2: error: unknown type name ‘uint32_t’ uint32_t reserved[4]; /**< reserved for future use */ ^~~~~~~~ ./usr/include/sound/sof/header.h:24:2: error: unknown type name ‘uint32_t’ uint32_t data[0]; /**< Component data - opaque to core */ ^~~~~~~~ In file included from :32:0: ./usr/include/sound/sof/fw.h:49:2: error: unknown type name ‘uint32_t’ uint32_t size; /* bytes minus this header */ ^~~~~~~~ ./usr/include/sound/sof/fw.h:50:2: error: unknown type name ‘uint32_t’ uint32_t offset; /* offset from base */ ^~~~~~~~ ./usr/include/sound/sof/fw.h:64:2: error: unknown type name ‘uint32_t’ uint32_t size; /* bytes minus this header */ ^~~~~~~~ ./usr/include/sound/sof/fw.h:65:2: error: unknown type name ‘uint32_t’ uint32_t num_blocks; /* number of blocks */ ^~~~~~~~ ./usr/include/sound/sof/fw.h:73:2: error: unknown type name ‘uint32_t’ uint32_t file_size; /* size of file minus this header */ ^~~~~~~~ ./usr/include/sound/sof/fw.h:74:2: error: unknown type name ‘uint32_t’ uint32_t num_modules; /* number of modules */ ^~~~~~~~ ./usr/include/sound/sof/fw.h:75:2: error: unknown type name ‘uint32_t’ uint32_t abi; /* version of header format */ ^~~~~~~~ Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20190721142308.30306-1-yamada.masahiro@socionext.com Signed-off-by: Mark Brown --- include/uapi/sound/sof/fw.h | 16 +++++++++------- include/uapi/sound/sof/header.h | 14 ++++++++------ 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/uapi/sound/sof/fw.h b/include/uapi/sound/sof/fw.h index 1afca973eb09..e9f697467a86 100644 --- a/include/uapi/sound/sof/fw.h +++ b/include/uapi/sound/sof/fw.h @@ -13,6 +13,8 @@ #ifndef __INCLUDE_UAPI_SOF_FW_H__ #define __INCLUDE_UAPI_SOF_FW_H__ +#include + #define SND_SOF_FW_SIG_SIZE 4 #define SND_SOF_FW_ABI 1 #define SND_SOF_FW_SIG "Reef" @@ -46,8 +48,8 @@ enum snd_sof_fw_blk_type { struct snd_sof_blk_hdr { enum snd_sof_fw_blk_type type; - uint32_t size; /* bytes minus this header */ - uint32_t offset; /* offset from base */ + __u32 size; /* bytes minus this header */ + __u32 offset; /* offset from base */ } __packed; /* @@ -61,8 +63,8 @@ enum snd_sof_fw_mod_type { struct snd_sof_mod_hdr { enum snd_sof_fw_mod_type type; - uint32_t size; /* bytes minus this header */ - uint32_t num_blocks; /* number of blocks */ + __u32 size; /* bytes minus this header */ + __u32 num_blocks; /* number of blocks */ } __packed; /* @@ -70,9 +72,9 @@ struct snd_sof_mod_hdr { */ struct snd_sof_fw_header { unsigned char sig[SND_SOF_FW_SIG_SIZE]; /* "Reef" */ - uint32_t file_size; /* size of file minus this header */ - uint32_t num_modules; /* number of modules */ - uint32_t abi; /* version of header format */ + __u32 file_size; /* size of file minus this header */ + __u32 num_modules; /* number of modules */ + __u32 abi; /* version of header format */ } __packed; #endif diff --git a/include/uapi/sound/sof/header.h b/include/uapi/sound/sof/header.h index 7868990b0d6f..5f4518e7a972 100644 --- a/include/uapi/sound/sof/header.h +++ b/include/uapi/sound/sof/header.h @@ -9,6 +9,8 @@ #ifndef __INCLUDE_UAPI_SOUND_SOF_USER_HEADER_H__ #define __INCLUDE_UAPI_SOUND_SOF_USER_HEADER_H__ +#include + /* * Header for all non IPC ABI data. * @@ -16,12 +18,12 @@ * Used by any bespoke component data structures or binary blobs. */ struct sof_abi_hdr { - uint32_t magic; /**< 'S', 'O', 'F', '\0' */ - uint32_t type; /**< component specific type */ - uint32_t size; /**< size in bytes of data excl. this struct */ - uint32_t abi; /**< SOF ABI version */ - uint32_t reserved[4]; /**< reserved for future use */ - uint32_t data[0]; /**< Component data - opaque to core */ + __u32 magic; /**< 'S', 'O', 'F', '\0' */ + __u32 type; /**< component specific type */ + __u32 size; /**< size in bytes of data excl. this struct */ + __u32 abi; /**< SOF ABI version */ + __u32 reserved[4]; /**< reserved for future use */ + __u32 data[0]; /**< Component data - opaque to core */ } __packed; #endif -- cgit v1.2.3 From 318892ac068397f40ff81d9155898da01493b1d2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Jul 2019 10:29:14 -0700 Subject: net/tls: don't arm strparser immediately in tls_set_sw_offload() In tls_set_device_offload_rx() we prepare the software context for RX fallback and proceed to add the connection to the device. Unfortunately, software context prep includes arming strparser so in case of a later error we have to release the socket lock to call strp_done(). In preparation for not releasing the socket lock half way through callbacks move arming strparser into a separate function. Following patches will make use of that. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: Daniel Borkmann --- include/net/tls.h | 1 + net/tls/tls_device.c | 1 + net/tls/tls_main.c | 8 +++++--- net/tls/tls_sw.c | 19 ++++++++++++------- 4 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 584609174fe0..43f551cd508b 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -355,6 +355,7 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval, unsigned int optlen); int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); +void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 7c0b2b778703..4d67d72f007c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1045,6 +1045,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) rc = tls_set_sw_offload(sk, ctx, 0); if (rc) goto release_ctx; + tls_sw_strparser_arm(sk, ctx); rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX, &ctx->crypto_recv.info, diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 4674e57e66b0..85a9d7d57b32 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -526,6 +526,8 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, { #endif rc = tls_set_sw_offload(sk, ctx, 1); + if (rc) + goto err_crypto_info; conf = TLS_SW; } } else { @@ -537,13 +539,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, { #endif rc = tls_set_sw_offload(sk, ctx, 0); + if (rc) + goto err_crypto_info; + tls_sw_strparser_arm(sk, ctx); conf = TLS_SW; } } - if (rc) - goto err_crypto_info; - if (tx) ctx->tx_conf = conf; else diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 53b4ad94e74a..f58a8ffc2a9c 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2160,6 +2160,18 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx) } } +void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx) +{ + struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx); + + write_lock_bh(&sk->sk_callback_lock); + rx_ctx->saved_data_ready = sk->sk_data_ready; + sk->sk_data_ready = tls_data_ready; + write_unlock_bh(&sk->sk_callback_lock); + + strp_check_rcv(&rx_ctx->strp); +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2357,13 +2369,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) cb.parse_msg = tls_read_size; strp_init(&sw_ctx_rx->strp, sk, &cb); - - write_lock_bh(&sk->sk_callback_lock); - sw_ctx_rx->saved_data_ready = sk->sk_data_ready; - sk->sk_data_ready = tls_data_ready; - write_unlock_bh(&sk->sk_callback_lock); - - strp_check_rcv(&sw_ctx_rx->strp); } goto out; -- cgit v1.2.3 From f87e62d45e51b12d48d2cb46b5cde8f83b866bc4 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Jul 2019 10:29:16 -0700 Subject: net/tls: remove close callback sock unlock/lock around TX work flush The tls close() callback currently drops the sock lock, makes a cancel_delayed_work_sync() call, and then relocks the sock. By restructuring the code we can avoid droping lock and then reclaiming it. To simplify this we do the following, tls_sk_proto_close set_bit(CLOSING) set_bit(SCHEDULE) cancel_delay_work_sync() <- cancel workqueue lock_sock(sk) ... release_sock(sk) strp_done() Setting the CLOSING bit prevents the SCHEDULE bit from being cleared by any workqueue items e.g. if one happens to be scheduled and run between when we set SCHEDULE bit and cancel work. Then because SCHEDULE bit is set now no new work will be scheduled. Tested with net selftests and bpf selftests. Signed-off-by: John Fastabend Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: Daniel Borkmann --- include/net/tls.h | 2 ++ net/tls/tls_main.c | 3 +++ net/tls/tls_sw.c | 24 +++++++++++++++++------- 3 files changed, 22 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 43f551cd508b..d4276cb6de53 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -162,6 +162,7 @@ struct tls_sw_context_tx { int async_capable; #define BIT_TX_SCHEDULED 0 +#define BIT_TX_CLOSING 1 unsigned long tx_bitmask; }; @@ -360,6 +361,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_close(struct sock *sk, long timeout); +void tls_sw_cancel_work_tx(struct tls_context *tls_ctx); void tls_sw_free_resources_tx(struct sock *sk); void tls_sw_free_resources_rx(struct sock *sk); void tls_sw_release_resources_rx(struct sock *sk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 7ab682ed99fa..5c29b410cf7d 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -268,6 +268,9 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) void (*sk_proto_close)(struct sock *sk, long timeout); bool free_ctx = false; + if (ctx->tx_conf == TLS_SW) + tls_sw_cancel_work_tx(ctx); + lock_sock(sk); sk_proto_close = ctx->sk_proto_close; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f58a8ffc2a9c..38c0e53c727d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2054,6 +2054,15 @@ static void tls_data_ready(struct sock *sk) } } +void tls_sw_cancel_work_tx(struct tls_context *tls_ctx) +{ + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + + set_bit(BIT_TX_CLOSING, &ctx->tx_bitmask); + set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask); + cancel_delayed_work_sync(&ctx->tx_work.work); +} + void tls_sw_free_resources_tx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2065,11 +2074,6 @@ void tls_sw_free_resources_tx(struct sock *sk) if (atomic_read(&ctx->encrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - release_sock(sk); - cancel_delayed_work_sync(&ctx->tx_work.work); - lock_sock(sk); - - /* Tx whatever records we can transmit and abandon the rest */ tls_tx_records(sk, -1); /* Free up un-sent records in tx_list. First, free @@ -2137,11 +2141,17 @@ static void tx_work_handler(struct work_struct *work) struct tx_work, work); struct sock *sk = tx_work->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + struct tls_sw_context_tx *ctx; - if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) + if (unlikely(!tls_ctx)) return; + ctx = tls_sw_ctx_tx(tls_ctx); + if (test_bit(BIT_TX_CLOSING, &ctx->tx_bitmask)) + return; + + if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) + return; lock_sock(sk); tls_tx_records(sk, -1); release_sock(sk); -- cgit v1.2.3 From 313ab004805cf52a42673b15852b3842474ccd87 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Jul 2019 10:29:17 -0700 Subject: net/tls: remove sock unlock/lock around strp_done() The tls close() callback currently drops the sock lock to call strp_done(). Split up the RX cleanup into stopping the strparser and releasing most resources, syncing strparser and finally freeing the context. To avoid the need for a strp_done() call on the cleanup path of device offload make sure we don't arm the strparser until we are sure init will be successful. Signed-off-by: John Fastabend Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: Daniel Borkmann --- include/net/tls.h | 7 +++--- net/tls/tls_device.c | 1 - net/tls/tls_main.c | 61 ++++++++++++++++++++++++++-------------------------- net/tls/tls_sw.c | 40 +++++++++++++++++++++++++--------- 4 files changed, 64 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index d4276cb6de53..235508e35fd4 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -107,9 +107,7 @@ struct tls_device { enum { TLS_BASE, TLS_SW, -#ifdef CONFIG_TLS_DEVICE TLS_HW, -#endif TLS_HW_RECORD, TLS_NUM_CONFIG, }; @@ -357,14 +355,17 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval, int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); +void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_close(struct sock *sk, long timeout); void tls_sw_cancel_work_tx(struct tls_context *tls_ctx); -void tls_sw_free_resources_tx(struct sock *sk); +void tls_sw_release_resources_tx(struct sock *sk); +void tls_sw_free_ctx_tx(struct tls_context *tls_ctx); void tls_sw_free_resources_rx(struct sock *sk); void tls_sw_release_resources_rx(struct sock *sk); +void tls_sw_free_ctx_rx(struct tls_context *tls_ctx); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); bool tls_sw_stream_read(const struct sock *sk); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 4d67d72f007c..7c0b2b778703 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1045,7 +1045,6 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) rc = tls_set_sw_offload(sk, ctx, 0); if (rc) goto release_ctx; - tls_sw_strparser_arm(sk, ctx); rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX, &ctx->crypto_recv.info, diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 5c29b410cf7d..d152a00a7a27 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -261,24 +261,9 @@ void tls_ctx_free(struct tls_context *ctx) kfree(ctx); } -static void tls_sk_proto_close(struct sock *sk, long timeout) +static void tls_sk_proto_cleanup(struct sock *sk, + struct tls_context *ctx, long timeo) { - struct tls_context *ctx = tls_get_ctx(sk); - long timeo = sock_sndtimeo(sk, 0); - void (*sk_proto_close)(struct sock *sk, long timeout); - bool free_ctx = false; - - if (ctx->tx_conf == TLS_SW) - tls_sw_cancel_work_tx(ctx); - - lock_sock(sk); - sk_proto_close = ctx->sk_proto_close; - - if (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE) { - free_ctx = true; - goto skip_tx_cleanup; - } - if (unlikely(sk->sk_write_pending) && !wait_on_pending_writer(sk, &timeo)) tls_handle_open_record(sk, 0); @@ -287,7 +272,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) if (ctx->tx_conf == TLS_SW) { kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); - tls_sw_free_resources_tx(sk); + tls_sw_release_resources_tx(sk); #ifdef CONFIG_TLS_DEVICE } else if (ctx->tx_conf == TLS_HW) { tls_device_free_resources_tx(sk); @@ -295,26 +280,40 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) } if (ctx->rx_conf == TLS_SW) - tls_sw_free_resources_rx(sk); + tls_sw_release_resources_rx(sk); #ifdef CONFIG_TLS_DEVICE if (ctx->rx_conf == TLS_HW) tls_device_offload_cleanup_rx(sk); - - if (ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW) { -#else - { #endif - tls_ctx_free(ctx); - ctx = NULL; - } +} + +static void tls_sk_proto_close(struct sock *sk, long timeout) +{ + void (*sk_proto_close)(struct sock *sk, long timeout); + struct tls_context *ctx = tls_get_ctx(sk); + long timeo = sock_sndtimeo(sk, 0); + bool free_ctx; + + if (ctx->tx_conf == TLS_SW) + tls_sw_cancel_work_tx(ctx); + + lock_sock(sk); + free_ctx = ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW; + sk_proto_close = ctx->sk_proto_close; + + if (ctx->tx_conf != TLS_BASE || ctx->rx_conf != TLS_BASE) + tls_sk_proto_cleanup(sk, ctx, timeo); -skip_tx_cleanup: release_sock(sk); + if (ctx->tx_conf == TLS_SW) + tls_sw_free_ctx_tx(ctx); + if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW) + tls_sw_strparser_done(ctx); + if (ctx->rx_conf == TLS_SW) + tls_sw_free_ctx_rx(ctx); sk_proto_close(sk, timeout); - /* free ctx for TLS_HW_RECORD, used by tcp_set_state - * for sk->sk_prot->unhash [tls_hw_unhash] - */ + if (free_ctx) tls_ctx_free(ctx); } @@ -541,9 +540,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, rc = tls_set_sw_offload(sk, ctx, 0); if (rc) goto err_crypto_info; - tls_sw_strparser_arm(sk, ctx); conf = TLS_SW; } + tls_sw_strparser_arm(sk, ctx); } if (tx) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 38c0e53c727d..91d21b048a9b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2063,7 +2063,7 @@ void tls_sw_cancel_work_tx(struct tls_context *tls_ctx) cancel_delayed_work_sync(&ctx->tx_work.work); } -void tls_sw_free_resources_tx(struct sock *sk) +void tls_sw_release_resources_tx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); @@ -2096,6 +2096,11 @@ void tls_sw_free_resources_tx(struct sock *sk) crypto_free_aead(ctx->aead_send); tls_free_open_rec(sk); +} + +void tls_sw_free_ctx_tx(struct tls_context *tls_ctx) +{ + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); kfree(ctx); } @@ -2114,25 +2119,40 @@ void tls_sw_release_resources_rx(struct sock *sk) skb_queue_purge(&ctx->rx_list); crypto_free_aead(ctx->aead_recv); strp_stop(&ctx->strp); - write_lock_bh(&sk->sk_callback_lock); - sk->sk_data_ready = ctx->saved_data_ready; - write_unlock_bh(&sk->sk_callback_lock); - release_sock(sk); - strp_done(&ctx->strp); - lock_sock(sk); + /* If tls_sw_strparser_arm() was not called (cleanup paths) + * we still want to strp_stop(), but sk->sk_data_ready was + * never swapped. + */ + if (ctx->saved_data_ready) { + write_lock_bh(&sk->sk_callback_lock); + sk->sk_data_ready = ctx->saved_data_ready; + write_unlock_bh(&sk->sk_callback_lock); + } } } -void tls_sw_free_resources_rx(struct sock *sk) +void tls_sw_strparser_done(struct tls_context *tls_ctx) { - struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - tls_sw_release_resources_rx(sk); + strp_done(&ctx->strp); +} + +void tls_sw_free_ctx_rx(struct tls_context *tls_ctx) +{ + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); kfree(ctx); } +void tls_sw_free_resources_rx(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + + tls_sw_release_resources_rx(sk); + tls_sw_free_ctx_rx(tls_ctx); +} + /* The work handler to transmitt the encrypted records in tx_list */ static void tx_work_handler(struct work_struct *work) { -- cgit v1.2.3 From 32857cf57f920cdc03b5095f08febec94cf9c36b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Jul 2019 10:29:18 -0700 Subject: net/tls: fix transition through disconnect with close It is possible (via shutdown()) for TCP socks to go through TCP_CLOSE state via tcp_disconnect() without actually calling tcp_close which would then call the tls close callback. Because of this a user could disconnect a socket then put it in a LISTEN state which would break our assumptions about sockets always being ESTABLISHED state. More directly because close() can call unhash() and unhash is implemented by sockmap if a sockmap socket has TLS enabled we can incorrectly destroy the psock from unhash() and then call its close handler again. But because the psock (sockmap socket representation) is already destroyed we call close handler in sk->prot. However, in some cases (TLS BASE/BASE case) this will still point at the sockmap close handler resulting in a circular call and crash reported by syzbot. To fix both above issues implement the unhash() routine for TLS. v4: - add note about tls offload still needing the fix; - move sk_proto to the cold cache line; - split TX context free into "release" and "free", otherwise the GC work itself is in already freed memory; - more TX before RX for consistency; - reuse tls_ctx_free(); - schedule the GC work after we're done with context to avoid UAF; - don't set the unhash in all modes, all modes "inherit" TLS_BASE's callbacks anyway; - disable the unhash hook for TLS_HW. Fixes: 3c4d7559159bf ("tls: kernel TLS support") Reported-by: Eric Dumazet Signed-off-by: John Fastabend Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- Documentation/networking/tls-offload.rst | 6 ++++ include/net/tls.h | 5 ++- net/tls/tls_main.c | 55 ++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index 048e5ca44824..8a1eeb393316 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -513,3 +513,9 @@ Redirects leak clear text In the RX direction, if segment has already been decrypted by the device and it gets redirected or mirrored - clear text will be transmitted out. + +shutdown() doesn't clear TLS state +---------------------------------- + +shutdown() system call allows for a TLS socket to be reused as a different +connection. Offload doesn't currently handle that. diff --git a/include/net/tls.h b/include/net/tls.h index 235508e35fd4..9e425ac2de45 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -271,6 +271,8 @@ struct tls_context { unsigned long flags; /* cache cold stuff */ + struct proto *sk_proto; + void (*sk_destruct)(struct sock *sk); void (*sk_proto_close)(struct sock *sk, long timeout); @@ -288,6 +290,8 @@ struct tls_context { struct list_head list; refcount_t refcount; + + struct work_struct gc; }; enum tls_offload_ctx_dir { @@ -359,7 +363,6 @@ void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -void tls_sw_close(struct sock *sk, long timeout); void tls_sw_cancel_work_tx(struct tls_context *tls_ctx); void tls_sw_release_resources_tx(struct sock *sk); void tls_sw_free_ctx_tx(struct tls_context *tls_ctx); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index d152a00a7a27..48f1c26459d0 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -261,6 +261,33 @@ void tls_ctx_free(struct tls_context *ctx) kfree(ctx); } +static void tls_ctx_free_deferred(struct work_struct *gc) +{ + struct tls_context *ctx = container_of(gc, struct tls_context, gc); + + /* Ensure any remaining work items are completed. The sk will + * already have lost its tls_ctx reference by the time we get + * here so no xmit operation will actually be performed. + */ + if (ctx->tx_conf == TLS_SW) { + tls_sw_cancel_work_tx(ctx); + tls_sw_free_ctx_tx(ctx); + } + + if (ctx->rx_conf == TLS_SW) { + tls_sw_strparser_done(ctx); + tls_sw_free_ctx_rx(ctx); + } + + tls_ctx_free(ctx); +} + +static void tls_ctx_free_wq(struct tls_context *ctx) +{ + INIT_WORK(&ctx->gc, tls_ctx_free_deferred); + schedule_work(&ctx->gc); +} + static void tls_sk_proto_cleanup(struct sock *sk, struct tls_context *ctx, long timeo) { @@ -288,6 +315,26 @@ static void tls_sk_proto_cleanup(struct sock *sk, #endif } +static void tls_sk_proto_unhash(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + long timeo = sock_sndtimeo(sk, 0); + struct tls_context *ctx; + + if (unlikely(!icsk->icsk_ulp_data)) { + if (sk->sk_prot->unhash) + sk->sk_prot->unhash(sk); + } + + ctx = tls_get_ctx(sk); + tls_sk_proto_cleanup(sk, ctx, timeo); + icsk->icsk_ulp_data = NULL; + + if (ctx->sk_proto->unhash) + ctx->sk_proto->unhash(sk); + tls_ctx_free_wq(ctx); +} + static void tls_sk_proto_close(struct sock *sk, long timeout) { void (*sk_proto_close)(struct sock *sk, long timeout); @@ -305,6 +352,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) if (ctx->tx_conf != TLS_BASE || ctx->rx_conf != TLS_BASE) tls_sk_proto_cleanup(sk, ctx, timeo); + sk->sk_prot = ctx->sk_proto; release_sock(sk); if (ctx->tx_conf == TLS_SW) tls_sw_free_ctx_tx(ctx); @@ -608,6 +656,7 @@ static struct tls_context *create_ctx(struct sock *sk) ctx->setsockopt = sk->sk_prot->setsockopt; ctx->getsockopt = sk->sk_prot->getsockopt; ctx->sk_proto_close = sk->sk_prot->close; + ctx->unhash = sk->sk_prot->unhash; return ctx; } @@ -731,6 +780,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt; prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt; prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close; + prot[TLS_BASE][TLS_BASE].unhash = tls_sk_proto_unhash; prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; @@ -748,16 +798,20 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], #ifdef CONFIG_TLS_DEVICE prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; + prot[TLS_HW][TLS_BASE].unhash = base->unhash; prot[TLS_HW][TLS_BASE].sendmsg = tls_device_sendmsg; prot[TLS_HW][TLS_BASE].sendpage = tls_device_sendpage; prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW]; + prot[TLS_HW][TLS_SW].unhash = base->unhash; prot[TLS_HW][TLS_SW].sendmsg = tls_device_sendmsg; prot[TLS_HW][TLS_SW].sendpage = tls_device_sendpage; prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW]; + prot[TLS_BASE][TLS_HW].unhash = base->unhash; prot[TLS_SW][TLS_HW] = prot[TLS_SW][TLS_SW]; + prot[TLS_SW][TLS_HW].unhash = base->unhash; prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW]; #endif @@ -794,6 +848,7 @@ static int tls_init(struct sock *sk) tls_build_proto(sk); ctx->tx_conf = TLS_BASE; ctx->rx_conf = TLS_BASE; + ctx->sk_proto = sk->sk_prot; update_sk_prot(sk, ctx); out: return rc; -- cgit v1.2.3 From 95fa145479fbc0a0c1fd3274ceb42ec03c042a4a Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Jul 2019 10:29:22 -0700 Subject: bpf: sockmap/tls, close can race with map free When a map free is called and in parallel a socket is closed we have two paths that can potentially reset the socket prot ops, the bpf close() path and the map free path. This creates a problem with which prot ops should be used from the socket closed side. If the map_free side completes first then we want to call the original lowest level ops. However, if the tls path runs first we want to call the sockmap ops. Additionally there was no locking around prot updates in TLS code paths so the prot ops could be changed multiple times once from TLS path and again from sockmap side potentially leaving ops pointed at either TLS or sockmap when psock and/or tls context have already been destroyed. To fix this race first only update ops inside callback lock so that TLS, sockmap and lowest level all agree on prot state. Second and a ULP callback update() so that lower layers can inform the upper layer when they are being removed allowing the upper layer to reset prot ops. This gets us close to allowing sockmap and tls to be stacked in arbitrary order but will save that patch for *next trees. v4: - make sure we don't free things for device; - remove the checks which swap the callbacks back only if TLS is at the top. Reported-by: syzbot+06537213db7ba2745c4a@syzkaller.appspotmail.com Fixes: 02c558b2d5d6 ("bpf: sockmap, support for msg_peek in sk_msg with redirect ingress") Signed-off-by: John Fastabend Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: Daniel Borkmann --- include/linux/skmsg.h | 8 +++++++- include/net/tcp.h | 3 +++ net/core/skmsg.c | 4 ++-- net/ipv4/tcp_ulp.c | 13 +++++++++++++ net/tls/tls_main.c | 33 ++++++++++++++++++++++++++++----- 5 files changed, 53 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 50ced8aba9db..e4b3fb4bb77c 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -354,7 +354,13 @@ static inline void sk_psock_restore_proto(struct sock *sk, sk->sk_write_space = psock->saved_write_space; if (psock->sk_proto) { - sk->sk_prot = psock->sk_proto; + struct inet_connection_sock *icsk = inet_csk(sk); + bool has_ulp = !!icsk->icsk_ulp_data; + + if (has_ulp) + tcp_update_ulp(sk, psock->sk_proto); + else + sk->sk_prot = psock->sk_proto; psock->sk_proto = NULL; } } diff --git a/include/net/tcp.h b/include/net/tcp.h index f42d300f0cfa..c82a23470081 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2103,6 +2103,8 @@ struct tcp_ulp_ops { /* initialize ulp */ int (*init)(struct sock *sk); + /* update ulp */ + void (*update)(struct sock *sk, struct proto *p); /* cleanup ulp */ void (*release)(struct sock *sk); @@ -2114,6 +2116,7 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type); int tcp_set_ulp(struct sock *sk, const char *name); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); +void tcp_update_ulp(struct sock *sk, struct proto *p); #define MODULE_ALIAS_TCP_ULP(name) \ __MODULE_INFO(alias, alias_userspace, name); \ diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 93bffaad2135..6832eeb4b785 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -585,12 +585,12 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy); void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { - rcu_assign_sk_user_data(sk, NULL); sk_psock_cork_free(psock); sk_psock_zap_ingress(psock); - sk_psock_restore_proto(sk, psock); write_lock_bh(&sk->sk_callback_lock); + sk_psock_restore_proto(sk, psock); + rcu_assign_sk_user_data(sk, NULL); if (psock->progs.skb_parser) sk_psock_stop_strp(sk, psock); write_unlock_bh(&sk->sk_callback_lock); diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 3d8a1d835471..4849edb62d52 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -96,6 +96,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen) rcu_read_unlock(); } +void tcp_update_ulp(struct sock *sk, struct proto *proto) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (!icsk->icsk_ulp_ops) { + sk->sk_prot = proto; + return; + } + + if (icsk->icsk_ulp_ops->update) + icsk->icsk_ulp_ops->update(sk, proto); +} + void tcp_cleanup_ulp(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 48f1c26459d0..f208f8455ef2 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -328,7 +328,10 @@ static void tls_sk_proto_unhash(struct sock *sk) ctx = tls_get_ctx(sk); tls_sk_proto_cleanup(sk, ctx, timeo); + write_lock_bh(&sk->sk_callback_lock); icsk->icsk_ulp_data = NULL; + sk->sk_prot = ctx->sk_proto; + write_unlock_bh(&sk->sk_callback_lock); if (ctx->sk_proto->unhash) ctx->sk_proto->unhash(sk); @@ -337,7 +340,7 @@ static void tls_sk_proto_unhash(struct sock *sk) static void tls_sk_proto_close(struct sock *sk, long timeout) { - void (*sk_proto_close)(struct sock *sk, long timeout); + struct inet_connection_sock *icsk = inet_csk(sk); struct tls_context *ctx = tls_get_ctx(sk); long timeo = sock_sndtimeo(sk, 0); bool free_ctx; @@ -347,12 +350,15 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) lock_sock(sk); free_ctx = ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW; - sk_proto_close = ctx->sk_proto_close; if (ctx->tx_conf != TLS_BASE || ctx->rx_conf != TLS_BASE) tls_sk_proto_cleanup(sk, ctx, timeo); + write_lock_bh(&sk->sk_callback_lock); + if (free_ctx) + icsk->icsk_ulp_data = NULL; sk->sk_prot = ctx->sk_proto; + write_unlock_bh(&sk->sk_callback_lock); release_sock(sk); if (ctx->tx_conf == TLS_SW) tls_sw_free_ctx_tx(ctx); @@ -360,7 +366,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) tls_sw_strparser_done(ctx); if (ctx->rx_conf == TLS_SW) tls_sw_free_ctx_rx(ctx); - sk_proto_close(sk, timeout); + ctx->sk_proto_close(sk, timeout); if (free_ctx) tls_ctx_free(ctx); @@ -827,7 +833,7 @@ static int tls_init(struct sock *sk) int rc = 0; if (tls_hw_prot(sk)) - goto out; + return 0; /* The TLS ulp is currently supported only for TCP sockets * in ESTABLISHED state. @@ -838,22 +844,38 @@ static int tls_init(struct sock *sk) if (sk->sk_state != TCP_ESTABLISHED) return -ENOTSUPP; + tls_build_proto(sk); + /* allocate tls context */ + write_lock_bh(&sk->sk_callback_lock); ctx = create_ctx(sk); if (!ctx) { rc = -ENOMEM; goto out; } - tls_build_proto(sk); ctx->tx_conf = TLS_BASE; ctx->rx_conf = TLS_BASE; ctx->sk_proto = sk->sk_prot; update_sk_prot(sk, ctx); out: + write_unlock_bh(&sk->sk_callback_lock); return rc; } +static void tls_update(struct sock *sk, struct proto *p) +{ + struct tls_context *ctx; + + ctx = tls_get_ctx(sk); + if (likely(ctx)) { + ctx->sk_proto_close = p->close; + ctx->sk_proto = p; + } else { + sk->sk_prot = p; + } +} + void tls_register_device(struct tls_device *device) { spin_lock_bh(&device_spinlock); @@ -874,6 +896,7 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .name = "tls", .owner = THIS_MODULE, .init = tls_init, + .update = tls_update, }; static int __init tls_register(void) -- cgit v1.2.3 From ae24fb49d01103c80d6ff3b78714259c1c62c958 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 22 Jul 2019 15:40:07 +0100 Subject: iommu/virtio: Update to most recent specification Following specification review a few things were changed in v8 of the virtio-iommu series [1], but have been omitted when merging the base driver. Add them now: * Remove the EXEC flag. * Add feature bit for the MMIO flag. * Change domain_bits to domain_range. * Add NOMEM status flag. [1] https://lore.kernel.org/linux-iommu/20190530170929.19366-1-jean-philippe.brucker@arm.com/ Fixes: edcd69ab9a32 ("iommu: Add virtio-iommu driver") Reported-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Signed-off-by: Michael S. Tsirkin Reviewed-by: Eric Auger Tested-by: Eric Auger Acked-by: Joerg Roedel --- drivers/iommu/virtio-iommu.c | 40 ++++++++++++++++++++++++++++----------- include/uapi/linux/virtio_iommu.h | 32 +++++++++++++++++-------------- 2 files changed, 47 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 433f4d2ee956..80a740df0737 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -2,7 +2,7 @@ /* * Virtio driver for the paravirtualized IOMMU * - * Copyright (C) 2018 Arm Limited + * Copyright (C) 2019 Arm Limited */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -47,7 +47,10 @@ struct viommu_dev { /* Device configuration */ struct iommu_domain_geometry geometry; u64 pgsize_bitmap; - u8 domain_bits; + u32 first_domain; + u32 last_domain; + /* Supported MAP flags */ + u32 map_flags; u32 probe_size; }; @@ -62,6 +65,7 @@ struct viommu_domain { struct viommu_dev *viommu; struct mutex mutex; /* protects viommu pointer */ unsigned int id; + u32 map_flags; spinlock_t mappings_lock; struct rb_root_cached mappings; @@ -113,6 +117,8 @@ static int viommu_get_req_errno(void *buf, size_t len) return -ENOENT; case VIRTIO_IOMMU_S_FAULT: return -EFAULT; + case VIRTIO_IOMMU_S_NOMEM: + return -ENOMEM; case VIRTIO_IOMMU_S_IOERR: case VIRTIO_IOMMU_S_DEVERR: default: @@ -607,15 +613,15 @@ static int viommu_domain_finalise(struct viommu_dev *viommu, { int ret; struct viommu_domain *vdomain = to_viommu_domain(domain); - unsigned int max_domain = viommu->domain_bits > 31 ? ~0 : - (1U << viommu->domain_bits) - 1; vdomain->viommu = viommu; + vdomain->map_flags = viommu->map_flags; domain->pgsize_bitmap = viommu->pgsize_bitmap; domain->geometry = viommu->geometry; - ret = ida_alloc_max(&viommu->domain_ids, max_domain, GFP_KERNEL); + ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain, + viommu->last_domain, GFP_KERNEL); if (ret >= 0) vdomain->id = (unsigned int)ret; @@ -710,7 +716,7 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { int ret; - int flags; + u32 flags; struct virtio_iommu_req_map map; struct viommu_domain *vdomain = to_viommu_domain(domain); @@ -718,6 +724,9 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova, (prot & IOMMU_WRITE ? VIRTIO_IOMMU_MAP_F_WRITE : 0) | (prot & IOMMU_MMIO ? VIRTIO_IOMMU_MAP_F_MMIO : 0); + if (flags & ~vdomain->map_flags) + return -EINVAL; + ret = viommu_add_mapping(vdomain, iova, paddr, size, flags); if (ret) return ret; @@ -1027,7 +1036,8 @@ static int viommu_probe(struct virtio_device *vdev) goto err_free_vqs; } - viommu->domain_bits = 32; + viommu->map_flags = VIRTIO_IOMMU_MAP_F_READ | VIRTIO_IOMMU_MAP_F_WRITE; + viommu->last_domain = ~0U; /* Optional features */ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE, @@ -1038,9 +1048,13 @@ static int viommu_probe(struct virtio_device *vdev) struct virtio_iommu_config, input_range.end, &input_end); - virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_BITS, - struct virtio_iommu_config, domain_bits, - &viommu->domain_bits); + virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE, + struct virtio_iommu_config, domain_range.start, + &viommu->first_domain); + + virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE, + struct virtio_iommu_config, domain_range.end, + &viommu->last_domain); virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE, struct virtio_iommu_config, probe_size, @@ -1052,6 +1066,9 @@ static int viommu_probe(struct virtio_device *vdev) .force_aperture = true, }; + if (virtio_has_feature(vdev, VIRTIO_IOMMU_F_MMIO)) + viommu->map_flags |= VIRTIO_IOMMU_MAP_F_MMIO; + viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap; virtio_device_ready(vdev); @@ -1130,9 +1147,10 @@ static void viommu_config_changed(struct virtio_device *vdev) static unsigned int features[] = { VIRTIO_IOMMU_F_MAP_UNMAP, - VIRTIO_IOMMU_F_DOMAIN_BITS, VIRTIO_IOMMU_F_INPUT_RANGE, + VIRTIO_IOMMU_F_DOMAIN_RANGE, VIRTIO_IOMMU_F_PROBE, + VIRTIO_IOMMU_F_MMIO, }; static struct virtio_device_id id_table[] = { diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h index ba1b460c9944..237e36a280cb 100644 --- a/include/uapi/linux/virtio_iommu.h +++ b/include/uapi/linux/virtio_iommu.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* - * Virtio-iommu definition v0.9 + * Virtio-iommu definition v0.12 * - * Copyright (C) 2018 Arm Ltd. + * Copyright (C) 2019 Arm Ltd. */ #ifndef _UAPI_LINUX_VIRTIO_IOMMU_H #define _UAPI_LINUX_VIRTIO_IOMMU_H @@ -11,26 +11,31 @@ /* Feature bits */ #define VIRTIO_IOMMU_F_INPUT_RANGE 0 -#define VIRTIO_IOMMU_F_DOMAIN_BITS 1 +#define VIRTIO_IOMMU_F_DOMAIN_RANGE 1 #define VIRTIO_IOMMU_F_MAP_UNMAP 2 #define VIRTIO_IOMMU_F_BYPASS 3 #define VIRTIO_IOMMU_F_PROBE 4 +#define VIRTIO_IOMMU_F_MMIO 5 -struct virtio_iommu_range { - __u64 start; - __u64 end; +struct virtio_iommu_range_64 { + __le64 start; + __le64 end; +}; + +struct virtio_iommu_range_32 { + __le32 start; + __le32 end; }; struct virtio_iommu_config { /* Supported page sizes */ - __u64 page_size_mask; + __le64 page_size_mask; /* Supported IOVA range */ - struct virtio_iommu_range input_range; + struct virtio_iommu_range_64 input_range; /* Max domain ID size */ - __u8 domain_bits; - __u8 padding[3]; + struct virtio_iommu_range_32 domain_range; /* Probe buffer size */ - __u32 probe_size; + __le32 probe_size; }; /* Request types */ @@ -49,6 +54,7 @@ struct virtio_iommu_config { #define VIRTIO_IOMMU_S_RANGE 0x05 #define VIRTIO_IOMMU_S_NOENT 0x06 #define VIRTIO_IOMMU_S_FAULT 0x07 +#define VIRTIO_IOMMU_S_NOMEM 0x08 struct virtio_iommu_req_head { __u8 type; @@ -78,12 +84,10 @@ struct virtio_iommu_req_detach { #define VIRTIO_IOMMU_MAP_F_READ (1 << 0) #define VIRTIO_IOMMU_MAP_F_WRITE (1 << 1) -#define VIRTIO_IOMMU_MAP_F_EXEC (1 << 2) -#define VIRTIO_IOMMU_MAP_F_MMIO (1 << 3) +#define VIRTIO_IOMMU_MAP_F_MMIO (1 << 2) #define VIRTIO_IOMMU_MAP_F_MASK (VIRTIO_IOMMU_MAP_F_READ | \ VIRTIO_IOMMU_MAP_F_WRITE | \ - VIRTIO_IOMMU_MAP_F_EXEC | \ VIRTIO_IOMMU_MAP_F_MMIO) struct virtio_iommu_req_map { -- cgit v1.2.3 From 2b74c878b0eae4c32629c2d5ba69a29f69048313 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 15 Jul 2019 12:45:28 -0400 Subject: IB/hfi1: Unreserve a flushed OPFN request When an OPFN request is flushed, the request is completed without unreserving itself from the send queue. Subsequently, when a new request is post sent, the following warning will be triggered: WARNING: CPU: 4 PID: 8130 at rdmavt/qp.c:1761 rvt_post_send+0x72a/0x880 [rdmavt] Call Trace: [] dump_stack+0x19/0x1b [] __warn+0xd8/0x100 [] warn_slowpath_null+0x1d/0x20 [] rvt_post_send+0x72a/0x880 [rdmavt] [] ? account_entity_dequeue+0xae/0xd0 [] ? __kmalloc+0x55/0x230 [] ib_uverbs_post_send+0x37c/0x5d0 [ib_uverbs] [] ? rdma_lookup_put_uobject+0x26/0x60 [ib_uverbs] [] ib_uverbs_write+0x286/0x460 [ib_uverbs] [] ? security_file_permission+0x27/0xa0 [] vfs_write+0xc0/0x1f0 [] SyS_write+0x7f/0xf0 [] system_call_fastpath+0x22/0x27 This patch fixes the problem by moving rvt_qp_wqe_unreserve() into rvt_qp_complete_swqe() to simplify the code and make it less error-prone. Fixes: ca95f802ef51 ("IB/hfi1: Unreserve a reserved request when it is completed") Link: https://lore.kernel.org/r/20190715164528.74174.31364.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 2 -- include/rdma/rdmavt_qp.h | 9 ++++----- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 0477c14633ab..024a7c2b6124 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1835,7 +1835,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; trdma_clean_swqe(qp, wqe); - rvt_qp_wqe_unreserve(qp, wqe); trace_hfi1_qp_send_completion(qp, wqe, qp->s_last); rvt_qp_complete_swqe(qp, wqe, @@ -1882,7 +1881,6 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 || cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { trdma_clean_swqe(qp, wqe); - rvt_qp_wqe_unreserve(qp, wqe); trace_hfi1_qp_send_completion(qp, wqe, qp->s_last); rvt_qp_complete_swqe(qp, wqe, diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 0eeea520a853..e06c77d76463 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -608,7 +608,7 @@ static inline void rvt_qp_wqe_reserve( /** * rvt_qp_wqe_unreserve - clean reserved operation * @qp - the rvt qp - * @wqe - the send wqe + * @flags - send wqe flags * * This decrements the reserve use count. * @@ -620,11 +620,9 @@ static inline void rvt_qp_wqe_reserve( * the compiler does not juggle the order of the s_last * ring index and the decrementing of s_reserved_used. */ -static inline void rvt_qp_wqe_unreserve( - struct rvt_qp *qp, - struct rvt_swqe *wqe) +static inline void rvt_qp_wqe_unreserve(struct rvt_qp *qp, int flags) { - if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) { + if (unlikely(flags & RVT_SEND_RESERVE_USED)) { atomic_dec(&qp->s_reserved_used); /* insure no compiler re-order up to s_last change */ smp_mb__after_atomic(); @@ -853,6 +851,7 @@ rvt_qp_complete_swqe(struct rvt_qp *qp, u32 byte_len, last; int flags = wqe->wr.send_flags; + rvt_qp_wqe_unreserve(qp, flags); rvt_put_qp_swqe(qp, wqe); need_completion = -- cgit v1.2.3 From 6ee82ef04e38b0d8b09b04bc1b068673deed6582 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Mon, 1 Jul 2019 13:46:51 +0200 Subject: clk: Add missing documentation of devm_clk_bulk_get_optional() argument Fix an incomplete devm_clk_bulk_get_optional() function documentation by adding description of the num_clks argument as in other *clk_bulk* functions. Fixes: 9bd5ef0bd874 ("clk: Add devm_clk_bulk_get_optional() function") Reported-by: kbuild test robot Signed-off-by: Sylwester Nawrocki Signed-off-by: Stephen Boyd --- include/linux/clk.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/clk.h b/include/linux/clk.h index 3c096c7a51dc..853a8f181394 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -359,6 +359,7 @@ int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, /** * devm_clk_bulk_get_optional - managed get multiple optional consumer clocks * @dev: device for clock "consumer" + * @num_clks: the number of clk_bulk_data * @clks: pointer to the clk_bulk_data table of consumer * * Behaves the same as devm_clk_bulk_get() except where there is no clock -- cgit v1.2.3 From bca031e2c8aa22a978a2452bf959e27e9fa73dc7 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Thu, 18 Jul 2019 08:15:10 +0000 Subject: KVM: arm/arm64: Introduce kvm_pmu_vcpu_init() to setup PMU counter index We use "pmc->idx" and the "chained" bitmap to determine if the pmc is chained, in kvm_pmu_pmc_is_chained(). But idx might be uninitialized (and random) when we doing this decision, through a KVM_ARM_VCPU_INIT ioctl -> kvm_pmu_vcpu_reset(). And the test_bit() against this random idx will potentially hit a KASAN BUG [1]. In general, idx is the static property of a PMU counter that is not expected to be modified across resets, as suggested by Julien. It looks more reasonable if we can setup the PMU counter idx for a vcpu in its creation time. Introduce a new function - kvm_pmu_vcpu_init() for this basic setup. Oh, and the KASAN BUG will get fixed this way. [1] https://www.spinics.net/lists/kvm-arm/msg36700.html Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters") Suggested-by: Andrew Murray Suggested-by: Julien Thierry Acked-by: Julien Thierry Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier --- include/kvm/arm_pmu.h | 2 ++ virt/kvm/arm/arm.c | 2 ++ virt/kvm/arm/pmu.c | 18 +++++++++++++++--- 3 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 16c769a7f979..6db030439e29 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -34,6 +34,7 @@ struct kvm_pmu { u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu); +void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu); void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val); @@ -71,6 +72,7 @@ static inline u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) { return 0; } +static inline void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) {} diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index f645c0fbf7ec..c704fa696184 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -340,6 +340,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) /* Set up the timer */ kvm_timer_vcpu_init(vcpu); + kvm_pmu_vcpu_init(vcpu); + kvm_arm_reset_debug_ptr(vcpu); return kvm_vgic_vcpu_init(vcpu); diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 3dd8238ed246..362a01886bab 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -214,6 +214,20 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) kvm_pmu_release_perf_event(pmc); } +/** + * kvm_pmu_vcpu_init - assign pmu counter idx for cpu + * @vcpu: The vcpu pointer + * + */ +void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) + pmu->pmc[i].idx = i; +} + /** * kvm_pmu_vcpu_reset - reset pmu state for cpu * @vcpu: The vcpu pointer @@ -224,10 +238,8 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) int i; struct kvm_pmu *pmu = &vcpu->arch.pmu; - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); - pmu->pmc[i].idx = i; - } bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); } -- cgit v1.2.3 From d9b8aadaffa65809d146cf0f8632a22a946367d7 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 19 Jul 2019 11:18:15 +0200 Subject: bpf: fix narrower loads on s390 The very first check in test_pkt_md_access is failing on s390, which happens because loading a part of a struct __sk_buff field produces an incorrect result. The preprocessed code of the check is: { __u8 tmp = *((volatile __u8 *)&skb->len + ((sizeof(skb->len) - sizeof(__u8)) / sizeof(__u8))); if (tmp != ((*(volatile __u32 *)&skb->len) & 0xFF)) return 2; }; clang generates the following code for it: 0: 71 21 00 03 00 00 00 00 r2 = *(u8 *)(r1 + 3) 1: 61 31 00 00 00 00 00 00 r3 = *(u32 *)(r1 + 0) 2: 57 30 00 00 00 00 00 ff r3 &= 255 3: 5d 23 00 1d 00 00 00 00 if r2 != r3 goto +29 Finally, verifier transforms it to: 0: (61) r2 = *(u32 *)(r1 +104) 1: (bc) w2 = w2 2: (74) w2 >>= 24 3: (bc) w2 = w2 4: (54) w2 &= 255 5: (bc) w2 = w2 The problem is that when verifier emits the code to replace a partial load of a struct __sk_buff field (*(u8 *)(r1 + 3)) with a full load of struct sk_buff field (*(u32 *)(r1 + 104)), an optional shift and a bitwise AND, it assumes that the machine is little endian and incorrectly decides to use a shift. Adjust shift count calculation to account for endianness. Fixes: 31fd85816dbe ("bpf: permits narrower load from bpf program context fields") Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 13 +++++++++++++ kernel/bpf/verifier.c | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index ff65d22cf336..92c6e31fb008 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -24,6 +24,7 @@ #include +#include #include #include @@ -747,6 +748,18 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) return size <= size_default && (size & (size - 1)) == 0; } +static inline u8 +bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default) +{ + u8 load_off = off & (size_default - 1); + +#ifdef __LITTLE_ENDIAN + return load_off * 8; +#else + return (size_default - (load_off + size)) * 8; +#endif +} + #define bpf_ctx_wide_access_ok(off, size, type, field) \ (size == sizeof(__u64) && \ off >= offsetof(type, field) && \ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5900cbb966b1..c84d83f86141 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8616,8 +8616,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } if (is_narrower_load && size < target_size) { - u8 shift = (off & (size_default - 1)) * 8; - + u8 shift = bpf_ctx_narrow_load_shift(off, size, + size_default); if (ctx_field_size <= 4) { if (shift) insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, -- cgit v1.2.3 From 8732d85a69a0411f16a4b78df8fdc7b09c50a849 Mon Sep 17 00:00:00 2001 From: Mattias Jacobsson <2pi@mok.nu> Date: Fri, 19 Jul 2019 19:51:45 +0200 Subject: platform/x86: wmi: add missing struct parameter description Add a description for the context parameter in the struct wmi_device_id. Reported-by: kbuild test robot Fixes: a48e23385fcf ("platform/x86: wmi: add context pointer field to struct wmi_device_id") Signed-off-by: Mattias Jacobsson <2pi@mok.nu> Signed-off-by: Andy Shevchenko --- include/linux/mod_devicetable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b2c1648f7e5d..5714fd35a83c 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -814,6 +814,7 @@ struct tee_client_device_id { /** * struct wmi_device_id - WMI device identifier * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba + * @context: pointer to driver specific data */ struct wmi_device_id { const char guid_string[UUID_STRING_LEN+1]; -- cgit v1.2.3 From f8be17b81d44aed1f9ea68c3fc70f501c9616e2d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 Jul 2019 10:22:48 +0300 Subject: lib/dim: Fix -Wunused-const-variable warnings DIM causes to the following warnings during kernel compilation which indicates that tx_profile and rx_profile are supposed to be declared in *.c and not in *.h files. In file included from ./include/rdma/ib_verbs.h:64, from ./include/linux/mlx5/device.h:37, from ./include/linux/mlx5/driver.h:51, from ./include/linux/mlx5/vport.h:36, from drivers/infiniband/hw/mlx5/ib_virt.c:34: ./include/linux/dim.h:326:1: warning: _tx_profile_ defined but not used [-Wunused-const-variable=] 326 | tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { | ^~~~~~~~~~ ./include/linux/dim.h:320:1: warning: _rx_profile_ defined but not used [-Wunused-const-variable=] 320 | rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { | ^~~~~~~~~~ Fixes: 4f75da3666c0 ("linux/dim: Move implementation to .c files") Signed-off-by: Leon Romanovsky Reviewed-by: Bart Van Assche Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/dim.h | 56 ----------------------------------------------------- lib/dim/net_dim.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/linux/dim.h b/include/linux/dim.h index d3a0fbfff2bb..9fa4b3f88c39 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -272,62 +272,6 @@ dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps, /* Net DIM */ -/* - * Net DIM profiles: - * There are different set of profiles for each CQ period mode. - * There are different set of profiles for RX/TX CQs. - * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES - */ -#define NET_DIM_PARAMS_NUM_PROFILES 5 -#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 -#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 -#define NET_DIM_DEF_PROFILE_CQE 1 -#define NET_DIM_DEF_PROFILE_EQE 1 - -#define NET_DIM_RX_EQE_PROFILES { \ - {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ -} - -#define NET_DIM_RX_CQE_PROFILES { \ - {2, 256}, \ - {8, 128}, \ - {16, 64}, \ - {32, 64}, \ - {64, 64} \ -} - -#define NET_DIM_TX_EQE_PROFILES { \ - {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ - {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ -} - -#define NET_DIM_TX_CQE_PROFILES { \ - {5, 128}, \ - {8, 64}, \ - {16, 32}, \ - {32, 32}, \ - {64, 32} \ -} - -static const struct dim_cq_moder -rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { - NET_DIM_RX_EQE_PROFILES, - NET_DIM_RX_CQE_PROFILES, -}; - -static const struct dim_cq_moder -tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { - NET_DIM_TX_EQE_PROFILES, - NET_DIM_TX_CQE_PROFILES, -}; - /** * net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile * @cq_period_mode: CQ period mode diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index 5bcc902c5388..a4db51c21266 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -5,6 +5,62 @@ #include +/* + * Net DIM profiles: + * There are different set of profiles for each CQ period mode. + * There are different set of profiles for RX/TX CQs. + * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES + */ +#define NET_DIM_PARAMS_NUM_PROFILES 5 +#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 +#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 +#define NET_DIM_DEF_PROFILE_CQE 1 +#define NET_DIM_DEF_PROFILE_EQE 1 + +#define NET_DIM_RX_EQE_PROFILES { \ + {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ +} + +#define NET_DIM_RX_CQE_PROFILES { \ + {2, 256}, \ + {8, 128}, \ + {16, 64}, \ + {32, 64}, \ + {64, 64} \ +} + +#define NET_DIM_TX_EQE_PROFILES { \ + {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ +} + +#define NET_DIM_TX_CQE_PROFILES { \ + {5, 128}, \ + {8, 64}, \ + {16, 32}, \ + {32, 32}, \ + {64, 32} \ +} + +static const struct dim_cq_moder +rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { + NET_DIM_RX_EQE_PROFILES, + NET_DIM_RX_CQE_PROFILES, +}; + +static const struct dim_cq_moder +tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { + NET_DIM_TX_EQE_PROFILES, + NET_DIM_TX_CQE_PROFILES, +}; + struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix) { -- cgit v1.2.3 From 02712bc3250849c1cf99d626aea98f610e695f34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 Jul 2019 08:52:53 +0200 Subject: mm/hmm: move hmm_vma_range_done and hmm_vma_fault to nouveau These two functions are marked as a legacy APIs to get rid of, but seem to suit the current nouveau flow. Move it to the only user in preparation for fixing a locking bug involving caller and callee. All comments referring to the old API have been removed as this now is a driver private helper. Link: https://lore.kernel.org/r/20190724065258.16603-3-hch@lst.de Tested-by: Ralph Campbell Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_svm.c | 46 +++++++++++++++++++++++++++-- include/linux/hmm.h | 54 ----------------------------------- 2 files changed, 44 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 8c92374afcf2..6c1b04de0db8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -475,6 +475,48 @@ nouveau_svm_fault_cache(struct nouveau_svm *svm, fault->inst, fault->addr, fault->access); } +static inline bool +nouveau_range_done(struct hmm_range *range) +{ + bool ret = hmm_range_valid(range); + + hmm_range_unregister(range); + return ret; +} + +static int +nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range, + bool block) +{ + long ret; + + range->default_flags = 0; + range->pfn_flags_mask = -1UL; + + ret = hmm_range_register(range, mirror, + range->start, range->end, + PAGE_SHIFT); + if (ret) + return (int)ret; + + if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) { + up_read(&range->vma->vm_mm->mmap_sem); + return -EAGAIN; + } + + ret = hmm_range_fault(range, block); + if (ret <= 0) { + if (ret == -EBUSY || !ret) { + up_read(&range->vma->vm_mm->mmap_sem); + ret = -EBUSY; + } else if (ret == -EAGAIN) + ret = -EBUSY; + hmm_range_unregister(range); + return ret; + } + return 0; +} + static int nouveau_svm_fault(struct nvif_notify *notify) { @@ -649,10 +691,10 @@ nouveau_svm_fault(struct nvif_notify *notify) range.values = nouveau_svm_pfn_values; range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT; again: - ret = hmm_vma_fault(&svmm->mirror, &range, true); + ret = nouveau_range_fault(&svmm->mirror, &range, true); if (ret == 0) { mutex_lock(&svmm->mutex); - if (!hmm_vma_range_done(&range)) { + if (!nouveau_range_done(&range)) { mutex_unlock(&svmm->mutex); goto again; } diff --git a/include/linux/hmm.h b/include/linux/hmm.h index b8a08b2a10ca..7ef56dc18050 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -484,60 +484,6 @@ long hmm_range_dma_unmap(struct hmm_range *range, */ #define HMM_RANGE_DEFAULT_TIMEOUT 1000 -/* This is a temporary helper to avoid merge conflict between trees. */ -static inline bool hmm_vma_range_done(struct hmm_range *range) -{ - bool ret = hmm_range_valid(range); - - hmm_range_unregister(range); - return ret; -} - -/* This is a temporary helper to avoid merge conflict between trees. */ -static inline int hmm_vma_fault(struct hmm_mirror *mirror, - struct hmm_range *range, bool block) -{ - long ret; - - /* - * With the old API the driver must set each individual entries with - * the requested flags (valid, write, ...). So here we set the mask to - * keep intact the entries provided by the driver and zero out the - * default_flags. - */ - range->default_flags = 0; - range->pfn_flags_mask = -1UL; - - ret = hmm_range_register(range, mirror, - range->start, range->end, - PAGE_SHIFT); - if (ret) - return (int)ret; - - if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) { - /* - * The mmap_sem was taken by driver we release it here and - * returns -EAGAIN which correspond to mmap_sem have been - * drop in the old API. - */ - up_read(&range->vma->vm_mm->mmap_sem); - return -EAGAIN; - } - - ret = hmm_range_fault(range, block); - if (ret <= 0) { - if (ret == -EBUSY || !ret) { - /* Same as above, drop mmap_sem to match old API. */ - up_read(&range->vma->vm_mm->mmap_sem); - ret = -EBUSY; - } else if (ret == -EAGAIN) - ret = -EBUSY; - hmm_range_unregister(range); - return ret; - } - return 0; -} - /* Below are for HMM internal use only! Not to be used by device driver! */ static inline void hmm_mm_init(struct mm_struct *mm) { -- cgit v1.2.3 From 7a32f2962c56d9d8a836b4469855caeee8766bd4 Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Tue, 23 Jul 2019 10:12:55 +0300 Subject: net/mlx5: Fix modify_cq_in alignment Fix modify_cq_in alignment to match the device specification. After this fix the 'cq_umem_valid' field will be in the right offset. Cc: # 4.19 Fixes: bd37197554eb ("net/mlx5: Update mlx5_ifc with DEVX UID bits") Signed-off-by: Edward Srouji Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b3d5752657d9..ec571fd7fcf8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5975,10 +5975,12 @@ struct mlx5_ifc_modify_cq_in_bits { struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_at_280[0x40]; + u8 reserved_at_280[0x60]; u8 cq_umem_valid[0x1]; - u8 reserved_at_2c1[0x5bf]; + u8 reserved_at_2e1[0x1f]; + + u8 reserved_at_300[0x580]; u8 pas[0][0x40]; }; -- cgit v1.2.3 From 90bb769291161cf25a818d69cf608c181654473e Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Sat, 6 Jul 2019 18:06:15 +0300 Subject: net/mlx5e: Prevent encap flow counter update async to user query This patch prevents a race between user invoked cached counters query and a neighbor last usage updater. The cached flow counter stats can be queried by calling "mlx5_fc_query_cached" which provides the number of bytes and packets that passed via this flow since the last time this counter was queried. It does so by reducting the last saved stats from the current, cached stats and then updating the last saved stats with the cached stats. It also provide the lastuse value for that flow. Since "mlx5e_tc_update_neigh_used_value" needs to retrieve the last usage time of encapsulation flows, it calls the flow counter query method periodically and async to user queries of the flow counter using cls_flower. This call is causing the driver to update the last reported bytes and packets from the cache and therefore, future user queries of the flow stats will return lower than expected number for bytes and packets since the last saved stats in the driver was updated async to the last saved stats in cls_flower. This causes wrong stats presentation of encapsulation flows to user. Since the neighbor usage updater only needs the lastuse stats from the cached counter, the fix is to use a dedicated lastuse query call that returns the lastuse value without synching between the cached stats and the last saved stats. Fixes: f6dfb4c3f216 ("net/mlx5e: Update neighbour 'used' state using HW flow rules counters") Signed-off-by: Ariel Levkovich Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 5 +++++ include/linux/mlx5/fs.h | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cc096f6011d9..7ecfc53cf5f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1230,13 +1230,13 @@ static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh *m_neigh = &nhe->m_neigh; - u64 bytes, packets, lastuse = 0; struct mlx5e_tc_flow *flow; struct mlx5e_encap_entry *e; struct mlx5_fc *counter; struct neigh_table *tbl; bool neigh_used = false; struct neighbour *n; + u64 lastuse; if (m_neigh->family == AF_INET) tbl = &arp_tbl; @@ -1256,7 +1256,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) encaps[efi->index]); if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { counter = mlx5e_tc_get_counter(flow); - mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + lastuse = mlx5_fc_query_lastuse(counter); if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { neigh_used = true; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index b3762123a69c..1834d9f3aa1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -369,6 +369,11 @@ int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, } EXPORT_SYMBOL(mlx5_fc_query); +u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter) +{ + return counter->cache.lastuse; +} + void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse) { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 04a569568eac..f049af3f3cd8 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -220,6 +220,7 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); +u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, -- cgit v1.2.3 From e6f4051123fd33901e9655a675b22aefcdc5d277 Mon Sep 17 00:00:00 2001 From: Manikanta Pubbisetty Date: Mon, 22 Jul 2019 12:44:50 +0530 Subject: {nl,mac}80211: fix interface combinations on crypto controlled devices Commit 33d915d9e8ce ("{nl,mac}80211: allow 4addr AP operation on crypto controlled devices") has introduced a change which allows 4addr operation on crypto controlled devices (ex: ath10k). This change has inadvertently impacted the interface combinations logic on such devices. General rule is that software interfaces like AP/VLAN should not be listed under supported interface combinations and should not be considered during validation of these combinations; because of the aforementioned change, AP/VLAN interfaces(if present) will be checked against interfaces supported by the device and blocks valid interface combinations. Consider a case where an AP and AP/VLAN are up and running; when a second AP device is brought up on the same physical device, this AP will be checked against the AP/VLAN interface (which will not be part of supported interface combinations of the device) and blocks second AP to come up. Add a new API cfg80211_iftype_allowed() to fix the problem, this API works for all devices with/without SW crypto control. Signed-off-by: Manikanta Pubbisetty Fixes: 33d915d9e8ce ("{nl,mac}80211: allow 4addr AP operation on crypto controlled devices") Link: https://lore.kernel.org/r/1563779690-9716-1-git-send-email-mpubbise@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 15 +++++++++++++++ net/mac80211/util.c | 7 +++---- net/wireless/core.c | 6 ++---- net/wireless/nl80211.c | 4 +--- net/wireless/util.c | 27 +++++++++++++++++++++++++-- 5 files changed, 46 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 45850a8391d9..26e2ad2c7027 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7320,6 +7320,21 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev, struct cfg80211_pmsr_request *req, gfp_t gfp); +/** + * cfg80211_iftype_allowed - check whether the interface can be allowed + * @wiphy: the wiphy + * @iftype: interface type + * @is_4addr: use_4addr flag, must be '0' when check_swif is '1' + * @check_swif: check iftype against software interfaces + * + * Check whether the interface is allowed to operate; additionally, this API + * can be used to check iftype against the software interfaces when + * check_swif is '1'. + */ +bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, + bool is_4addr, u8 check_swif); + + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1b224fa27367..ad1e58184c4e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3796,9 +3796,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, } /* Always allow software iftypes */ - if (local->hw.wiphy->software_iftypes & BIT(iftype) || - (iftype == NL80211_IFTYPE_AP_VLAN && - local->hw.wiphy->flags & WIPHY_FLAG_4ADDR_AP)) { + if (cfg80211_iftype_allowed(local->hw.wiphy, iftype, 0, 1)) { if (radar_detect) return -EINVAL; return 0; @@ -3833,7 +3831,8 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, if (sdata_iter == sdata || !ieee80211_sdata_running(sdata_iter) || - local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype)) + cfg80211_iftype_allowed(local->hw.wiphy, + wdev_iter->iftype, 0, 1)) continue; params.iftype_num[wdev_iter->iftype]++; diff --git a/net/wireless/core.c b/net/wireless/core.c index 45d9afcff6d5..32b3c719fdfc 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1410,10 +1410,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, } break; case NETDEV_PRE_UP: - if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)) && - !(wdev->iftype == NL80211_IFTYPE_AP_VLAN && - rdev->wiphy.flags & WIPHY_FLAG_4ADDR_AP && - wdev->use_4addr)) + if (!cfg80211_iftype_allowed(wdev->wiphy, wdev->iftype, + wdev->use_4addr, 0)) return notifier_from_errno(-EOPNOTSUPP); if (rfkill_blocked(rdev->rfkill)) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fc83dd179c1a..fd05ae1437a9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3484,9 +3484,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return err; } - if (!(rdev->wiphy.interface_modes & (1 << type)) && - !(type == NL80211_IFTYPE_AP_VLAN && params.use_4addr && - rdev->wiphy.flags & WIPHY_FLAG_4ADDR_AP)) + if (!cfg80211_iftype_allowed(&rdev->wiphy, type, params.use_4addr, 0)) return -EOPNOTSUPP; err = nl80211_parse_mon_options(rdev, type, info, ¶ms); diff --git a/net/wireless/util.c b/net/wireless/util.c index 1c39d6a2e850..d0e35b7b9e35 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1697,7 +1697,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { num_interfaces += params->iftype_num[iftype]; if (params->iftype_num[iftype] > 0 && - !(wiphy->software_iftypes & BIT(iftype))) + !cfg80211_iftype_allowed(wiphy, iftype, 0, 1)) used_iftypes |= BIT(iftype); } @@ -1719,7 +1719,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, return -ENOMEM; for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { - if (wiphy->software_iftypes & BIT(iftype)) + if (cfg80211_iftype_allowed(wiphy, iftype, 0, 1)) continue; for (j = 0; j < c->n_limits; j++) { all_iftypes |= limits[j].types; @@ -2072,3 +2072,26 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, return max_vht_nss; } EXPORT_SYMBOL(ieee80211_get_vht_max_nss); + +bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, + bool is_4addr, u8 check_swif) + +{ + bool is_vlan = iftype == NL80211_IFTYPE_AP_VLAN; + + switch (check_swif) { + case 0: + if (is_vlan && is_4addr) + return wiphy->flags & WIPHY_FLAG_4ADDR_AP; + return wiphy->interface_modes & BIT(iftype); + case 1: + if (!(wiphy->software_iftypes & BIT(iftype)) && is_vlan) + return wiphy->flags & WIPHY_FLAG_4ADDR_AP; + return wiphy->software_iftypes & BIT(iftype); + default: + break; + } + + return false; +} +EXPORT_SYMBOL(cfg80211_iftype_allowed); -- cgit v1.2.3 From a7cf3d24ee6081930feb4c830a7f6f16ebe31c49 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 25 Jul 2019 12:07:12 -0600 Subject: net: qualcomm: rmnet: Fix incorrect UL checksum offload logic The udp_ip4_ind bit is set only for IPv4 UDP non-fragmented packets so that the hardware can flip the checksum to 0xFFFF if the computed checksum is 0 per RFC768. However, this bit had to be set for IPv6 UDP non fragmented packets as well per hardware requirements. Otherwise, IPv6 UDP packets with computed checksum as 0 were transmitted by hardware and were dropped in the network. In addition to setting this bit for IPv6 UDP, the field is also appropriately renamed to udp_ind as part of this change. Fixes: 5eb5f8608ef1 ("net: qualcomm: rmnet: Add support for TX checksum offload") Cc: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 13 +++++++++---- include/linux/if_rmnet.h | 4 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c index 60189923737a..21d38167f961 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c @@ -206,9 +206,9 @@ rmnet_map_ipv4_ul_csum_header(void *iphdr, ul_header->csum_insert_offset = skb->csum_offset; ul_header->csum_enabled = 1; if (ip4h->protocol == IPPROTO_UDP) - ul_header->udp_ip4_ind = 1; + ul_header->udp_ind = 1; else - ul_header->udp_ip4_ind = 0; + ul_header->udp_ind = 0; /* Changing remaining fields to network order */ hdr++; @@ -239,6 +239,7 @@ rmnet_map_ipv6_ul_csum_header(void *ip6hdr, struct rmnet_map_ul_csum_header *ul_header, struct sk_buff *skb) { + struct ipv6hdr *ip6h = (struct ipv6hdr *)ip6hdr; __be16 *hdr = (__be16 *)ul_header, offset; offset = htons((__force u16)(skb_transport_header(skb) - @@ -246,7 +247,11 @@ rmnet_map_ipv6_ul_csum_header(void *ip6hdr, ul_header->csum_start_offset = offset; ul_header->csum_insert_offset = skb->csum_offset; ul_header->csum_enabled = 1; - ul_header->udp_ip4_ind = 0; + + if (ip6h->nexthdr == IPPROTO_UDP) + ul_header->udp_ind = 1; + else + ul_header->udp_ind = 0; /* Changing remaining fields to network order */ hdr++; @@ -419,7 +424,7 @@ sw_csum: ul_header->csum_start_offset = 0; ul_header->csum_insert_offset = 0; ul_header->csum_enabled = 0; - ul_header->udp_ip4_ind = 0; + ul_header->udp_ind = 0; priv->stats.csum_sw++; } diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h index b4f5403383fc..9661416a9bb4 100644 --- a/include/linux/if_rmnet.h +++ b/include/linux/if_rmnet.h @@ -41,11 +41,11 @@ struct rmnet_map_ul_csum_header { __be16 csum_start_offset; #if defined(__LITTLE_ENDIAN_BITFIELD) u16 csum_insert_offset:14; - u16 udp_ip4_ind:1; + u16 udp_ind:1; u16 csum_enabled:1; #elif defined (__BIG_ENDIAN_BITFIELD) u16 csum_enabled:1; - u16 udp_ip4_ind:1; + u16 udp_ind:1; u16 csum_insert_offset:14; #else #error "Please fix " -- cgit v1.2.3 From ffe0bbabb0cffceceae07484fde1ec2a63b1537c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 8 Jul 2019 10:23:43 +0200 Subject: gpio: don't WARN() on NULL descs if gpiolib is disabled If gpiolib is disabled, we use the inline stubs from gpio/consumer.h instead of regular definitions of GPIO API. The stubs for 'optional' variants of gpiod_get routines return NULL in this case as if the relevant GPIO wasn't found. This is correct so far. Calling other (non-gpio_get) stubs from this header triggers a warning because the GPIO descriptor couldn't have been requested. The warning however is unconditional (WARN_ON(1)) and is emitted even if the passed descriptor pointer is NULL. We don't want to force the users of 'optional' gpio_get to check the returned pointer before calling e.g. gpiod_set_value() so let's only WARN on non-NULL descriptors. Cc: stable@vger.kernel.org Reported-by: Claus H. Stovgaard Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/consumer.h | 64 +++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 9ddcf50a3c59..a7f08fb0f865 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -247,7 +247,7 @@ static inline void gpiod_put(struct gpio_desc *desc) might_sleep(); /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline void devm_gpiod_unhinge(struct device *dev, @@ -256,7 +256,7 @@ static inline void devm_gpiod_unhinge(struct device *dev, might_sleep(); /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline void gpiod_put_array(struct gpio_descs *descs) @@ -264,7 +264,7 @@ static inline void gpiod_put_array(struct gpio_descs *descs) might_sleep(); /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(descs); } static inline struct gpio_desc *__must_check @@ -317,7 +317,7 @@ static inline void devm_gpiod_put(struct device *dev, struct gpio_desc *desc) might_sleep(); /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline void devm_gpiod_put_array(struct device *dev, @@ -326,32 +326,32 @@ static inline void devm_gpiod_put_array(struct device *dev, might_sleep(); /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(descs); } static inline int gpiod_get_direction(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -ENOSYS; } static inline int gpiod_direction_input(struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -ENOSYS; } static inline int gpiod_direction_output(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -ENOSYS; } static inline int gpiod_direction_output_raw(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -ENOSYS; } @@ -359,7 +359,7 @@ static inline int gpiod_direction_output_raw(struct gpio_desc *desc, int value) static inline int gpiod_get_value(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return 0; } static inline int gpiod_get_array_value(unsigned int array_size, @@ -368,13 +368,13 @@ static inline int gpiod_get_array_value(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline void gpiod_set_value(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline int gpiod_set_array_value(unsigned int array_size, struct gpio_desc **desc_array, @@ -382,13 +382,13 @@ static inline int gpiod_set_array_value(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline int gpiod_get_raw_value(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return 0; } static inline int gpiod_get_raw_array_value(unsigned int array_size, @@ -397,13 +397,13 @@ static inline int gpiod_get_raw_array_value(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, @@ -411,14 +411,14 @@ static inline int gpiod_set_raw_array_value(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return 0; } static inline int gpiod_get_array_value_cansleep(unsigned int array_size, @@ -427,13 +427,13 @@ static inline int gpiod_get_array_value_cansleep(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline int gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, @@ -441,13 +441,13 @@ static inline int gpiod_set_array_value_cansleep(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return 0; } static inline int gpiod_get_raw_array_value_cansleep(unsigned int array_size, @@ -456,14 +456,14 @@ static inline int gpiod_get_raw_array_value_cansleep(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); } static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, @@ -471,41 +471,41 @@ static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size, unsigned long *value_bitmap) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc_array); return 0; } static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -ENOSYS; } static inline int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -ENOSYS; } static inline int gpiod_is_active_low(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return 0; } static inline int gpiod_cansleep(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return 0; } static inline int gpiod_to_irq(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -EINVAL; } @@ -513,7 +513,7 @@ static inline int gpiod_set_consumer_name(struct gpio_desc *desc, const char *name) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -EINVAL; } @@ -525,7 +525,7 @@ static inline struct gpio_desc *gpio_to_desc(unsigned gpio) static inline int desc_to_gpio(const struct gpio_desc *desc) { /* GPIO can never have been requested */ - WARN_ON(1); + WARN_ON(desc); return -EINVAL; } -- cgit v1.2.3 From 91826ba13855f73e252fef68369b3b0e1ed25253 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 29 Jul 2019 00:51:38 +0900 Subject: netfilter: add include guard to xt_connlabel.h Add a header include guard just in case. Signed-off-by: Masahiro Yamada Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_connlabel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/xt_connlabel.h b/include/uapi/linux/netfilter/xt_connlabel.h index 2312f0ec07b2..323f0dfc2a4e 100644 --- a/include/uapi/linux/netfilter/xt_connlabel.h +++ b/include/uapi/linux/netfilter/xt_connlabel.h @@ -1,4 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_XT_CONNLABEL_H +#define _UAPI_XT_CONNLABEL_H + #include #define XT_CONNLABEL_MAXBIT 127 @@ -11,3 +15,5 @@ struct xt_connlabel_mtinfo { __u16 bit; __u16 options; }; + +#endif /* _UAPI_XT_CONNLABEL_H */ -- cgit v1.2.3 From f3e4ff28b8685d856f381ee6bcf88b6149a6db5b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 24 Jul 2019 11:00:54 +0200 Subject: scsi: libfc: Whitespace cleanup in libfc.h No functional change. [mkp: typo] Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/scsi/libfc.h | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 2d64b53f947c..9b87e1a1c646 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -115,7 +115,7 @@ struct fc_disc_port { struct fc_lport *lp; struct list_head peers; struct work_struct rport_work; - u32 port_id; + u32 port_id; }; /** @@ -155,14 +155,14 @@ struct fc_rport_operations { */ struct fc_rport_libfc_priv { struct fc_lport *local_port; - enum fc_rport_state rp_state; + enum fc_rport_state rp_state; u16 flags; #define FC_RP_FLAGS_REC_SUPPORTED (1 << 0) #define FC_RP_FLAGS_RETRY (1 << 1) #define FC_RP_STARTED (1 << 2) #define FC_RP_FLAGS_CONF_REQ (1 << 3) - unsigned int e_d_tov; - unsigned int r_a_tov; + unsigned int e_d_tov; + unsigned int r_a_tov; }; /** @@ -191,24 +191,24 @@ struct fc_rport_priv { struct fc_lport *local_port; struct fc_rport *rport; struct kref kref; - enum fc_rport_state rp_state; + enum fc_rport_state rp_state; struct fc_rport_identifiers ids; u16 flags; - u16 max_seq; + u16 max_seq; u16 disc_id; u16 maxframe_size; - unsigned int retries; - unsigned int major_retries; - unsigned int e_d_tov; - unsigned int r_a_tov; - struct mutex rp_mutex; + unsigned int retries; + unsigned int major_retries; + unsigned int e_d_tov; + unsigned int r_a_tov; + struct mutex rp_mutex; struct delayed_work retry_work; - enum fc_rport_event event; + enum fc_rport_event event; struct fc_rport_operations *ops; - struct list_head peers; - struct work_struct event_work; + struct list_head peers; + struct work_struct event_work; u32 supported_classes; - u16 prli_count; + u16 prli_count; struct rcu_head rcu; u16 sp_features; u8 spp_type; @@ -618,12 +618,12 @@ struct libfc_function_template { * @disc_callback: Callback routine called when discovery completes */ struct fc_disc { - unsigned char retry_count; - unsigned char pending; - unsigned char requested; - unsigned short seq_count; - unsigned char buf_len; - u16 disc_id; + unsigned char retry_count; + unsigned char pending; + unsigned char requested; + unsigned short seq_count; + unsigned char buf_len; + u16 disc_id; struct list_head rports; void *priv; @@ -697,7 +697,7 @@ struct fc_lport { struct fc_rport_priv *ms_rdata; struct fc_rport_priv *ptp_rdata; void *scsi_priv; - struct fc_disc disc; + struct fc_disc disc; /* Virtual port information */ struct list_head vports; @@ -715,7 +715,7 @@ struct fc_lport { u8 retry_count; /* Fabric information */ - u32 port_id; + u32 port_id; u64 wwpn; u64 wwnn; unsigned int service_params; @@ -743,11 +743,11 @@ struct fc_lport { struct fc_ns_fts fcts; /* Miscellaneous */ - struct mutex lp_mutex; - struct list_head list; + struct mutex lp_mutex; + struct list_head list; struct delayed_work retry_work; void *prov[FC_FC4_PROV_SIZE]; - struct list_head lport_list; + struct list_head lport_list; }; /** -- cgit v1.2.3 From 023358b136d490ca91735ac6490db3741af5a8bd Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 24 Jul 2019 11:00:55 +0200 Subject: scsi: fcoe: Embed fc_rport_priv in fcoe_rport structure Gcc-9 complains for a memset across pointer boundaries, which happens as the code tries to allocate a flexible array on the stack. Turns out we cannot do this without relying on gcc-isms, so with this patch we'll embed the fc_rport_priv structure into fcoe_rport, can use the normal 'container_of' outcast, and will only have to do a memset over one structure. Signed-off-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/fcoe/fcoe_ctlr.c | 51 +++++++++++++++++-------------------------- drivers/scsi/libfc/fc_rport.c | 5 ++++- include/scsi/libfcoe.h | 1 + 3 files changed, 25 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 1a85fe9e4b7b..fc32b5d76821 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -2005,7 +2005,7 @@ EXPORT_SYMBOL_GPL(fcoe_wwn_from_mac); */ static inline struct fcoe_rport *fcoe_ctlr_rport(struct fc_rport_priv *rdata) { - return (struct fcoe_rport *)(rdata + 1); + return container_of(rdata, struct fcoe_rport, rdata); } /** @@ -2269,7 +2269,7 @@ static void fcoe_ctlr_vn_start(struct fcoe_ctlr *fip) */ static int fcoe_ctlr_vn_parse(struct fcoe_ctlr *fip, struct sk_buff *skb, - struct fc_rport_priv *rdata) + struct fcoe_rport *frport) { struct fip_header *fiph; struct fip_desc *desc = NULL; @@ -2277,16 +2277,12 @@ static int fcoe_ctlr_vn_parse(struct fcoe_ctlr *fip, struct fip_wwn_desc *wwn = NULL; struct fip_vn_desc *vn = NULL; struct fip_size_desc *size = NULL; - struct fcoe_rport *frport; size_t rlen; size_t dlen; u32 desc_mask = 0; u32 dtype; u8 sub; - memset(rdata, 0, sizeof(*rdata) + sizeof(*frport)); - frport = fcoe_ctlr_rport(rdata); - fiph = (struct fip_header *)skb->data; frport->flags = ntohs(fiph->fip_flags); @@ -2349,15 +2345,17 @@ static int fcoe_ctlr_vn_parse(struct fcoe_ctlr *fip, if (dlen != sizeof(struct fip_wwn_desc)) goto len_err; wwn = (struct fip_wwn_desc *)desc; - rdata->ids.node_name = get_unaligned_be64(&wwn->fd_wwn); + frport->rdata.ids.node_name = + get_unaligned_be64(&wwn->fd_wwn); break; case FIP_DT_VN_ID: if (dlen != sizeof(struct fip_vn_desc)) goto len_err; vn = (struct fip_vn_desc *)desc; memcpy(frport->vn_mac, vn->fd_mac, ETH_ALEN); - rdata->ids.port_id = ntoh24(vn->fd_fc_id); - rdata->ids.port_name = get_unaligned_be64(&vn->fd_wwpn); + frport->rdata.ids.port_id = ntoh24(vn->fd_fc_id); + frport->rdata.ids.port_name = + get_unaligned_be64(&vn->fd_wwpn); break; case FIP_DT_FC4F: if (dlen != sizeof(struct fip_fc4_feat)) @@ -2738,10 +2736,7 @@ static int fcoe_ctlr_vn_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) { struct fip_header *fiph; enum fip_vn2vn_subcode sub; - struct { - struct fc_rport_priv rdata; - struct fcoe_rport frport; - } buf; + struct fcoe_rport frport = { }; int rc, vlan_id = 0; fiph = (struct fip_header *)skb->data; @@ -2757,7 +2752,7 @@ static int fcoe_ctlr_vn_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) goto drop; } - rc = fcoe_ctlr_vn_parse(fip, skb, &buf.rdata); + rc = fcoe_ctlr_vn_parse(fip, skb, &frport); if (rc) { LIBFCOE_FIP_DBG(fip, "vn_recv vn_parse error %d\n", rc); goto drop; @@ -2766,19 +2761,19 @@ static int fcoe_ctlr_vn_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) mutex_lock(&fip->ctlr_mutex); switch (sub) { case FIP_SC_VN_PROBE_REQ: - fcoe_ctlr_vn_probe_req(fip, &buf.rdata); + fcoe_ctlr_vn_probe_req(fip, &frport.rdata); break; case FIP_SC_VN_PROBE_REP: - fcoe_ctlr_vn_probe_reply(fip, &buf.rdata); + fcoe_ctlr_vn_probe_reply(fip, &frport.rdata); break; case FIP_SC_VN_CLAIM_NOTIFY: - fcoe_ctlr_vn_claim_notify(fip, &buf.rdata); + fcoe_ctlr_vn_claim_notify(fip, &frport.rdata); break; case FIP_SC_VN_CLAIM_REP: - fcoe_ctlr_vn_claim_resp(fip, &buf.rdata); + fcoe_ctlr_vn_claim_resp(fip, &frport.rdata); break; case FIP_SC_VN_BEACON: - fcoe_ctlr_vn_beacon(fip, &buf.rdata); + fcoe_ctlr_vn_beacon(fip, &frport.rdata); break; default: LIBFCOE_FIP_DBG(fip, "vn_recv unknown subcode %d\n", sub); @@ -2802,22 +2797,18 @@ drop: */ static int fcoe_ctlr_vlan_parse(struct fcoe_ctlr *fip, struct sk_buff *skb, - struct fc_rport_priv *rdata) + struct fcoe_rport *frport) { struct fip_header *fiph; struct fip_desc *desc = NULL; struct fip_mac_desc *macd = NULL; struct fip_wwn_desc *wwn = NULL; - struct fcoe_rport *frport; size_t rlen; size_t dlen; u32 desc_mask = 0; u32 dtype; u8 sub; - memset(rdata, 0, sizeof(*rdata) + sizeof(*frport)); - frport = fcoe_ctlr_rport(rdata); - fiph = (struct fip_header *)skb->data; frport->flags = ntohs(fiph->fip_flags); @@ -2871,7 +2862,8 @@ static int fcoe_ctlr_vlan_parse(struct fcoe_ctlr *fip, if (dlen != sizeof(struct fip_wwn_desc)) goto len_err; wwn = (struct fip_wwn_desc *)desc; - rdata->ids.node_name = get_unaligned_be64(&wwn->fd_wwn); + frport->rdata.ids.node_name = + get_unaligned_be64(&wwn->fd_wwn); break; default: LIBFCOE_FIP_DBG(fip, "unexpected descriptor type %x " @@ -2982,22 +2974,19 @@ static int fcoe_ctlr_vlan_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) { struct fip_header *fiph; enum fip_vlan_subcode sub; - struct { - struct fc_rport_priv rdata; - struct fcoe_rport frport; - } buf; + struct fcoe_rport frport = { }; int rc; fiph = (struct fip_header *)skb->data; sub = fiph->fip_subcode; - rc = fcoe_ctlr_vlan_parse(fip, skb, &buf.rdata); + rc = fcoe_ctlr_vlan_parse(fip, skb, &frport); if (rc) { LIBFCOE_FIP_DBG(fip, "vlan_recv vlan_parse error %d\n", rc); goto drop; } mutex_lock(&fip->ctlr_mutex); if (sub == FIP_SC_VL_REQ) - fcoe_ctlr_vlan_disc_reply(fip, &buf.rdata); + fcoe_ctlr_vlan_disc_reply(fip, &frport.rdata); mutex_unlock(&fip->ctlr_mutex); drop: diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index e0f3852fdad1..da6e97d8dc3b 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -128,6 +128,7 @@ EXPORT_SYMBOL(fc_rport_lookup); struct fc_rport_priv *fc_rport_create(struct fc_lport *lport, u32 port_id) { struct fc_rport_priv *rdata; + size_t rport_priv_size = sizeof(*rdata); lockdep_assert_held(&lport->disc.disc_mutex); @@ -135,7 +136,9 @@ struct fc_rport_priv *fc_rport_create(struct fc_lport *lport, u32 port_id) if (rdata) return rdata; - rdata = kzalloc(sizeof(*rdata) + lport->rport_priv_size, GFP_KERNEL); + if (lport->rport_priv_size > 0) + rport_priv_size = lport->rport_priv_size; + rdata = kzalloc(rport_priv_size, GFP_KERNEL); if (!rdata) return NULL; diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index dc14b52577f7..2568cb0627ec 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -229,6 +229,7 @@ struct fcoe_fcf { * @vn_mac: VN_Node assigned MAC address for data */ struct fcoe_rport { + struct fc_rport_priv rdata; unsigned long time; u16 fcoe_len; u16 flags; -- cgit v1.2.3 From 89e524c04fa966330e2e80ab2bc50b9944c5847a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 30 Jul 2019 13:10:14 +0200 Subject: loop: Fix mount(2) failure due to race with LOOP_SET_FD Commit 33ec3e53e7b1 ("loop: Don't change loop device under exclusive opener") made LOOP_SET_FD ioctl acquire exclusive block device reference while it updates loop device binding. However this can make perfectly valid mount(2) fail with EBUSY due to racing LOOP_SET_FD holding temporarily the exclusive bdev reference in cases like this: for i in {a..z}{a..z}; do dd if=/dev/zero of=$i.image bs=1k count=0 seek=1024 mkfs.ext2 $i.image mkdir mnt$i done echo "Run" for i in {a..z}{a..z}; do mount -o loop -t ext2 $i.image mnt$i & done Fix the problem by not getting full exclusive bdev reference in LOOP_SET_FD but instead just mark the bdev as being claimed while we update the binding information. This just blocks new exclusive openers instead of failing them with EBUSY thus fixing the problem. Fixes: 33ec3e53e7b1 ("loop: Don't change loop device under exclusive opener") Cc: stable@vger.kernel.org Tested-by: Kai-Heng Feng Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- drivers/block/loop.c | 16 +++++----- fs/block_dev.c | 83 ++++++++++++++++++++++++++++++++++++---------------- include/linux/fs.h | 6 ++++ 3 files changed, 73 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 44c9985f352a..3036883fc9f8 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -924,6 +924,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, struct file *file; struct inode *inode; struct address_space *mapping; + struct block_device *claimed_bdev = NULL; int lo_flags = 0; int error; loff_t size; @@ -942,10 +943,11 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, * here to avoid changing device under exclusive owner. */ if (!(mode & FMODE_EXCL)) { - bdgrab(bdev); - error = blkdev_get(bdev, mode | FMODE_EXCL, loop_set_fd); - if (error) + claimed_bdev = bd_start_claiming(bdev, loop_set_fd); + if (IS_ERR(claimed_bdev)) { + error = PTR_ERR(claimed_bdev); goto out_putf; + } } error = mutex_lock_killable(&loop_ctl_mutex); @@ -1015,15 +1017,15 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, mutex_unlock(&loop_ctl_mutex); if (partscan) loop_reread_partitions(lo, bdev); - if (!(mode & FMODE_EXCL)) - blkdev_put(bdev, mode | FMODE_EXCL); + if (claimed_bdev) + bd_abort_claiming(bdev, claimed_bdev, loop_set_fd); return 0; out_unlock: mutex_unlock(&loop_ctl_mutex); out_bdev: - if (!(mode & FMODE_EXCL)) - blkdev_put(bdev, mode | FMODE_EXCL); + if (claimed_bdev) + bd_abort_claiming(bdev, claimed_bdev, loop_set_fd); out_putf: fput(file); out: diff --git a/fs/block_dev.c b/fs/block_dev.c index c2a85b587922..22591bad9353 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1181,8 +1181,7 @@ static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno) * Pointer to the block device containing @bdev on success, ERR_PTR() * value on failure. */ -static struct block_device *bd_start_claiming(struct block_device *bdev, - void *holder) +struct block_device *bd_start_claiming(struct block_device *bdev, void *holder) { struct gendisk *disk; struct block_device *whole; @@ -1229,6 +1228,62 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, return ERR_PTR(err); } } +EXPORT_SYMBOL(bd_start_claiming); + +static void bd_clear_claiming(struct block_device *whole, void *holder) +{ + lockdep_assert_held(&bdev_lock); + /* tell others that we're done */ + BUG_ON(whole->bd_claiming != holder); + whole->bd_claiming = NULL; + wake_up_bit(&whole->bd_claiming, 0); +} + +/** + * bd_finish_claiming - finish claiming of a block device + * @bdev: block device of interest + * @whole: whole block device (returned from bd_start_claiming()) + * @holder: holder that has claimed @bdev + * + * Finish exclusive open of a block device. Mark the device as exlusively + * open by the holder and wake up all waiters for exclusive open to finish. + */ +void bd_finish_claiming(struct block_device *bdev, struct block_device *whole, + void *holder) +{ + spin_lock(&bdev_lock); + BUG_ON(!bd_may_claim(bdev, whole, holder)); + /* + * Note that for a whole device bd_holders will be incremented twice, + * and bd_holder will be set to bd_may_claim before being set to holder + */ + whole->bd_holders++; + whole->bd_holder = bd_may_claim; + bdev->bd_holders++; + bdev->bd_holder = holder; + bd_clear_claiming(whole, holder); + spin_unlock(&bdev_lock); +} +EXPORT_SYMBOL(bd_finish_claiming); + +/** + * bd_abort_claiming - abort claiming of a block device + * @bdev: block device of interest + * @whole: whole block device (returned from bd_start_claiming()) + * @holder: holder that has claimed @bdev + * + * Abort claiming of a block device when the exclusive open failed. This can be + * also used when exclusive open is not actually desired and we just needed + * to block other exclusive openers for a while. + */ +void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, + void *holder) +{ + spin_lock(&bdev_lock); + bd_clear_claiming(whole, holder); + spin_unlock(&bdev_lock); +} +EXPORT_SYMBOL(bd_abort_claiming); #ifdef CONFIG_SYSFS struct bd_holder_disk { @@ -1698,29 +1753,7 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) /* finish claiming */ mutex_lock(&bdev->bd_mutex); - spin_lock(&bdev_lock); - - if (!res) { - BUG_ON(!bd_may_claim(bdev, whole, holder)); - /* - * Note that for a whole device bd_holders - * will be incremented twice, and bd_holder - * will be set to bd_may_claim before being - * set to holder - */ - whole->bd_holders++; - whole->bd_holder = bd_may_claim; - bdev->bd_holders++; - bdev->bd_holder = holder; - } - - /* tell others that we're done */ - BUG_ON(whole->bd_claiming != holder); - whole->bd_claiming = NULL; - wake_up_bit(&whole->bd_claiming, 0); - - spin_unlock(&bdev_lock); - + bd_finish_claiming(bdev, whole, holder); /* * Block event polling for write claims if requested. Any * write holder makes the write_holder state stick until diff --git a/include/linux/fs.h b/include/linux/fs.h index 56b8e358af5c..997a530ff4e9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2598,6 +2598,12 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder); extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); +extern struct block_device *bd_start_claiming(struct block_device *bdev, + void *holder); +extern void bd_finish_claiming(struct block_device *bdev, + struct block_device *whole, void *holder); +extern void bd_abort_claiming(struct block_device *bdev, + struct block_device *whole, void *holder); extern void blkdev_put(struct block_device *bdev, fmode_t mode); extern int __blkdev_reread_part(struct block_device *bdev); extern int blkdev_reread_part(struct block_device *bdev); -- cgit v1.2.3 From 055d88242a6046a1ceac3167290f054c72571cd9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 30 Jul 2019 21:25:20 +0200 Subject: compat_ioctl: pppoe: fix PPPOEIOCSFWD handling Support for handling the PPPOEIOCSFWD ioctl in compat mode was added in linux-2.5.69 along with hundreds of other commands, but was always broken sincen only the structure is compatible, but the command number is not, due to the size being sizeof(size_t), or at first sizeof(sizeof((struct sockaddr_pppox)), which is different on 64-bit architectures. Guillaume Nault adds: And the implementation was broken until 2016 (see 29e73269aa4d ("pppoe: fix reference counting in PPPoE proxy")), and nobody ever noticed. I should probably have removed this ioctl entirely instead of fixing it. Clearly, it has never been used. Fix it by adding a compat_ioctl handler for all pppoe variants that translates the command number and then calls the regular ioctl function. All other ioctl commands handled by pppoe are compatible between 32-bit and 64-bit, and require compat_ptr() conversion. This should apply to all stable kernels. Acked-by: Guillaume Nault Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ppp/pppoe.c | 3 +++ drivers/net/ppp/pppox.c | 13 +++++++++++++ drivers/net/ppp/pptp.c | 3 +++ fs/compat_ioctl.c | 3 --- include/linux/if_pppox.h | 3 +++ net/l2tp/l2tp_ppp.c | 3 +++ 6 files changed, 25 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 1d902ecb4aa8..a44dd3c8af63 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -1115,6 +1115,9 @@ static const struct proto_ops pppoe_ops = { .recvmsg = pppoe_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = pppox_compat_ioctl, +#endif }; static const struct pppox_proto pppoe_proto = { diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c index 5ef422a43d70..08364f10a43f 100644 --- a/drivers/net/ppp/pppox.c +++ b/drivers/net/ppp/pppox.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -98,6 +99,18 @@ int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) EXPORT_SYMBOL(pppox_ioctl); +#ifdef CONFIG_COMPAT +int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + if (cmd == PPPOEIOCSFWD32) + cmd = PPPOEIOCSFWD; + + return pppox_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); +} + +EXPORT_SYMBOL(pppox_compat_ioctl); +#endif + static int pppox_create(struct net *net, struct socket *sock, int protocol, int kern) { diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index a8e52c8e4128..734de7de03f7 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -623,6 +623,9 @@ static const struct proto_ops pptp_ops = { .recvmsg = sock_no_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = pppox_compat_ioctl, +#endif }; static const struct pppox_proto pppox_pptp_proto = { diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 6e30949d9f77..a7ec2d3dff92 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -638,9 +638,6 @@ COMPATIBLE_IOCTL(PPPIOCDISCONN) COMPATIBLE_IOCTL(PPPIOCATTCHAN) COMPATIBLE_IOCTL(PPPIOCGCHAN) COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS) -/* PPPOX */ -COMPATIBLE_IOCTL(PPPOEIOCSFWD) -COMPATIBLE_IOCTL(PPPOEIOCDFWD) /* Big A */ /* sparc only */ /* Big Q for sound/OSS */ diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 8b728750a625..69e813bcb947 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -80,6 +80,9 @@ extern int register_pppox_proto(int proto_num, const struct pppox_proto *pp); extern void unregister_pppox_proto(int proto_num); extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */ extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); +extern int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); + +#define PPPOEIOCSFWD32 _IOW(0xB1 ,0, compat_size_t) /* PPPoX socket states */ enum { diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 1d0e5904dedf..c54cb59593ef 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1681,6 +1681,9 @@ static const struct proto_ops pppol2tp_ops = { .recvmsg = pppol2tp_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = pppox_compat_ioctl, +#endif }; static const struct pppox_proto pppol2tp_proto = { -- cgit v1.2.3 From b1d45c23284e55a379f85554a27a548b7988d47a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 20 Jul 2019 19:39:43 +0900 Subject: tracing: Fix header include guards in trace event headers These include guards are broken. Match the #if !define() and #define lines so that they work correctly. Link: http://lkml.kernel.org/r/20190720103943.16982-1-yamada.masahiro@socionext.com Fixes: f54d1867005c3 ("dma-buf: Rename struct fence to dma_fence") Fixes: 2e26ca7150a4f ("tracing: Fix tracepoint.h DECLARE_TRACE() to allow more than one header") Fixes: e543002f77f46 ("qdisc: add tracepoint qdisc:qdisc_dequeue for dequeued SKBs") Fixes: 95f295f9fe081 ("dmaengine: tegra: add tracepoints to driver") Signed-off-by: Masahiro Yamada Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/dma_fence.h | 2 +- include/trace/events/napi.h | 4 ++-- include/trace/events/qdisc.h | 4 ++-- include/trace/events/tegra_apb_dma.h | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h index 2212adda8f77..64e92d56c6a8 100644 --- a/include/trace/events/dma_fence.h +++ b/include/trace/events/dma_fence.h @@ -2,7 +2,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM dma_fence -#if !defined(_TRACE_FENCE_H) || defined(TRACE_HEADER_MULTI_READ) +#if !defined(_TRACE_DMA_FENCE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_DMA_FENCE_H #include diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index f3a12566bed0..6678cf8b235b 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -3,7 +3,7 @@ #define TRACE_SYSTEM napi #if !defined(_TRACE_NAPI_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_NAPI_H_ +#define _TRACE_NAPI_H #include #include @@ -38,7 +38,7 @@ TRACE_EVENT(napi_poll, #undef NO_DEV -#endif /* _TRACE_NAPI_H_ */ +#endif /* _TRACE_NAPI_H */ /* This part must be outside protection */ #include diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index 60d0d8bd336d..0d1a9ebf55ba 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -2,7 +2,7 @@ #define TRACE_SYSTEM qdisc #if !defined(_TRACE_QDISC_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_QDISC_H_ +#define _TRACE_QDISC_H #include #include @@ -44,7 +44,7 @@ TRACE_EVENT(qdisc_dequeue, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); -#endif /* _TRACE_QDISC_H_ */ +#endif /* _TRACE_QDISC_H */ /* This part must be outside protection */ #include diff --git a/include/trace/events/tegra_apb_dma.h b/include/trace/events/tegra_apb_dma.h index 0818f6286110..971cd02d2daf 100644 --- a/include/trace/events/tegra_apb_dma.h +++ b/include/trace/events/tegra_apb_dma.h @@ -1,5 +1,5 @@ #if !defined(_TRACE_TEGRA_APB_DMA_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_TEGRA_APM_DMA_H +#define _TRACE_TEGRA_APB_DMA_H #include #include @@ -55,7 +55,7 @@ TRACE_EVENT(tegra_dma_isr, TP_printk("%s: irq %d\n", __get_str(chan), __entry->irq) ); -#endif /* _TRACE_TEGRADMA_H */ +#endif /* _TRACE_TEGRA_APB_DMA_H */ /* This part must be outside protection */ #include -- cgit v1.2.3 From a78d14a31666c636a9e00a589032119fb59e3b94 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Jul 2019 09:46:29 +0200 Subject: xen: avoid link error on ARM Building the privcmd code as a loadable module on ARM, we get a link error due to the private cache management functions: ERROR: "__sync_icache_dcache" [drivers/xen/xen-privcmd.ko] undefined! Move the code into a new that is always built in when Xen is enabled, as suggested by Juergen Gross and Boris Ostrovsky. Signed-off-by: Arnd Bergmann Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 35 +++++------------------------------ drivers/xen/xlate_mmu.c | 32 ++++++++++++++++++++++++++++++++ include/xen/xen-ops.h | 3 +++ 3 files changed, 40 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 2f5ce7230a43..c6070e70dd73 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -724,25 +724,6 @@ static long privcmd_ioctl_restrict(struct file *file, void __user *udata) return 0; } -struct remap_pfn { - struct mm_struct *mm; - struct page **pages; - pgprot_t prot; - unsigned long i; -}; - -static int remap_pfn_fn(pte_t *ptep, unsigned long addr, void *data) -{ - struct remap_pfn *r = data; - struct page *page = r->pages[r->i]; - pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), r->prot)); - - set_pte_at(r->mm, addr, ptep, pte); - r->i++; - - return 0; -} - static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) { struct privcmd_data *data = file->private_data; @@ -774,7 +755,8 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) goto out; } - if (xen_feature(XENFEAT_auto_translated_physmap)) { + if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) && + xen_feature(XENFEAT_auto_translated_physmap)) { unsigned int nr = DIV_ROUND_UP(kdata.num, XEN_PFN_PER_PAGE); struct page **pages; unsigned int i; @@ -808,16 +790,9 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) if (rc) goto out; - if (xen_feature(XENFEAT_auto_translated_physmap)) { - struct remap_pfn r = { - .mm = vma->vm_mm, - .pages = vma->vm_private_data, - .prot = vma->vm_page_prot, - }; - - rc = apply_to_page_range(r.mm, kdata.addr, - kdata.num << PAGE_SHIFT, - remap_pfn_fn, &r); + if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) && + xen_feature(XENFEAT_auto_translated_physmap)) { + rc = xen_remap_vma_range(vma, kdata.addr, kdata.num << PAGE_SHIFT); } else { unsigned int domid = (xdata.flags & XENMEM_rsrc_acq_caller_owned) ? diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index ba883a80b3c0..7b1077f0abcb 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -262,3 +262,35 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt, return 0; } EXPORT_SYMBOL_GPL(xen_xlate_map_ballooned_pages); + +struct remap_pfn { + struct mm_struct *mm; + struct page **pages; + pgprot_t prot; + unsigned long i; +}; + +static int remap_pfn_fn(pte_t *ptep, unsigned long addr, void *data) +{ + struct remap_pfn *r = data; + struct page *page = r->pages[r->i]; + pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), r->prot)); + + set_pte_at(r->mm, addr, ptep, pte); + r->i++; + + return 0; +} + +/* Used by the privcmd module, but has to be built-in on ARM */ +int xen_remap_vma_range(struct vm_area_struct *vma, unsigned long addr, unsigned long len) +{ + struct remap_pfn r = { + .mm = vma->vm_mm, + .pages = vma->vm_private_data, + .prot = vma->vm_page_prot, + }; + + return apply_to_page_range(vma->vm_mm, addr, len, remap_pfn_fn, &r); +} +EXPORT_SYMBOL_GPL(xen_remap_vma_range); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 4969817124a8..98b30c1613b2 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -109,6 +109,9 @@ static inline int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, } #endif +int xen_remap_vma_range(struct vm_area_struct *vma, unsigned long addr, + unsigned long len); + /* * xen_remap_domain_gfn_array() - map an array of foreign frames by gfn * @vma: VMA to map the pages into -- cgit v1.2.3 From b877ac9815a8fe7e5f6d7fdde3dc34652408840a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 14 Jun 2019 07:46:04 +0200 Subject: xen/swiotlb: remember having called xen_create_contiguous_region() Instead of always calling xen_destroy_contiguous_region() in case the memory is DMA-able for the used device, do so only in case it has been made DMA-able via xen_create_contiguous_region() before. This will avoid a lot of xen_destroy_contiguous_region() calls for 64-bit capable devices. As the memory in question is owned by swiotlb-xen the PG_owner_priv_1 flag of the first allocated page can be used for remembering. Signed-off-by: Juergen Gross Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 4 +++- include/linux/page-flags.h | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 37ddcfcfbb21..ceb681cf64bb 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -322,6 +322,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs); return NULL; } + SetPageXenRemapped(virt_to_page(ret)); } memset(ret, 0, size); return ret; @@ -346,7 +347,8 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, size = 1UL << (order + XEN_PAGE_SHIFT); if (!WARN_ON((dev_addr + size - 1 > dma_mask) || - range_straddles_page_boundary(phys, size))) + range_straddles_page_boundary(phys, size)) && + TestClearPageXenRemapped(virt_to_page(vaddr))) xen_destroy_contiguous_region(phys, order); xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b848517da64c..f91cb8898ff0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -152,6 +152,8 @@ enum pageflags { PG_savepinned = PG_dirty, /* Has a grant mapping of another (foreign) domain's page. */ PG_foreign = PG_owner_priv_1, + /* Remapped by swiotlb-xen. */ + PG_xen_remapped = PG_owner_priv_1, /* SLOB */ PG_slob_free = PG_private, @@ -329,6 +331,8 @@ PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND) TESTSCFLAG(Pinned, pinned, PF_NO_COMPOUND) PAGEFLAG(SavePinned, savepinned, PF_NO_COMPOUND); PAGEFLAG(Foreign, foreign, PF_NO_COMPOUND); +PAGEFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) + TESTCLEARFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) -- cgit v1.2.3 From d9b42dfab513c9130ee0458f2e6febb75c89d1c8 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 3 Jul 2019 09:58:18 +0200 Subject: drm/client: Support unmapping of DRM client buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DRM clients, such as the fbdev emulation, have their buffer objects mapped by default. Mapping a buffer implicitly prevents its relocation. Hence, the buffer may permanently consume video memory while it's allocated. This is a problem for drivers of low-memory devices, such as ast, mgag200 or older framebuffer hardware, which will then not have enough memory to display other content (e.g., X11). This patch introduces drm_client_buffer_vmap() and _vunmap(). Internal DRM clients can use these functions to unmap and remap buffer objects as needed. There's no reference counting for vmap operations. Callers are expected to either keep buffers mapped (as it is now), or call vmap and vunmap in pairs around code that accesses the mapped memory. v2: * remove several duplicated NULL-pointer checks v3: * style and typo fixes Signed-off-by: Thomas Zimmermann Reviewed-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/315831/ Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/drm_client.c | 66 +++++++++++++++++++++++++++++++++++++------- include/drm/drm_client.h | 2 ++ 2 files changed, 58 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 410572f14257..fb107b24baae 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -281,6 +281,42 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u buffer->gem = obj; + vaddr = drm_client_buffer_vmap(buffer); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + goto err_delete; + } + + return buffer; + +err_delete: + drm_client_buffer_delete(buffer); + + return ERR_PTR(ret); +} + +/** + * drm_client_buffer_vmap - Map DRM client buffer into address space + * @buffer: DRM client buffer + * + * This function maps a client buffer into kernel address space. If the + * buffer is already mapped, it returns the mapping's address. + * + * Client buffer mappings are not ref'counted. Each call to + * drm_client_buffer_vmap() should be followed by a call to + * drm_client_buffer_vunmap(); or the client buffer should be mapped + * throughout its lifetime. The latter is the default. + * + * Returns: + * The mapped memory's address + */ +void *drm_client_buffer_vmap(struct drm_client_buffer *buffer) +{ + void *vaddr; + + if (buffer->vaddr) + return buffer->vaddr; + /* * FIXME: The dependency on GEM here isn't required, we could * convert the driver handle to a dma-buf instead and use the @@ -289,21 +325,31 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u * fd_install step out of the driver backend hooks, to make that * final step optional for internal users. */ - vaddr = drm_gem_vmap(obj); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto err_delete; - } + vaddr = drm_gem_vmap(buffer->gem); + if (IS_ERR(vaddr)) + return vaddr; buffer->vaddr = vaddr; - return buffer; - -err_delete: - drm_client_buffer_delete(buffer); + return vaddr; +} +EXPORT_SYMBOL(drm_client_buffer_vmap); - return ERR_PTR(ret); +/** + * drm_client_buffer_vunmap - Unmap DRM client buffer + * @buffer: DRM client buffer + * + * This function removes a client buffer's memory mapping. This + * function is only required by clients that manage their buffers + * by themselves. By default, DRM client buffers are mapped throughout + * their entire lifetime. + */ +void drm_client_buffer_vunmap(struct drm_client_buffer *buffer) +{ + drm_gem_vunmap(buffer->gem, buffer->vaddr); + buffer->vaddr = NULL; } +EXPORT_SYMBOL(drm_client_buffer_vunmap); static void drm_client_buffer_rmfb(struct drm_client_buffer *buffer) { diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h index 72d51d1e9dd9..5cf2c5dd8b1e 100644 --- a/include/drm/drm_client.h +++ b/include/drm/drm_client.h @@ -149,6 +149,8 @@ struct drm_client_buffer { struct drm_client_buffer * drm_client_framebuffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format); void drm_client_framebuffer_delete(struct drm_client_buffer *buffer); +void *drm_client_buffer_vmap(struct drm_client_buffer *buffer); +void drm_client_buffer_vunmap(struct drm_client_buffer *buffer); int drm_client_modeset_create(struct drm_client_dev *client); void drm_client_modeset_free(struct drm_client_dev *client); -- cgit v1.2.3 From 01b947afaa940327e7adf57070a4bf3d0bed9810 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 5 Jul 2019 09:31:00 +0200 Subject: drm/fb-helper: Instanciate shadow FB if configured in device's mode_config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generic framebuffer emulation uses a shadow buffer for framebuffers with dirty() function. If drivers want to use the shadow FB without such a function, they can now set prefer_shadow or prefer_shadow_fbdev in their mode_config structures. The former flag is exported to userspace, the latter flag is fbdev-only. v3: * only schedule dirty worker if fbdev uses shadow fb * test shadow fb settings with boolean operators * use bool for struct drm_mode_config.prefer_shadow_fbdev * fix documentation comments Signed-off-by: Thomas Zimmermann Reviewed-by: Noralf Trønnes Tested-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/315834/ Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/drm_fb_helper.c | 18 +++++++++++++++--- include/drm/drm_mode_config.h | 7 +++++++ 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 7ba6a0255821..a7ba5b4902d6 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -421,7 +421,9 @@ static void drm_fb_helper_dirty_work(struct work_struct *work) return; drm_fb_helper_dirty_blit_real(helper, &clip_copy); } - helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, &clip_copy, 1); + if (helper->fb->funcs->dirty) + helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, + &clip_copy, 1); if (helper->buffer) drm_client_buffer_vunmap(helper->buffer); @@ -613,6 +615,16 @@ void drm_fb_helper_unlink_fbi(struct drm_fb_helper *fb_helper) } EXPORT_SYMBOL(drm_fb_helper_unlink_fbi); +static bool drm_fbdev_use_shadow_fb(struct drm_fb_helper *fb_helper) +{ + struct drm_device *dev = fb_helper->dev; + struct drm_framebuffer *fb = fb_helper->fb; + + return dev->mode_config.prefer_shadow_fbdev || + dev->mode_config.prefer_shadow || + fb->funcs->dirty; +} + static void drm_fb_helper_dirty(struct fb_info *info, u32 x, u32 y, u32 width, u32 height) { @@ -620,7 +632,7 @@ static void drm_fb_helper_dirty(struct fb_info *info, u32 x, u32 y, struct drm_clip_rect *clip = &helper->dirty_clip; unsigned long flags; - if (!helper->fb->funcs->dirty) + if (!drm_fbdev_use_shadow_fb(helper)) return; spin_lock_irqsave(&helper->dirty_lock, flags); @@ -2213,7 +2225,7 @@ int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, drm_fb_helper_fill_info(fbi, fb_helper, sizes); - if (fb->funcs->dirty) { + if (drm_fbdev_use_shadow_fb(fb_helper)) { struct fb_ops *fbops; void *shadow; diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 759d462d028b..f57eea0481e0 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -852,6 +852,13 @@ struct drm_mode_config { /* dumb ioctl parameters */ uint32_t preferred_depth, prefer_shadow; + /** + * @prefer_shadow_fbdev: + * + * Hint to framebuffer emulation to prefer shadow-fb rendering. + */ + bool prefer_shadow_fbdev; + /** * @quirk_addfb_prefer_xbgr_30bpp: * -- cgit v1.2.3 From 621e55ff5b8e0ab5d1063f0eae0ef3960bef8f6e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 31 Jul 2019 11:18:40 +0300 Subject: RDMA/devices: Do not deadlock during client removal lockdep reports: WARNING: possible circular locking dependency detected modprobe/302 is trying to acquire lock: 0000000007c8919c ((wq_completion)ib_cm){+.+.}, at: flush_workqueue+0xdf/0x990 but task is already holding lock: 000000002d3d2ca9 (&device->client_data_rwsem){++++}, at: remove_client_context+0x79/0xd0 [ib_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&device->client_data_rwsem){++++}: down_read+0x3f/0x160 ib_get_net_dev_by_params+0xd5/0x200 [ib_core] cma_ib_req_handler+0x5f6/0x2090 [rdma_cm] cm_process_work+0x29/0x110 [ib_cm] cm_req_handler+0x10f5/0x1c00 [ib_cm] cm_work_handler+0x54c/0x311d [ib_cm] process_one_work+0x4aa/0xa30 worker_thread+0x62/0x5b0 kthread+0x1ca/0x1f0 ret_from_fork+0x24/0x30 -> #1 ((work_completion)(&(&work->work)->work)){+.+.}: process_one_work+0x45f/0xa30 worker_thread+0x62/0x5b0 kthread+0x1ca/0x1f0 ret_from_fork+0x24/0x30 -> #0 ((wq_completion)ib_cm){+.+.}: lock_acquire+0xc8/0x1d0 flush_workqueue+0x102/0x990 cm_remove_one+0x30e/0x3c0 [ib_cm] remove_client_context+0x94/0xd0 [ib_core] disable_device+0x10a/0x1f0 [ib_core] __ib_unregister_device+0x5a/0xe0 [ib_core] ib_unregister_device+0x21/0x30 [ib_core] mlx5_ib_stage_ib_reg_cleanup+0x9/0x10 [mlx5_ib] __mlx5_ib_remove+0x3d/0x70 [mlx5_ib] mlx5_ib_remove+0x12e/0x140 [mlx5_ib] mlx5_remove_device+0x144/0x150 [mlx5_core] mlx5_unregister_interface+0x3f/0xf0 [mlx5_core] mlx5_ib_cleanup+0x10/0x3a [mlx5_ib] __x64_sys_delete_module+0x227/0x350 do_syscall_64+0xc3/0x6a4 entry_SYSCALL_64_after_hwframe+0x49/0xbe Which is due to the read side of the client_data_rwsem being obtained recursively through a work queue flush during cm client removal. The lock is being held across the remove in remove_client_context() so that the function is a fence, once it returns the client is removed. This is required so that the two callers do not proceed with destruction until the client completes removal. Instead of using client_data_rwsem use the existing device unregistration refcount and add a similar client unregistration (client->uses) refcount. This will fence the two unregistration paths without holding any locks. Cc: Fixes: 921eab1143aa ("RDMA/devices: Re-organize device.c locking") Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190731081841.32345-2-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 54 ++++++++++++++++++++++++++++++---------- include/rdma/ib_verbs.h | 3 +++ 2 files changed, 44 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 9773145dee09..d86fbabe48d6 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -99,6 +99,12 @@ static LIST_HEAD(client_list); static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); static DECLARE_RWSEM(clients_rwsem); +static void ib_client_put(struct ib_client *client) +{ + if (refcount_dec_and_test(&client->uses)) + complete(&client->uses_zero); +} + /* * If client_data is registered then the corresponding client must also still * be registered. @@ -660,6 +666,14 @@ static int add_client_context(struct ib_device *device, return 0; down_write(&device->client_data_rwsem); + /* + * So long as the client is registered hold both the client and device + * unregistration locks. + */ + if (!refcount_inc_not_zero(&client->uses)) + goto out_unlock; + refcount_inc(&device->refcount); + /* * Another caller to add_client_context got here first and has already * completely initialized context. @@ -683,6 +697,9 @@ static int add_client_context(struct ib_device *device, return 0; out: + ib_device_put(device); + ib_client_put(client); +out_unlock: up_write(&device->client_data_rwsem); return ret; } @@ -702,7 +719,7 @@ static void remove_client_context(struct ib_device *device, client_data = xa_load(&device->client_data, client_id); xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED); client = xa_load(&clients, client_id); - downgrade_write(&device->client_data_rwsem); + up_write(&device->client_data_rwsem); /* * Notice we cannot be holding any exclusive locks when calling the @@ -712,17 +729,13 @@ static void remove_client_context(struct ib_device *device, * * For this reason clients and drivers should not call the * unregistration functions will holdling any locks. - * - * It tempting to drop the client_data_rwsem too, but this is required - * to ensure that unregister_client does not return until all clients - * are completely unregistered, which is required to avoid module - * unloading races. */ if (client->remove) client->remove(device, client_data); xa_erase(&device->client_data, client_id); - up_read(&device->client_data_rwsem); + ib_device_put(device); + ib_client_put(client); } static int alloc_port_data(struct ib_device *device) @@ -1705,6 +1718,8 @@ int ib_register_client(struct ib_client *client) unsigned long index; int ret; + refcount_set(&client->uses, 1); + init_completion(&client->uses_zero); ret = assign_client_id(client); if (ret) return ret; @@ -1740,16 +1755,29 @@ void ib_unregister_client(struct ib_client *client) unsigned long index; down_write(&clients_rwsem); + ib_client_put(client); xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED); up_write(&clients_rwsem); + + /* We do not want to have locks while calling client->remove() */ + rcu_read_lock(); + xa_for_each (&devices, index, device) { + if (!ib_device_try_get(device)) + continue; + rcu_read_unlock(); + + remove_client_context(device, client->client_id); + + ib_device_put(device); + rcu_read_lock(); + } + rcu_read_unlock(); + /* - * Every device still known must be serialized to make sure we are - * done with the client callbacks before we return. + * remove_client_context() is not a fence, it can return even though a + * removal is ongoing. Wait until all removals are completed. */ - down_read(&devices_rwsem); - xa_for_each (&devices, index, device) - remove_client_context(device, client->client_id); - up_read(&devices_rwsem); + wait_for_completion(&client->uses_zero); down_write(&clients_rwsem); list_del(&client->list); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c5f8a9f17063..7b80ec822043 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2647,6 +2647,9 @@ struct ib_client { const union ib_gid *gid, const struct sockaddr *addr, void *client_data); + + refcount_t uses; + struct completion uses_zero; struct list_head list; u32 client_id; -- cgit v1.2.3 From 9cd5881719e9555cae300ec8b389eda3c8101339 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 31 Jul 2019 11:18:41 +0300 Subject: RDMA/devices: Remove the lock around remove_client_context Due to the complexity of client->remove() callbacks it is desirable to not hold any locks while calling them. Remove the last one by tracking only the highest client ID and running backwards from there over the xarray. Since the only purpose of that lock was to protect the linked list, we can drop the lock. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190731081841.32345-3-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 48 ++++++++++++++++++++++------------------ include/rdma/ib_verbs.h | 1 - 2 files changed, 27 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d86fbabe48d6..ea8661a00651 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -94,7 +94,7 @@ static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); static DECLARE_RWSEM(devices_rwsem); #define DEVICE_REGISTERED XA_MARK_1 -static LIST_HEAD(client_list); +static u32 highest_client_id; #define CLIENT_REGISTERED XA_MARK_1 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); static DECLARE_RWSEM(clients_rwsem); @@ -1237,7 +1237,7 @@ static int setup_device(struct ib_device *device) static void disable_device(struct ib_device *device) { - struct ib_client *client; + u32 cid; WARN_ON(!refcount_read(&device->refcount)); @@ -1245,10 +1245,19 @@ static void disable_device(struct ib_device *device) xa_clear_mark(&devices, device->index, DEVICE_REGISTERED); up_write(&devices_rwsem); + /* + * Remove clients in LIFO order, see assign_client_id. This could be + * more efficient if xarray learns to reverse iterate. Since no new + * clients can be added to this ib_device past this point we only need + * the maximum possible client_id value here. + */ down_read(&clients_rwsem); - list_for_each_entry_reverse(client, &client_list, list) - remove_client_context(device, client->client_id); + cid = highest_client_id; up_read(&clients_rwsem); + while (cid) { + cid--; + remove_client_context(device, cid); + } /* Pairs with refcount_set in enable_device */ ib_device_put(device); @@ -1675,30 +1684,31 @@ static int assign_client_id(struct ib_client *client) /* * The add/remove callbacks must be called in FIFO/LIFO order. To * achieve this we assign client_ids so they are sorted in - * registration order, and retain a linked list we can reverse iterate - * to get the LIFO order. The extra linked list can go away if xarray - * learns to reverse iterate. + * registration order. */ - if (list_empty(&client_list)) { - client->client_id = 0; - } else { - struct ib_client *last; - - last = list_last_entry(&client_list, struct ib_client, list); - client->client_id = last->client_id + 1; - } + client->client_id = highest_client_id; ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL); if (ret) goto out; + highest_client_id++; xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); - list_add_tail(&client->list, &client_list); out: up_write(&clients_rwsem); return ret; } +static void remove_client_id(struct ib_client *client) +{ + down_write(&clients_rwsem); + xa_erase(&clients, client->client_id); + for (; highest_client_id; highest_client_id--) + if (xa_load(&clients, highest_client_id - 1)) + break; + up_write(&clients_rwsem); +} + /** * ib_register_client - Register an IB client * @client:Client to register @@ -1778,11 +1788,7 @@ void ib_unregister_client(struct ib_client *client) * removal is ongoing. Wait until all removals are completed. */ wait_for_completion(&client->uses_zero); - - down_write(&clients_rwsem); - list_del(&client->list); - xa_erase(&clients, client->client_id); - up_write(&clients_rwsem); + remove_client_id(client); } EXPORT_SYMBOL(ib_unregister_client); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7b80ec822043..4f225175cb91 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2650,7 +2650,6 @@ struct ib_client { refcount_t uses; struct completion uses_zero; - struct list_head list; u32 client_id; /* kverbs are not required by the client */ -- cgit v1.2.3 From 9f00baf74e4b6f79a3a3dfab44fb7bb2e797b551 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Tue, 30 Jul 2019 16:05:24 +0000 Subject: crypto: ccp - Add support for valid authsize values less than 16 AES GCM encryption allows for authsize values of 4, 8, and 12-16 bytes. Validate the requested authsize, and retain it to save in the request context. Fixes: 36cf515b9bbe2 ("crypto: ccp - Enable support for AES GCM on v5 CCPs") Cc: Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-aes-galois.c | 14 ++++++++++++++ drivers/crypto/ccp/ccp-ops.c | 26 +++++++++++++++++++++----- include/linux/ccp.h | 2 ++ 3 files changed, 37 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c index d22631cb2bb3..02eba84028b3 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-galois.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c @@ -58,6 +58,19 @@ static int ccp_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key, static int ccp_aes_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { + switch (authsize) { + case 16: + case 15: + case 14: + case 13: + case 12: + case 8: + case 4: + break; + default: + return -EINVAL; + } + return 0; } @@ -104,6 +117,7 @@ static int ccp_aes_gcm_crypt(struct aead_request *req, bool encrypt) memset(&rctx->cmd, 0, sizeof(rctx->cmd)); INIT_LIST_HEAD(&rctx->cmd.entry); rctx->cmd.engine = CCP_ENGINE_AES; + rctx->cmd.u.aes.authsize = crypto_aead_authsize(tfm); rctx->cmd.u.aes.type = ctx->u.aes.type; rctx->cmd.u.aes.mode = ctx->u.aes.mode; rctx->cmd.u.aes.action = encrypt; diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 59f9849c3662..ef723e2722a8 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -622,6 +622,7 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, unsigned long long *final; unsigned int dm_offset; + unsigned int authsize; unsigned int jobid; unsigned int ilen; bool in_place = true; /* Default value */ @@ -643,6 +644,21 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, if (!aes->key) /* Gotta have a key SGL */ return -EINVAL; + /* Zero defaults to 16 bytes, the maximum size */ + authsize = aes->authsize ? aes->authsize : AES_BLOCK_SIZE; + switch (authsize) { + case 16: + case 15: + case 14: + case 13: + case 12: + case 8: + case 4: + break; + default: + return -EINVAL; + } + /* First, decompose the source buffer into AAD & PT, * and the destination buffer into AAD, CT & tag, or * the input into CT & tag. @@ -657,7 +673,7 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen); } else { /* Input length for decryption includes tag */ - ilen = aes->src_len - AES_BLOCK_SIZE; + ilen = aes->src_len - authsize; p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen); } @@ -839,19 +855,19 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, if (aes->action == CCP_AES_ACTION_ENCRYPT) { /* Put the ciphered tag after the ciphertext. */ - ccp_get_dm_area(&final_wa, 0, p_tag, 0, AES_BLOCK_SIZE); + ccp_get_dm_area(&final_wa, 0, p_tag, 0, authsize); } else { /* Does this ciphered tag match the input? */ - ret = ccp_init_dm_workarea(&tag, cmd_q, AES_BLOCK_SIZE, + ret = ccp_init_dm_workarea(&tag, cmd_q, authsize, DMA_BIDIRECTIONAL); if (ret) goto e_tag; - ret = ccp_set_dm_area(&tag, 0, p_tag, 0, AES_BLOCK_SIZE); + ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize); if (ret) goto e_tag; ret = crypto_memneq(tag.address, final_wa.address, - AES_BLOCK_SIZE) ? -EBADMSG : 0; + authsize) ? -EBADMSG : 0; ccp_dm_free(&tag); } diff --git a/include/linux/ccp.h b/include/linux/ccp.h index 7e9c991c95e0..43ed9e77cf81 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -173,6 +173,8 @@ struct ccp_aes_engine { enum ccp_aes_mode mode; enum ccp_aes_action action; + u32 authsize; + struct scatterlist *key; u32 key_len; /* In bytes */ -- cgit v1.2.3 From ee38d94a0ad89890b770f6c876263cf9fcbfde84 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Aug 2019 21:49:02 -0700 Subject: page flags: prioritize kasan bits over last-cpuid ARM64 randdconfig builds regularly run into a build error, especially when NUMA_BALANCING and SPARSEMEM are enabled but not SPARSEMEM_VMEMMAP: #error "KASAN: not enough bits in page flags for tag" The last-cpuid bits are already contitional on the available space, so the result of the calculation is a bit random on whether they were already left out or not. Adding the kasan tag bits before last-cpuid makes it much more likely to end up with a successful build here, and should be reliable for randconfig at least, as long as that does not randomize NR_CPUS or NODES_SHIFT but uses the defaults. In order for the modified check to not trigger in the x86 vdso32 code where all constants are wrong (building with -m32), enclose all the definitions with an #ifdef. [arnd@arndb.de: build fix] Link: http://lkml.kernel.org/r/CAK8P3a3Mno1SWTcuAOT0Wa9VS15pdU6EfnkxLbDpyS55yO04+g@mail.gmail.com Link: http://lkml.kernel.org/r/20190722115520.3743282-1-arnd@arndb.de Link: https://lore.kernel.org/lkml/20190618095347.3850490-1-arnd@arndb.de/ Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc") Signed-off-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Reviewed-by: Andrey Konovalov Reviewed-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Will Deacon Cc: Christoph Lameter Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/vdso/vdso.h | 1 + include/linux/page-flags-layout.h | 18 +++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/mips/vdso/vdso.h b/arch/mips/vdso/vdso.h index 14b1931be69c..b65b169778e3 100644 --- a/arch/mips/vdso/vdso.h +++ b/arch/mips/vdso/vdso.h @@ -9,6 +9,7 @@ #if _MIPS_SIM != _MIPS_SIM_ABI64 && defined(CONFIG_64BIT) /* Building 32-bit VDSO for the 64-bit kernel. Fake a 32-bit Kconfig. */ +#define BUILD_VDSO32_64 #undef CONFIG_64BIT #define CONFIG_32BIT 1 #ifndef __ASSEMBLY__ diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 1dda31825ec4..71283739ffd2 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -32,6 +32,7 @@ #endif /* CONFIG_SPARSEMEM */ +#ifndef BUILD_VDSO32_64 /* * page->flags layout: * @@ -76,20 +77,22 @@ #define LAST_CPUPID_SHIFT 0 #endif -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#ifdef CONFIG_KASAN_SW_TAGS +#define KASAN_TAG_WIDTH 8 +#else +#define KASAN_TAG_WIDTH 0 +#endif + +#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \ + <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 #endif -#ifdef CONFIG_KASAN_SW_TAGS -#define KASAN_TAG_WIDTH 8 #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH+LAST_CPUPID_WIDTH+KASAN_TAG_WIDTH \ > BITS_PER_LONG - NR_PAGEFLAGS -#error "KASAN: not enough bits in page flags for tag" -#endif -#else -#define KASAN_TAG_WIDTH 0 +#error "Not enough bits in page flags" #endif /* @@ -104,4 +107,5 @@ #define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif +#endif #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ -- cgit v1.2.3 From cbedfe11347fe418621bd188d58a206beb676218 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 2 Aug 2019 21:49:19 -0700 Subject: asm-generic: fix -Wtype-limits compiler warnings Commit d66acc39c7ce ("bitops: Optimise get_order()") introduced a compilation warning because "rx_frag_size" is an "ushort" while PAGE_SHIFT here is 16. The commit changed the get_order() to be a multi-line macro where compilers insist to check all statements in the macro even when __builtin_constant_p(rx_frag_size) will return false as "rx_frag_size" is a module parameter. In file included from ./arch/powerpc/include/asm/page_64.h:107, from ./arch/powerpc/include/asm/page.h:242, from ./arch/powerpc/include/asm/mmu.h:132, from ./arch/powerpc/include/asm/lppaca.h:47, from ./arch/powerpc/include/asm/paca.h:17, from ./arch/powerpc/include/asm/current.h:13, from ./include/linux/thread_info.h:21, from ./arch/powerpc/include/asm/processor.h:39, from ./include/linux/prefetch.h:15, from drivers/net/ethernet/emulex/benet/be_main.c:14: drivers/net/ethernet/emulex/benet/be_main.c: In function 'be_rx_cqs_create': ./include/asm-generic/getorder.h:54:9: warning: comparison is always true due to limited range of data type [-Wtype-limits] (((n) < (1UL << PAGE_SHIFT)) ? 0 : \ ^ drivers/net/ethernet/emulex/benet/be_main.c:3138:33: note: in expansion of macro 'get_order' adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE; ^~~~~~~~~ Fix it by moving all of this multi-line macro into a proper function, and killing __get_order() off. [akpm@linux-foundation.org: remove __get_order() altogether] [cai@lca.pw: v2] Link: http://lkml.kernel.org/r/1564000166-31428-1-git-send-email-cai@lca.pw Link: http://lkml.kernel.org/r/1563914986-26502-1-git-send-email-cai@lca.pw Fixes: d66acc39c7ce ("bitops: Optimise get_order()") Signed-off-by: Qian Cai Reviewed-by: Nathan Chancellor Cc: David S. Miller Cc: Arnd Bergmann Cc: David Howells Cc: Jakub Jelinek Cc: Nick Desaulniers Cc: Bill Wendling Cc: James Y Knight Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/getorder.h | 50 +++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h index c64bea7a52be..e9f20b813a69 100644 --- a/include/asm-generic/getorder.h +++ b/include/asm-generic/getorder.h @@ -7,24 +7,6 @@ #include #include -/* - * Runtime evaluation of get_order() - */ -static inline __attribute_const__ -int __get_order(unsigned long size) -{ - int order; - - size--; - size >>= PAGE_SHIFT; -#if BITS_PER_LONG == 32 - order = fls(size); -#else - order = fls64(size); -#endif - return order; -} - /** * get_order - Determine the allocation order of a memory size * @size: The size for which to get the order @@ -43,19 +25,27 @@ int __get_order(unsigned long size) * to hold an object of the specified size. * * The result is undefined if the size is 0. - * - * This function may be used to initialise variables with compile time - * evaluations of constants. */ -#define get_order(n) \ -( \ - __builtin_constant_p(n) ? ( \ - ((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ - (((n) < (1UL << PAGE_SHIFT)) ? 0 : \ - ilog2((n) - 1) - PAGE_SHIFT + 1) \ - ) : \ - __get_order(n) \ -) +static inline __attribute_const__ int get_order(unsigned long size) +{ + if (__builtin_constant_p(size)) { + if (!size) + return BITS_PER_LONG - PAGE_SHIFT; + + if (size < (1UL << PAGE_SHIFT)) + return 0; + + return ilog2((size) - 1) - PAGE_SHIFT + 1; + } + + size--; + size >>= PAGE_SHIFT; +#if BITS_PER_LONG == 32 + return fls(size); +#else + return fls64(size); +#endif +} #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 5e5412c365a32e452daa762eac36121cb8a370bb Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 30 Jul 2019 11:30:33 -0400 Subject: net/socket: fix GCC8+ Wpacked-not-aligned warnings There are a lot of those warnings with GCC8+ 64-bit, In file included from ./include/linux/sctp.h:42, from net/core/skbuff.c:47: ./include/uapi/linux/sctp.h:395:1: warning: alignment 4 of 'struct sctp_paddr_change' is less than 8 [-Wpacked-not-aligned] } __attribute__((packed, aligned(4))); ^ ./include/uapi/linux/sctp.h:728:1: warning: alignment 4 of 'struct sctp_setpeerprim' is less than 8 [-Wpacked-not-aligned] } __attribute__((packed, aligned(4))); ^ ./include/uapi/linux/sctp.h:727:26: warning: 'sspp_addr' offset 4 in 'struct sctp_setpeerprim' isn't aligned to 8 [-Wpacked-not-aligned] struct sockaddr_storage sspp_addr; ^~~~~~~~~ ./include/uapi/linux/sctp.h:741:1: warning: alignment 4 of 'struct sctp_prim' is less than 8 [-Wpacked-not-aligned] } __attribute__((packed, aligned(4))); ^ ./include/uapi/linux/sctp.h:740:26: warning: 'ssp_addr' offset 4 in 'struct sctp_prim' isn't aligned to 8 [-Wpacked-not-aligned] struct sockaddr_storage ssp_addr; ^~~~~~~~ ./include/uapi/linux/sctp.h:792:1: warning: alignment 4 of 'struct sctp_paddrparams' is less than 8 [-Wpacked-not-aligned] } __attribute__((packed, aligned(4))); ^ ./include/uapi/linux/sctp.h:784:26: warning: 'spp_address' offset 4 in 'struct sctp_paddrparams' isn't aligned to 8 [-Wpacked-not-aligned] struct sockaddr_storage spp_address; ^~~~~~~~~~~ ./include/uapi/linux/sctp.h:905:1: warning: alignment 4 of 'struct sctp_paddrinfo' is less than 8 [-Wpacked-not-aligned] } __attribute__((packed, aligned(4))); ^ ./include/uapi/linux/sctp.h:899:26: warning: 'spinfo_address' offset 4 in 'struct sctp_paddrinfo' isn't aligned to 8 [-Wpacked-not-aligned] struct sockaddr_storage spinfo_address; ^~~~~~~~~~~~~~ This is because the commit 20c9c825b12f ("[SCTP] Fix SCTP socket options to work with 32-bit apps on 64-bit kernels.") added "packed, aligned(4)" GCC attributes to some structures but one of the members, i.e, "struct sockaddr_storage" in those structures has the attribute, "aligned(__alignof__ (struct sockaddr *)" which is 8-byte on 64-bit systems, so the commit overwrites the designed alignments for "sockaddr_storage". To fix this, "struct sockaddr_storage" needs to be aligned to 4-byte as it is only used in those packed sctp structure which is part of UAPI, and "struct __kernel_sockaddr_storage" is used in some other places of UAPI that need not to change alignments in order to not breaking userspace. Use an implicit alignment for "struct __kernel_sockaddr_storage" so it can keep the same alignments as a member in both packed and un-packed structures without breaking UAPI. Suggested-by: David Laight Signed-off-by: Qian Cai Signed-off-by: David S. Miller --- include/uapi/linux/socket.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index 8eb96021709c..c3409c8ec0dd 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -6,17 +6,24 @@ * Desired design of maximum size and alignment (see RFC2553) */ #define _K_SS_MAXSIZE 128 /* Implementation specific max size */ -#define _K_SS_ALIGNSIZE (__alignof__ (struct sockaddr *)) - /* Implementation specific desired alignment */ typedef unsigned short __kernel_sa_family_t; +/* + * The definition uses anonymous union and struct in order to control the + * default alignment. + */ struct __kernel_sockaddr_storage { - __kernel_sa_family_t ss_family; /* address family */ - /* Following field(s) are implementation specific */ - char __data[_K_SS_MAXSIZE - sizeof(unsigned short)]; + union { + struct { + __kernel_sa_family_t ss_family; /* address family */ + /* Following field(s) are implementation specific */ + char __data[_K_SS_MAXSIZE - sizeof(unsigned short)]; /* space to achieve desired size, */ /* _SS_MAXSIZE value minus size of ss_family */ -} __attribute__ ((aligned(_K_SS_ALIGNSIZE))); /* force desired alignment */ + }; + void *__align; /* implementation specific desired alignment */ + }; +}; #endif /* _UAPI_LINUX_SOCKET_H */ -- cgit v1.2.3 From 17e433b54393a6269acbcb792da97791fe1592d8 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 5 Aug 2019 10:03:19 +0800 Subject: KVM: Fix leak vCPU's VMCS value into other pCPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit d73eb57b80b (KVM: Boost vCPUs that are delivering interrupts), a five years old bug is exposed. Running ebizzy benchmark in three 80 vCPUs VMs on one 80 pCPUs Skylake server, a lot of rcu_sched stall warning splatting in the VMs after stress testing: INFO: rcu_sched detected stalls on CPUs/tasks: { 4 41 57 62 77} (detected by 15, t=60004 jiffies, g=899, c=898, q=15073) Call Trace: flush_tlb_mm_range+0x68/0x140 tlb_flush_mmu.part.75+0x37/0xe0 tlb_finish_mmu+0x55/0x60 zap_page_range+0x142/0x190 SyS_madvise+0x3cd/0x9c0 system_call_fastpath+0x1c/0x21 swait_active() sustains to be true before finish_swait() is called in kvm_vcpu_block(), voluntarily preempted vCPUs are taken into account by kvm_vcpu_on_spin() loop greatly increases the probability condition kvm_arch_vcpu_runnable(vcpu) is checked and can be true, when APICv is enabled the yield-candidate vCPU's VMCS RVI field leaks(by vmx_sync_pir_to_irr()) into spinning-on-a-taken-lock vCPU's current VMCS. This patch fixes it by checking conservatively a subset of events. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Christian Borntraeger Cc: Marc Zyngier Cc: stable@vger.kernel.org Fixes: 98f4a1467 (KVM: add kvm_arch_vcpu_runnable() test to kvm_vcpu_on_spin() loop) Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/powerpc.c | 5 +++++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/vmx/vmx.c | 6 ++++++ arch/x86/kvm/x86.c | 16 ++++++++++++++++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 25 ++++++++++++++++++++++++- 7 files changed, 59 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0dba7eb24f92..3e34d5fa6708 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -50,6 +50,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) return !!(v->arch.pending_exceptions) || kvm_request_pending(v); } +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) +{ + return kvm_arch_vcpu_runnable(vcpu); +} + bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { return false; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e74f0711eaaf..fc046ca89d32 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1175,6 +1175,7 @@ struct kvm_x86_ops { int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); + bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, bool *expired); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7eafc6907861..d685491fce4d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5190,6 +5190,11 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) kvm_vcpu_wake_up(vcpu); } +static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) +{ + return false; +} + static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) { unsigned long flags; @@ -7314,6 +7319,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .pmu_ops = &amd_pmu_ops, .deliver_posted_interrupt = svm_deliver_avic_intr, + .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, .update_pi_irte = svm_update_pi_irte, .setup_mce = svm_setup_mce, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 074385c86c09..42ed3faa6af8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6117,6 +6117,11 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) return max_irr; } +static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) +{ + return pi_test_on(vcpu_to_pi_desc(vcpu)); +} + static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { if (!kvm_vcpu_apicv_active(vcpu)) @@ -7726,6 +7731,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, .sync_pir_to_irr = vmx_sync_pir_to_irr, .deliver_posted_interrupt = vmx_deliver_posted_interrupt, + .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt, .set_tss_addr = vmx_set_tss_addr, .set_identity_map_addr = vmx_set_identity_map_addr, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c6d951cbd76c..93b0bd45ac73 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9698,6 +9698,22 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); } +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) +{ + if (READ_ONCE(vcpu->arch.pv.pv_unhalted)) + return true; + + if (kvm_test_request(KVM_REQ_NMI, vcpu) || + kvm_test_request(KVM_REQ_SMI, vcpu) || + kvm_test_request(KVM_REQ_EVENT, vcpu)) + return true; + + if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu)) + return true; + + return false; +} + bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { return vcpu->arch.preempted_in_kernel; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5c5b5867024c..9e4c2bb90297 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -872,6 +872,7 @@ int kvm_arch_check_processor_compat(void); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); #ifndef __KVM_HAVE_ARCH_VM_ALLOC /* diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ed061d8a457c..1f05aeb9da27 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2477,6 +2477,29 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) #endif } +/* + * Unlike kvm_arch_vcpu_runnable, this function is called outside + * a vcpu_load/vcpu_put pair. However, for most architectures + * kvm_arch_vcpu_runnable does not require vcpu_load. + */ +bool __weak kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) +{ + return kvm_arch_vcpu_runnable(vcpu); +} + +static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu) +{ + if (kvm_arch_dy_runnable(vcpu)) + return true; + +#ifdef CONFIG_KVM_ASYNC_PF + if (!list_empty_careful(&vcpu->async_pf.done)) + return true; +#endif + + return false; +} + void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) { struct kvm *kvm = me->kvm; @@ -2506,7 +2529,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) continue; if (vcpu == me) continue; - if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) + if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu)) continue; if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu)) -- cgit v1.2.3 From 741cbbae0768b828be2d48331eb371a4f08bbea8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 3 Aug 2019 08:14:25 +0200 Subject: KVM: remove kvm_arch_has_vcpu_debugfs() There is no need for this function as all arches have to implement kvm_arch_create_vcpu_debugfs() no matter what. A #define symbol let us actually simplify the code. Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 10 ---------- arch/powerpc/kvm/powerpc.c | 10 ---------- arch/s390/kvm/kvm-s390.c | 10 ---------- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/debugfs.c | 5 ----- include/linux/kvm_host.h | 3 ++- virt/kvm/arm/arm.c | 5 ----- virt/kvm/kvm_main.c | 5 ++--- 8 files changed, 6 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 2cfe839f0b3a..1109924560d8 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -150,16 +150,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return 0; } -bool kvm_arch_has_vcpu_debugfs(void) -{ - return false; -} - -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - return 0; -} - void kvm_mips_free_vcpus(struct kvm *kvm) { unsigned int i; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3e34d5fa6708..3e566c2e6066 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -457,16 +457,6 @@ err_out: return -EINVAL; } -bool kvm_arch_has_vcpu_debugfs(void) -{ - return false; -} - -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - return 0; -} - void kvm_arch_destroy_vm(struct kvm *kvm) { unsigned int i; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3f520cd837fb..f329dcb3f44c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2516,16 +2516,6 @@ out_err: return rc; } -bool kvm_arch_has_vcpu_debugfs(void) -{ - return false; -} - -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - return 0; -} - void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 3, "%s", "free cpu"); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fc046ca89d32..e92725b2a46f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,6 +35,8 @@ #include #include +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS + #define KVM_MAX_VCPUS 288 #define KVM_SOFT_MAX_VCPUS 240 #define KVM_MAX_VCPU_ID 1023 diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 329361b69d5e..9bd93e0d5f63 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -8,11 +8,6 @@ #include #include "lapic.h" -bool kvm_arch_has_vcpu_debugfs(void) -{ - return true; -} - static int vcpu_get_timer_advance_ns(void *data, u64 *val) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9e4c2bb90297..8d34db3c8bc6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -861,8 +861,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -bool kvm_arch_has_vcpu_debugfs(void); +#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); +#endif int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index acc43242a310..13f5a1aa6d79 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -144,11 +144,6 @@ out_fail_alloc: return ret; } -bool kvm_arch_has_vcpu_debugfs(void) -{ - return false; -} - int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { return 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1f05aeb9da27..4afb1a234018 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2617,12 +2617,10 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { +#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS char dir_name[ITOA_MAX_LEN * 2]; int ret; - if (!kvm_arch_has_vcpu_debugfs()) - return 0; - if (!debugfs_initialized()) return 0; @@ -2637,6 +2635,7 @@ static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) debugfs_remove_recursive(vcpu->debugfs_dentry); return ret; } +#endif return 0; } -- cgit v1.2.3 From 3e7093d045196b1016517631645e874fe903db7e Mon Sep 17 00:00:00 2001 From: Greg KH Date: Wed, 31 Jul 2019 20:56:20 +0200 Subject: KVM: no need to check return value of debugfs_create functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Also, when doing this, change kvm_arch_create_vcpu_debugfs() to return void instead of an integer, as we should not care at all about if this function actually does anything or not. Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Cc: Signed-off-by: Greg Kroah-Hartman Signed-off-by: Paolo Bonzini --- arch/x86/kvm/debugfs.c | 41 +++++++++++++---------------------------- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 21 +++++---------------- 3 files changed, 19 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 9bd93e0d5f63..018aebce33ff 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -43,37 +43,22 @@ static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n"); -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { - struct dentry *ret; + debugfs_create_file("tsc-offset", 0444, vcpu->debugfs_dentry, vcpu, + &vcpu_tsc_offset_fops); - ret = debugfs_create_file("tsc-offset", 0444, - vcpu->debugfs_dentry, - vcpu, &vcpu_tsc_offset_fops); - if (!ret) - return -ENOMEM; - - if (lapic_in_kernel(vcpu)) { - ret = debugfs_create_file("lapic_timer_advance_ns", 0444, - vcpu->debugfs_dentry, - vcpu, &vcpu_timer_advance_ns_fops); - if (!ret) - return -ENOMEM; - } + if (lapic_in_kernel(vcpu)) + debugfs_create_file("lapic_timer_advance_ns", 0444, + vcpu->debugfs_dentry, vcpu, + &vcpu_timer_advance_ns_fops); if (kvm_has_tsc_control) { - ret = debugfs_create_file("tsc-scaling-ratio", 0444, - vcpu->debugfs_dentry, - vcpu, &vcpu_tsc_scaling_fops); - if (!ret) - return -ENOMEM; - ret = debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444, - vcpu->debugfs_dentry, - vcpu, &vcpu_tsc_scaling_frac_fops); - if (!ret) - return -ENOMEM; - + debugfs_create_file("tsc-scaling-ratio", 0444, + vcpu->debugfs_dentry, vcpu, + &vcpu_tsc_scaling_fops); + debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444, + vcpu->debugfs_dentry, vcpu, + &vcpu_tsc_scaling_frac_fops); } - - return 0; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8d34db3c8bc6..fcb46b3374c6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -862,7 +862,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); #endif int kvm_arch_hardware_enable(void); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4afb1a234018..4feceaa03fb1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2615,29 +2615,20 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); } -static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS char dir_name[ITOA_MAX_LEN * 2]; - int ret; if (!debugfs_initialized()) - return 0; + return; snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); vcpu->debugfs_dentry = debugfs_create_dir(dir_name, - vcpu->kvm->debugfs_dentry); - if (!vcpu->debugfs_dentry) - return -ENOMEM; + vcpu->kvm->debugfs_dentry); - ret = kvm_arch_create_vcpu_debugfs(vcpu); - if (ret < 0) { - debugfs_remove_recursive(vcpu->debugfs_dentry); - return ret; - } + kvm_arch_create_vcpu_debugfs(vcpu); #endif - - return 0; } /* @@ -2672,9 +2663,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) if (r) goto vcpu_destroy; - r = kvm_create_vcpu_debugfs(vcpu); - if (r) - goto vcpu_destroy; + kvm_create_vcpu_debugfs(vcpu); mutex_lock(&kvm->lock); if (kvm_get_vcpu_by_id(kvm, id)) { -- cgit v1.2.3 From 5eeaf10eec394b28fad2c58f1f5c3a5da0e87d1c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Aug 2019 10:28:32 +0100 Subject: KVM: arm/arm64: Sync ICH_VMCR_EL2 back when about to block Since commit commit 328e56647944 ("KVM: arm/arm64: vgic: Defer touching GICH_VMCR to vcpu_load/put"), we leave ICH_VMCR_EL2 (or its GICv2 equivalent) loaded as long as we can, only syncing it back when we're scheduled out. There is a small snag with that though: kvm_vgic_vcpu_pending_irq(), which is indirectly called from kvm_vcpu_check_block(), needs to evaluate the guest's view of ICC_PMR_EL1. At the point were we call kvm_vcpu_check_block(), the vcpu is still loaded, and whatever changes to PMR is not visible in memory until we do a vcpu_put(). Things go really south if the guest does the following: mov x0, #0 // or any small value masking interrupts msr ICC_PMR_EL1, x0 [vcpu preempted, then rescheduled, VMCR sampled] mov x0, #ff // allow all interrupts msr ICC_PMR_EL1, x0 wfi // traps to EL2, so samping of VMCR [interrupt arrives just after WFI] Here, the hypervisor's view of PMR is zero, while the guest has enabled its interrupts. kvm_vgic_vcpu_pending_irq() will then say that no interrupts are pending (despite an interrupt being received) and we'll block for no reason. If the guest doesn't have a periodic interrupt firing once it has blocked, it will stay there forever. To avoid this unfortuante situation, let's resync VMCR from kvm_arch_vcpu_blocking(), ensuring that a following kvm_vcpu_check_block() will observe the latest value of PMR. This has been found by booting an arm64 Linux guest with the pseudo NMI feature, and thus using interrupt priorities to mask interrupts instead of the usual PSTATE masking. Cc: stable@vger.kernel.org # 4.12 Fixes: 328e56647944 ("KVM: arm/arm64: vgic: Defer touching GICH_VMCR to vcpu_load/put") Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 1 + virt/kvm/arm/arm.c | 11 +++++++++++ virt/kvm/arm/vgic/vgic-v2.c | 9 ++++++++- virt/kvm/arm/vgic/vgic-v3.c | 7 ++++++- virt/kvm/arm/vgic/vgic.c | 11 +++++++++++ virt/kvm/arm/vgic/vgic.h | 2 ++ 6 files changed, 39 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 46bbc949c20a..7a30524a80ee 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -350,6 +350,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); void kvm_vgic_load(struct kvm_vcpu *vcpu); void kvm_vgic_put(struct kvm_vcpu *vcpu); +void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) ((k)->arch.vgic.initialized) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index c704fa696184..482b20256fa8 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -323,6 +323,17 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { + /* + * If we're about to block (most likely because we've just hit a + * WFI), we need to sync back the state of the GIC CPU interface + * so that we have the lastest PMR and group enables. This ensures + * that kvm_arch_vcpu_runnable has up-to-date data to decide + * whether we have pending interrupts. + */ + preempt_disable(); + kvm_vgic_vmcr_sync(vcpu); + preempt_enable(); + kvm_vgic_v4_enable_doorbell(vcpu); } diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 6dd5ad706c92..96aab77d0471 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -484,10 +484,17 @@ void vgic_v2_load(struct kvm_vcpu *vcpu) kvm_vgic_global_state.vctrl_base + GICH_APR); } -void vgic_v2_put(struct kvm_vcpu *vcpu) +void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); +} + +void vgic_v2_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + + vgic_v2_vmcr_sync(vcpu); cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index c2c9ce009f63..0c653a1e5215 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -662,12 +662,17 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) __vgic_v3_activate_traps(vcpu); } -void vgic_v3_put(struct kvm_vcpu *vcpu) +void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; if (likely(cpu_if->vgic_sre)) cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); +} + +void vgic_v3_put(struct kvm_vcpu *vcpu) +{ + vgic_v3_vmcr_sync(vcpu); kvm_call_hyp(__vgic_v3_save_aprs, vcpu); diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 04786c8ec77e..13d4b38a94ec 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -919,6 +919,17 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu) vgic_v3_put(vcpu); } +void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu) +{ + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_vmcr_sync(vcpu); + else + vgic_v3_vmcr_sync(vcpu); +} + int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 57205beaa981..11adbdac1d56 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -193,6 +193,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, void vgic_v2_init_lrs(void); void vgic_v2_load(struct kvm_vcpu *vcpu); void vgic_v2_put(struct kvm_vcpu *vcpu); +void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); void vgic_v2_save_state(struct kvm_vcpu *vcpu); void vgic_v2_restore_state(struct kvm_vcpu *vcpu); @@ -223,6 +224,7 @@ bool vgic_v3_check_base(struct kvm *kvm); void vgic_v3_load(struct kvm_vcpu *vcpu); void vgic_v3_put(struct kvm_vcpu *vcpu); +void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); -- cgit v1.2.3 From 5d92e631b8be8965a90c144320f06e096081a551 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 1 Aug 2019 14:36:01 -0700 Subject: net/tls: partially revert fix transition through disconnect with close Looks like we were slightly overzealous with the shutdown() cleanup. Even though the sock->sk_state can reach CLOSED again, socket->state will not got back to SS_UNCONNECTED once connections is ESTABLISHED. Meaning we will see EISCONN if we try to reconnect, and EINVAL if we try to listen. Only listen sockets can be shutdown() and reused, but since ESTABLISHED sockets can never be re-connected() or used for listen() we don't need to try to clean up the ULP state early. Fixes: 32857cf57f92 ("net/tls: fix transition through disconnect with close") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/tls-offload.rst | 6 ---- include/net/tls.h | 2 -- net/tls/tls_main.c | 55 -------------------------------- 3 files changed, 63 deletions(-) (limited to 'include') diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index 2d9f9ebf4117..b70b70dc4524 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -524,9 +524,3 @@ Redirects leak clear text In the RX direction, if segment has already been decrypted by the device and it gets redirected or mirrored - clear text will be transmitted out. - -shutdown() doesn't clear TLS state ----------------------------------- - -shutdown() system call allows for a TLS socket to be reused as a different -connection. Offload doesn't currently handle that. diff --git a/include/net/tls.h b/include/net/tls.h index 9e425ac2de45..41b2d41bb1b8 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -290,8 +290,6 @@ struct tls_context { struct list_head list; refcount_t refcount; - - struct work_struct gc; }; enum tls_offload_ctx_dir { diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index f208f8455ef2..9cbbae606ced 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -261,33 +261,6 @@ void tls_ctx_free(struct tls_context *ctx) kfree(ctx); } -static void tls_ctx_free_deferred(struct work_struct *gc) -{ - struct tls_context *ctx = container_of(gc, struct tls_context, gc); - - /* Ensure any remaining work items are completed. The sk will - * already have lost its tls_ctx reference by the time we get - * here so no xmit operation will actually be performed. - */ - if (ctx->tx_conf == TLS_SW) { - tls_sw_cancel_work_tx(ctx); - tls_sw_free_ctx_tx(ctx); - } - - if (ctx->rx_conf == TLS_SW) { - tls_sw_strparser_done(ctx); - tls_sw_free_ctx_rx(ctx); - } - - tls_ctx_free(ctx); -} - -static void tls_ctx_free_wq(struct tls_context *ctx) -{ - INIT_WORK(&ctx->gc, tls_ctx_free_deferred); - schedule_work(&ctx->gc); -} - static void tls_sk_proto_cleanup(struct sock *sk, struct tls_context *ctx, long timeo) { @@ -315,29 +288,6 @@ static void tls_sk_proto_cleanup(struct sock *sk, #endif } -static void tls_sk_proto_unhash(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - long timeo = sock_sndtimeo(sk, 0); - struct tls_context *ctx; - - if (unlikely(!icsk->icsk_ulp_data)) { - if (sk->sk_prot->unhash) - sk->sk_prot->unhash(sk); - } - - ctx = tls_get_ctx(sk); - tls_sk_proto_cleanup(sk, ctx, timeo); - write_lock_bh(&sk->sk_callback_lock); - icsk->icsk_ulp_data = NULL; - sk->sk_prot = ctx->sk_proto; - write_unlock_bh(&sk->sk_callback_lock); - - if (ctx->sk_proto->unhash) - ctx->sk_proto->unhash(sk); - tls_ctx_free_wq(ctx); -} - static void tls_sk_proto_close(struct sock *sk, long timeout) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -786,7 +736,6 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt; prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt; prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close; - prot[TLS_BASE][TLS_BASE].unhash = tls_sk_proto_unhash; prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; @@ -804,20 +753,16 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], #ifdef CONFIG_TLS_DEVICE prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; - prot[TLS_HW][TLS_BASE].unhash = base->unhash; prot[TLS_HW][TLS_BASE].sendmsg = tls_device_sendmsg; prot[TLS_HW][TLS_BASE].sendpage = tls_device_sendpage; prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW]; - prot[TLS_HW][TLS_SW].unhash = base->unhash; prot[TLS_HW][TLS_SW].sendmsg = tls_device_sendmsg; prot[TLS_HW][TLS_SW].sendpage = tls_device_sendpage; prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW]; - prot[TLS_BASE][TLS_HW].unhash = base->unhash; prot[TLS_SW][TLS_HW] = prot[TLS_SW][TLS_SW]; - prot[TLS_SW][TLS_HW].unhash = base->unhash; prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW]; #endif -- cgit v1.2.3 From c4bd48699beb92d6bb99d6139d1e9737cca73480 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Sat, 3 Aug 2019 16:36:18 +0300 Subject: net: sched: police: allow accessing police->params with rtnl Recently implemented support for police action in flow_offload infra leads to following rcu usage warning: [ 1925.881092] ============================= [ 1925.881094] WARNING: suspicious RCU usage [ 1925.881098] 5.3.0-rc1+ #574 Not tainted [ 1925.881100] ----------------------------- [ 1925.881104] include/net/tc_act/tc_police.h:57 suspicious rcu_dereference_check() usage! [ 1925.881106] other info that might help us debug this: [ 1925.881109] rcu_scheduler_active = 2, debug_locks = 1 [ 1925.881112] 1 lock held by tc/18591: [ 1925.881115] #0: 00000000b03cb918 (rtnl_mutex){+.+.}, at: tc_new_tfilter+0x47c/0x970 [ 1925.881124] stack backtrace: [ 1925.881127] CPU: 2 PID: 18591 Comm: tc Not tainted 5.3.0-rc1+ #574 [ 1925.881130] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 1925.881132] Call Trace: [ 1925.881138] dump_stack+0x85/0xc0 [ 1925.881145] tc_setup_flow_action+0x1771/0x2040 [ 1925.881155] fl_hw_replace_filter+0x11f/0x2e0 [cls_flower] [ 1925.881175] fl_change+0xd24/0x1b30 [cls_flower] [ 1925.881200] tc_new_tfilter+0x3e0/0x970 [ 1925.881231] ? tc_del_tfilter+0x720/0x720 [ 1925.881243] rtnetlink_rcv_msg+0x389/0x4b0 [ 1925.881250] ? netlink_deliver_tap+0x95/0x400 [ 1925.881257] ? rtnl_dellink+0x2d0/0x2d0 [ 1925.881264] netlink_rcv_skb+0x49/0x110 [ 1925.881275] netlink_unicast+0x171/0x200 [ 1925.881284] netlink_sendmsg+0x224/0x3f0 [ 1925.881299] sock_sendmsg+0x5e/0x60 [ 1925.881305] ___sys_sendmsg+0x2ae/0x330 [ 1925.881309] ? task_work_add+0x43/0x50 [ 1925.881314] ? fput_many+0x45/0x80 [ 1925.881329] ? __lock_acquire+0x248/0x1930 [ 1925.881342] ? find_held_lock+0x2b/0x80 [ 1925.881347] ? task_work_run+0x7b/0xd0 [ 1925.881359] __sys_sendmsg+0x59/0xa0 [ 1925.881375] do_syscall_64+0x5c/0xb0 [ 1925.881381] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1925.881384] RIP: 0033:0x7feb245047b8 [ 1925.881388] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 65 8f 0c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 28 89 54 [ 1925.881391] RSP: 002b:00007ffc2d2a5788 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 1925.881395] RAX: ffffffffffffffda RBX: 000000005d4497ed RCX: 00007feb245047b8 [ 1925.881398] RDX: 0000000000000000 RSI: 00007ffc2d2a57f0 RDI: 0000000000000003 [ 1925.881400] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000006 [ 1925.881403] R10: 0000000000404ec2 R11: 0000000000000246 R12: 0000000000000001 [ 1925.881406] R13: 0000000000480640 R14: 0000000000000012 R15: 0000000000000001 Change tcf_police_rate_bytes_ps() and tcf_police_tcfp_burst() helpers to allow using them from both rtnl and rcu protected contexts. Fixes: 8c8cfc6ed274 ("net/sched: add police action to the hardware intermediate representation") Signed-off-by: Vlad Buslov Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: David S. Miller --- include/net/tc_act/tc_police.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h index 8b9ef3664262..cfdc7cb82cad 100644 --- a/include/net/tc_act/tc_police.h +++ b/include/net/tc_act/tc_police.h @@ -54,7 +54,7 @@ static inline u64 tcf_police_rate_bytes_ps(const struct tc_action *act) struct tcf_police *police = to_police(act); struct tcf_police_params *params; - params = rcu_dereference_bh(police->params); + params = rcu_dereference_bh_rtnl(police->params); return params->rate.rate_bytes_ps; } @@ -63,7 +63,7 @@ static inline s64 tcf_police_tcfp_burst(const struct tc_action *act) struct tcf_police *police = to_police(act); struct tcf_police_params *params; - params = rcu_dereference_bh(police->params); + params = rcu_dereference_bh_rtnl(police->params); return params->tcfp_burst; } -- cgit v1.2.3 From 67cbf7dedd03a63ca2fbd9df2049eabba7a37edf Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Sat, 3 Aug 2019 16:36:19 +0300 Subject: net: sched: sample: allow accessing psample_group with rtnl Recently implemented support for sample action in flow_offload infra leads to following rcu usage warning: [ 1938.234856] ============================= [ 1938.234858] WARNING: suspicious RCU usage [ 1938.234863] 5.3.0-rc1+ #574 Not tainted [ 1938.234866] ----------------------------- [ 1938.234869] include/net/tc_act/tc_sample.h:47 suspicious rcu_dereference_check() usage! [ 1938.234872] other info that might help us debug this: [ 1938.234875] rcu_scheduler_active = 2, debug_locks = 1 [ 1938.234879] 1 lock held by tc/19540: [ 1938.234881] #0: 00000000b03cb918 (rtnl_mutex){+.+.}, at: tc_new_tfilter+0x47c/0x970 [ 1938.234900] stack backtrace: [ 1938.234905] CPU: 2 PID: 19540 Comm: tc Not tainted 5.3.0-rc1+ #574 [ 1938.234908] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 1938.234911] Call Trace: [ 1938.234922] dump_stack+0x85/0xc0 [ 1938.234930] tc_setup_flow_action+0xed5/0x2040 [ 1938.234944] fl_hw_replace_filter+0x11f/0x2e0 [cls_flower] [ 1938.234965] fl_change+0xd24/0x1b30 [cls_flower] [ 1938.234990] tc_new_tfilter+0x3e0/0x970 [ 1938.235021] ? tc_del_tfilter+0x720/0x720 [ 1938.235028] rtnetlink_rcv_msg+0x389/0x4b0 [ 1938.235038] ? netlink_deliver_tap+0x95/0x400 [ 1938.235044] ? rtnl_dellink+0x2d0/0x2d0 [ 1938.235053] netlink_rcv_skb+0x49/0x110 [ 1938.235063] netlink_unicast+0x171/0x200 [ 1938.235073] netlink_sendmsg+0x224/0x3f0 [ 1938.235091] sock_sendmsg+0x5e/0x60 [ 1938.235097] ___sys_sendmsg+0x2ae/0x330 [ 1938.235111] ? __handle_mm_fault+0x12cd/0x19e0 [ 1938.235125] ? __handle_mm_fault+0x12cd/0x19e0 [ 1938.235138] ? find_held_lock+0x2b/0x80 [ 1938.235147] ? do_user_addr_fault+0x22d/0x490 [ 1938.235160] __sys_sendmsg+0x59/0xa0 [ 1938.235178] do_syscall_64+0x5c/0xb0 [ 1938.235187] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1938.235192] RIP: 0033:0x7ff9a4d597b8 [ 1938.235197] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 65 8f 0c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 28 89 54 [ 1938.235200] RSP: 002b:00007ffcfe381c48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 1938.235205] RAX: ffffffffffffffda RBX: 000000005d4497f9 RCX: 00007ff9a4d597b8 [ 1938.235208] RDX: 0000000000000000 RSI: 00007ffcfe381cb0 RDI: 0000000000000003 [ 1938.235211] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000006 [ 1938.235214] R10: 0000000000404ec2 R11: 0000000000000246 R12: 0000000000000001 [ 1938.235217] R13: 0000000000480640 R14: 0000000000000012 R15: 0000000000000001 Change tcf_sample_psample_group() helper to allow using it from both rtnl and rcu protected contexts. Fixes: a7a7be6087b0 ("net/sched: add sample action to the hardware intermediate representation") Signed-off-by: Vlad Buslov Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: David S. Miller --- include/net/tc_act/tc_sample.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h index 0a559d4b6f0f..b4fce0fae645 100644 --- a/include/net/tc_act/tc_sample.h +++ b/include/net/tc_act/tc_sample.h @@ -44,7 +44,7 @@ static inline int tcf_sample_trunc_size(const struct tc_action *a) static inline struct psample_group * tcf_sample_psample_group(const struct tc_action *a) { - return rcu_dereference(to_sample(a)->psample_group); + return rcu_dereference_rtnl(to_sample(a)->psample_group); } #endif /* __NET_TC_SAMPLE_H */ -- cgit v1.2.3 From 4b3e30ed3ec7864e798403a63ff2e96bd0c19ab0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 7 Aug 2019 00:23:07 -0500 Subject: Revert "drm/amdkfd: New IOCTL to allocate queue GWS" This reverts commit 1a058c3376765ee31d65e28cbbb9d4ff15120056. This interface is still in too much flux. Revert until it's sorted out. Acked-by: Oak Zeng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 28 ---------------------------- include/uapi/linux/kfd_ioctl.h | 20 +------------------- 2 files changed, 1 insertion(+), 47 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 26b15cc56c31..1d3cd5c50d5f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1567,32 +1567,6 @@ copy_from_user_failed: return err; } -static int kfd_ioctl_alloc_queue_gws(struct file *filep, - struct kfd_process *p, void *data) -{ - int retval; - struct kfd_ioctl_alloc_queue_gws_args *args = data; - struct kfd_dev *dev; - - if (!hws_gws_support) - return -ENODEV; - - dev = kfd_device_by_id(args->gpu_id); - if (!dev) { - pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); - return -ENODEV; - } - if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) - return -ENODEV; - - mutex_lock(&p->mutex); - retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL); - mutex_unlock(&p->mutex); - - args->first_gws = 0; - return retval; -} - static int kfd_ioctl_get_dmabuf_info(struct file *filep, struct kfd_process *p, void *data) { @@ -1795,8 +1769,6 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, kfd_ioctl_import_dmabuf, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, - kfd_ioctl_alloc_queue_gws, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 070d1bc7e725..20917c59f39c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -410,21 +410,6 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { __u32 n_success; /* to/from KFD */ }; -/* Allocate GWS for specific queue - * - * @gpu_id: device identifier - * @queue_id: queue's id that GWS is allocated for - * @num_gws: how many GWS to allocate - * @first_gws: index of the first GWS allocated. - * only support contiguous GWS allocation - */ -struct kfd_ioctl_alloc_queue_gws_args { - __u32 gpu_id; /* to KFD */ - __u32 queue_id; /* to KFD */ - __u32 num_gws; /* to KFD */ - __u32 first_gws; /* from KFD */ -}; - struct kfd_ioctl_get_dmabuf_info_args { __u64 size; /* from KFD */ __u64 metadata_ptr; /* to KFD */ @@ -544,10 +529,7 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_IMPORT_DMABUF \ AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) -#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ - AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) - #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1F +#define AMDKFD_COMMAND_END 0x1E #endif -- cgit v1.2.3