From 0dbffbb5335a1e3aa6855e4ee317e25e669dd302 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Jun 2021 07:12:45 -0700 Subject: net: annotate data race around sk_ll_usec sk_ll_usec is read locklessly from sk_can_busy_loop() while another thread can change its value in sock_setsockopt() This is correct but needs annotations. BUG: KCSAN: data-race in __skb_try_recv_datagram / sock_setsockopt write to 0xffff88814eb5f904 of 4 bytes by task 14011 on cpu 0: sock_setsockopt+0x1287/0x2090 net/core/sock.c:1175 __sys_setsockopt+0x14f/0x200 net/socket.c:2100 __do_sys_setsockopt net/socket.c:2115 [inline] __se_sys_setsockopt net/socket.c:2112 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2112 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff88814eb5f904 of 4 bytes by task 14001 on cpu 1: sk_can_busy_loop include/net/busy_poll.h:41 [inline] __skb_try_recv_datagram+0x14f/0x320 net/core/datagram.c:273 unix_dgram_recvmsg+0x14c/0x870 net/unix/af_unix.c:2101 unix_seqpacket_recvmsg+0x5a/0x70 net/unix/af_unix.c:2067 ____sys_recvmsg+0x15d/0x310 include/linux/uio.h:244 ___sys_recvmsg net/socket.c:2598 [inline] do_recvmmsg+0x35c/0x9f0 net/socket.c:2692 __sys_recvmmsg net/socket.c:2771 [inline] __do_sys_recvmmsg net/socket.c:2794 [inline] __se_sys_recvmmsg net/socket.c:2787 [inline] __x64_sys_recvmmsg+0xcf/0x150 net/socket.c:2787 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x00000000 -> 0x00000101 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 14001 Comm: syz-executor.3 Not tainted 5.13.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/busy_poll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 73af4a64a599..40296ed976a9 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -38,7 +38,7 @@ static inline bool net_busy_loop_on(void) static inline bool sk_can_busy_loop(const struct sock *sk) { - return sk->sk_ll_usec && !signal_pending(current); + return READ_ONCE(sk->sk_ll_usec) && !signal_pending(current); } bool sk_busy_loop_end(void *p, unsigned long start_time); -- cgit v1.2.3 From 1d11fa231cabeae09a95cb3e4cf1d9dd34e00f08 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 29 Jun 2021 23:34:08 -0400 Subject: sctp: move 198 addresses from unusable to private scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The doc draft-stewart-tsvwg-sctp-ipv4-00 that restricts 198 addresses was never published. These addresses as private addresses should be allowed to use in SCTP. As Michael Tuexen suggested, this patch is to move 198 addresses from unusable to private scope. Reported-by: Sérgio Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 +--- net/sctp/protocol.c | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 265fffa33dad..5859e0a16a58 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -360,8 +360,7 @@ enum { #define SCTP_SCOPE_POLICY_MAX SCTP_SCOPE_POLICY_LINK /* Based on IPv4 scoping , - * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 198.18.0.0/24, - * 192.88.99.0/24. + * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 192.88.99.0/24. * Also, RFC 8.4, non-unicast addresses are not considered valid SCTP * addresses. */ @@ -369,7 +368,6 @@ enum { ((htonl(INADDR_BROADCAST) == a) || \ ipv4_is_multicast(a) || \ ipv4_is_zeronet(a) || \ - ipv4_is_test_198(a) || \ ipv4_is_anycast_6to4(a)) /* Flags used for the bind address copy functions. */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3c1fbf38f4f7..ec0f52567c16 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -398,7 +398,8 @@ static enum sctp_scope sctp_v4_scope(union sctp_addr *addr) retval = SCTP_SCOPE_LINK; } else if (ipv4_is_private_10(addr->v4.sin_addr.s_addr) || ipv4_is_private_172(addr->v4.sin_addr.s_addr) || - ipv4_is_private_192(addr->v4.sin_addr.s_addr)) { + ipv4_is_private_192(addr->v4.sin_addr.s_addr) || + ipv4_is_test_198(addr->v4.sin_addr.s_addr)) { retval = SCTP_SCOPE_PRIVATE; } else { retval = SCTP_SCOPE_GLOBAL; -- cgit v1.2.3 From d463126e23f112629edb01594141ca437a92a108 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:59 +0800 Subject: net: sock: extend SO_TIMESTAMPING for PHC binding Since PTP virtual clock support is added, there can be several PTP virtual clocks based on one PTP physical clock for timestamping. This patch is to extend SO_TIMESTAMPING API to support PHC (PTP Hardware Clock) binding by adding a new flag SOF_TIMESTAMPING_BIND_PHC. When PTP virtual clocks are in use, user space can configure to bind one for timestamping, but PTP physical clock is not supported and not needed to bind. This patch is preparation for timestamp conversion from raw timestamp to a specific PTP virtual clock time in core net. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- include/net/sock.h | 8 +++-- include/uapi/linux/net_tstamp.h | 17 +++++++++-- net/core/sock.c | 65 +++++++++++++++++++++++++++++++++++++++-- net/ethtool/common.c | 1 + net/mptcp/sockopt.c | 23 +++++++++++---- 5 files changed, 101 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 8bdd80027ffb..f23cb259b0e2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -316,7 +316,9 @@ struct bpf_local_storage; * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only - * @sk_tsflags: SO_TIMESTAMPING socket options + * @sk_tsflags: SO_TIMESTAMPING flags + * @sk_bind_phc: SO_TIMESTAMPING bind PHC index of PTP virtual clock + * for timestamping * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals @@ -493,6 +495,7 @@ struct sock { seqlock_t sk_stamp_seq; #endif u16 sk_tsflags; + int sk_bind_phc; u8 sk_shutdown; u32 sk_tskey; atomic_t sk_zckey; @@ -2755,7 +2758,8 @@ void sock_def_readable(struct sock *sk); int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk); void sock_set_timestamp(struct sock *sk, int optname, bool valbool); -int sock_set_timestamping(struct sock *sk, int optname, int val); +int sock_set_timestamping(struct sock *sk, int optname, + struct so_timestamping timestamping); void sock_enable_timestamps(struct sock *sk); void sock_no_linger(struct sock *sk); diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 7ed0b3d1c00a..fcc61c73a666 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -13,7 +13,7 @@ #include #include /* for SO_TIMESTAMPING */ -/* SO_TIMESTAMPING gets an integer bit field comprised of these values */ +/* SO_TIMESTAMPING flags */ enum { SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), SOF_TIMESTAMPING_TX_SOFTWARE = (1<<1), @@ -30,8 +30,9 @@ enum { SOF_TIMESTAMPING_OPT_STATS = (1<<12), SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13), SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), + SOF_TIMESTAMPING_BIND_PHC = (1 << 15), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_BIND_PHC, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; @@ -46,6 +47,18 @@ enum { SOF_TIMESTAMPING_TX_SCHED | \ SOF_TIMESTAMPING_TX_ACK) +/** + * struct so_timestamping - SO_TIMESTAMPING parameter + * + * @flags: SO_TIMESTAMPING flags + * @bind_phc: Index of PTP virtual clock bound to sock. This is available + * if flag SOF_TIMESTAMPING_BIND_PHC is set. + */ +struct so_timestamping { + int flags; + int bind_phc; +}; + /** * struct hwtstamp_config - %SIOCGHWTSTAMP and %SIOCSHWTSTAMP parameter * diff --git a/net/core/sock.c b/net/core/sock.c index dd9599656c40..cad107112204 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -139,6 +139,8 @@ #include #include +#include + static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); @@ -810,8 +812,47 @@ void sock_set_timestamp(struct sock *sk, int optname, bool valbool) } } -int sock_set_timestamping(struct sock *sk, int optname, int val) +static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) { + struct net *net = sock_net(sk); + struct net_device *dev = NULL; + bool match = false; + int *vclock_index; + int i, num; + + if (sk->sk_bound_dev_if) + dev = dev_get_by_index(net, sk->sk_bound_dev_if); + + if (!dev) { + pr_err("%s: sock not bind to device\n", __func__); + return -EOPNOTSUPP; + } + + num = ethtool_get_phc_vclocks(dev, &vclock_index); + for (i = 0; i < num; i++) { + if (*(vclock_index + i) == phc_index) { + match = true; + break; + } + } + + if (num > 0) + kfree(vclock_index); + + if (!match) + return -EINVAL; + + sk->sk_bind_phc = phc_index; + + return 0; +} + +int sock_set_timestamping(struct sock *sk, int optname, + struct so_timestamping timestamping) +{ + int val = timestamping.flags; + int ret; + if (val & ~SOF_TIMESTAMPING_MASK) return -EINVAL; @@ -832,6 +873,12 @@ int sock_set_timestamping(struct sock *sk, int optname, int val) !(val & SOF_TIMESTAMPING_OPT_TSONLY)) return -EINVAL; + if (val & SOF_TIMESTAMPING_BIND_PHC) { + ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc); + if (ret) + return ret; + } + sk->sk_tsflags = val; sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); @@ -907,6 +954,7 @@ EXPORT_SYMBOL(sock_set_mark); int sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { + struct so_timestamping timestamping; struct sock_txtime sk_txtime; struct sock *sk = sock->sk; int val; @@ -1073,7 +1121,15 @@ set_sndbuf: case SO_TIMESTAMPING_NEW: case SO_TIMESTAMPING_OLD: - ret = sock_set_timestamping(sk, optname, val); + if (optlen == sizeof(timestamping)) { + if (copy_from_sockptr(×tamping, optval, + sizeof(timestamping))) + return -EFAULT; + } else { + memset(×tamping, 0, sizeof(timestamping)); + timestamping.flags = val; + } + ret = sock_set_timestamping(sk, optname, timestamping); break; case SO_RCVLOWAT: @@ -1348,6 +1404,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, struct __kernel_old_timeval tm; struct __kernel_sock_timeval stm; struct sock_txtime txtime; + struct so_timestamping timestamping; } v; int lv = sizeof(int); @@ -1451,7 +1508,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_TIMESTAMPING_OLD: - v.val = sk->sk_tsflags; + lv = sizeof(v.timestamping); + v.timestamping.flags = sk->sk_tsflags; + v.timestamping.bind_phc = sk->sk_bind_phc; break; case SO_RCVTIMEO_OLD: diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 798231b07676..c63e0739dc6a 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -398,6 +398,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = { [const_ilog2(SOF_TIMESTAMPING_OPT_STATS)] = "option-stats", [const_ilog2(SOF_TIMESTAMPING_OPT_PKTINFO)] = "option-pktinfo", [const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw", + [const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc", }; static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index ea38cbcd2ad4..8c03afac5ca0 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -207,14 +207,25 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - int val, ret; + struct so_timestamping timestamping; + int ret; - ret = mptcp_get_int_option(msk, optval, optlen, &val); - if (ret) - return ret; + if (optlen == sizeof(timestamping)) { + if (copy_from_sockptr(×tamping, optval, + sizeof(timestamping))) + return -EFAULT; + } else if (optlen == sizeof(int)) { + memset(×tamping, 0, sizeof(timestamping)); + + if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) + return -EFAULT; + } else { + return -EINVAL; + } ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, - KERNEL_SOCKPTR(&val), sizeof(val)); + KERNEL_SOCKPTR(×tamping), + sizeof(timestamping)); if (ret) return ret; @@ -224,7 +235,7 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); - sock_set_timestamping(sk, optname, val); + sock_set_timestamping(sk, optname, timestamping); unlock_sock_fast(ssk, slow); } -- cgit v1.2.3 From ca75bcf0a83b6cc7f53a593d98ec7121c4839b43 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Jul 2021 10:15:09 +0200 Subject: net: remove the caif_hsi driver The caif_hsi driver relies on a cfhsi_get_ops symbol using symbol_get, but this symbol is not provided anywhere in the kernel tree. Remove this driver given that it is dead code. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/net/caif/Kconfig | 9 - drivers/net/caif/Makefile | 3 - drivers/net/caif/caif_hsi.c | 1454 ------------------------------------------- include/net/caif/caif_hsi.h | 200 ------ 4 files changed, 1666 deletions(-) delete mode 100644 drivers/net/caif/caif_hsi.c delete mode 100644 include/net/caif/caif_hsi.h (limited to 'include/net') diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index a77124bc1f4b..709660cb38f8 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -20,15 +20,6 @@ config CAIF_TTY identified as N_CAIF. When this ldisc is opened from user space it will redirect the TTY's traffic into the CAIF stack. -config CAIF_HSI - tristate "CAIF HSI transport driver" - depends on CAIF - default n - help - The CAIF low level driver for CAIF over HSI. - Be aware that if you enable this then you also need to - enable a low-level HSI driver. - config CAIF_VIRTIO tristate "CAIF virtio transport driver" depends on CAIF && HAS_DMA diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile index b1918c8c126c..97f664f8016c 100644 --- a/drivers/net/caif/Makefile +++ b/drivers/net/caif/Makefile @@ -4,8 +4,5 @@ ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG # Serial interface obj-$(CONFIG_CAIF_TTY) += caif_serial.o -# HSI interface -obj-$(CONFIG_CAIF_HSI) += caif_hsi.o - # Virtio interface obj-$(CONFIG_CAIF_VIRTIO) += caif_virtio.o diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c deleted file mode 100644 index 3d63b15bbaa1..000000000000 --- a/drivers/net/caif/caif_hsi.c +++ /dev/null @@ -1,1454 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Daniel Martensson - * Dmitry.Tarnyagin / dmitry.tarnyagin@lockless.no - */ - -#define pr_fmt(fmt) KBUILD_MODNAME fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Daniel Martensson"); -MODULE_DESCRIPTION("CAIF HSI driver"); - -/* Returns the number of padding bytes for alignment. */ -#define PAD_POW2(x, pow) ((((x)&((pow)-1)) == 0) ? 0 :\ - (((pow)-((x)&((pow)-1))))) - -static const struct cfhsi_config hsi_default_config = { - - /* Inactivity timeout on HSI, ms */ - .inactivity_timeout = HZ, - - /* Aggregation timeout (ms) of zero means no aggregation is done*/ - .aggregation_timeout = 1, - - /* - * HSI link layer flow-control thresholds. - * Threshold values for the HSI packet queue. Flow-control will be - * asserted when the number of packets exceeds q_high_mark. It will - * not be de-asserted before the number of packets drops below - * q_low_mark. - * Warning: A high threshold value might increase throughput but it - * will at the same time prevent channel prioritization and increase - * the risk of flooding the modem. The high threshold should be above - * the low. - */ - .q_high_mark = 100, - .q_low_mark = 50, - - /* - * HSI padding options. - * Warning: must be a base of 2 (& operation used) and can not be zero ! - */ - .head_align = 4, - .tail_align = 4, -}; - -#define ON 1 -#define OFF 0 - -static LIST_HEAD(cfhsi_list); - -static void cfhsi_inactivity_tout(struct timer_list *t) -{ - struct cfhsi *cfhsi = from_timer(cfhsi, t, inactivity_timer); - - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - /* Schedule power down work queue. */ - if (!test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - queue_work(cfhsi->wq, &cfhsi->wake_down_work); -} - -static void cfhsi_update_aggregation_stats(struct cfhsi *cfhsi, - const struct sk_buff *skb, - int direction) -{ - struct caif_payload_info *info; - int hpad, tpad, len; - - info = (struct caif_payload_info *)&skb->cb; - hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); - tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); - len = skb->len + hpad + tpad; - - if (direction > 0) - cfhsi->aggregation_len += len; - else if (direction < 0) - cfhsi->aggregation_len -= len; -} - -static bool cfhsi_can_send_aggregate(struct cfhsi *cfhsi) -{ - int i; - - if (cfhsi->cfg.aggregation_timeout == 0) - return true; - - for (i = 0; i < CFHSI_PRIO_BEBK; ++i) { - if (cfhsi->qhead[i].qlen) - return true; - } - - /* TODO: Use aggregation_len instead */ - if (cfhsi->qhead[CFHSI_PRIO_BEBK].qlen >= CFHSI_MAX_PKTS) - return true; - - return false; -} - -static struct sk_buff *cfhsi_dequeue(struct cfhsi *cfhsi) -{ - struct sk_buff *skb; - int i; - - for (i = 0; i < CFHSI_PRIO_LAST; ++i) { - skb = skb_dequeue(&cfhsi->qhead[i]); - if (skb) - break; - } - - return skb; -} - -static int cfhsi_tx_queue_len(struct cfhsi *cfhsi) -{ - int i, len = 0; - for (i = 0; i < CFHSI_PRIO_LAST; ++i) - len += skb_queue_len(&cfhsi->qhead[i]); - return len; -} - -static void cfhsi_abort_tx(struct cfhsi *cfhsi) -{ - struct sk_buff *skb; - - for (;;) { - spin_lock_bh(&cfhsi->lock); - skb = cfhsi_dequeue(cfhsi); - if (!skb) - break; - - cfhsi->ndev->stats.tx_errors++; - cfhsi->ndev->stats.tx_dropped++; - cfhsi_update_aggregation_stats(cfhsi, skb, -1); - spin_unlock_bh(&cfhsi->lock); - kfree_skb(skb); - } - cfhsi->tx_state = CFHSI_TX_STATE_IDLE; - if (!test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->cfg.inactivity_timeout); - spin_unlock_bh(&cfhsi->lock); -} - -static int cfhsi_flush_fifo(struct cfhsi *cfhsi) -{ - char buffer[32]; /* Any reasonable value */ - size_t fifo_occupancy; - int ret; - - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - do { - ret = cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, - &fifo_occupancy); - if (ret) { - netdev_warn(cfhsi->ndev, - "%s: can't get FIFO occupancy: %d.\n", - __func__, ret); - break; - } else if (!fifo_occupancy) - /* No more data, exitting normally */ - break; - - fifo_occupancy = min(sizeof(buffer), fifo_occupancy); - set_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits); - ret = cfhsi->ops->cfhsi_rx(buffer, fifo_occupancy, - cfhsi->ops); - if (ret) { - clear_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits); - netdev_warn(cfhsi->ndev, - "%s: can't read data: %d.\n", - __func__, ret); - break; - } - - ret = 5 * HZ; - ret = wait_event_interruptible_timeout(cfhsi->flush_fifo_wait, - !test_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits), ret); - - if (ret < 0) { - netdev_warn(cfhsi->ndev, - "%s: can't wait for flush complete: %d.\n", - __func__, ret); - break; - } else if (!ret) { - ret = -ETIMEDOUT; - netdev_warn(cfhsi->ndev, - "%s: timeout waiting for flush complete.\n", - __func__); - break; - } - } while (1); - - return ret; -} - -static int cfhsi_tx_frm(struct cfhsi_desc *desc, struct cfhsi *cfhsi) -{ - int nfrms = 0; - int pld_len = 0; - struct sk_buff *skb; - u8 *pfrm = desc->emb_frm + CFHSI_MAX_EMB_FRM_SZ; - - skb = cfhsi_dequeue(cfhsi); - if (!skb) - return 0; - - /* Clear offset. */ - desc->offset = 0; - - /* Check if we can embed a CAIF frame. */ - if (skb->len < CFHSI_MAX_EMB_FRM_SZ) { - struct caif_payload_info *info; - int hpad; - int tpad; - - /* Calculate needed head alignment and tail alignment. */ - info = (struct caif_payload_info *)&skb->cb; - - hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); - tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); - - /* Check if frame still fits with added alignment. */ - if ((skb->len + hpad + tpad) <= CFHSI_MAX_EMB_FRM_SZ) { - u8 *pemb = desc->emb_frm; - desc->offset = CFHSI_DESC_SHORT_SZ; - *pemb = (u8)(hpad - 1); - pemb += hpad; - - /* Update network statistics. */ - spin_lock_bh(&cfhsi->lock); - cfhsi->ndev->stats.tx_packets++; - cfhsi->ndev->stats.tx_bytes += skb->len; - cfhsi_update_aggregation_stats(cfhsi, skb, -1); - spin_unlock_bh(&cfhsi->lock); - - /* Copy in embedded CAIF frame. */ - skb_copy_bits(skb, 0, pemb, skb->len); - - /* Consume the SKB */ - consume_skb(skb); - skb = NULL; - } - } - - /* Create payload CAIF frames. */ - while (nfrms < CFHSI_MAX_PKTS) { - struct caif_payload_info *info; - int hpad; - int tpad; - - if (!skb) - skb = cfhsi_dequeue(cfhsi); - - if (!skb) - break; - - /* Calculate needed head alignment and tail alignment. */ - info = (struct caif_payload_info *)&skb->cb; - - hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); - tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); - - /* Fill in CAIF frame length in descriptor. */ - desc->cffrm_len[nfrms] = hpad + skb->len + tpad; - - /* Fill head padding information. */ - *pfrm = (u8)(hpad - 1); - pfrm += hpad; - - /* Update network statistics. */ - spin_lock_bh(&cfhsi->lock); - cfhsi->ndev->stats.tx_packets++; - cfhsi->ndev->stats.tx_bytes += skb->len; - cfhsi_update_aggregation_stats(cfhsi, skb, -1); - spin_unlock_bh(&cfhsi->lock); - - /* Copy in CAIF frame. */ - skb_copy_bits(skb, 0, pfrm, skb->len); - - /* Update payload length. */ - pld_len += desc->cffrm_len[nfrms]; - - /* Update frame pointer. */ - pfrm += skb->len + tpad; - - /* Consume the SKB */ - consume_skb(skb); - skb = NULL; - - /* Update number of frames. */ - nfrms++; - } - - /* Unused length fields should be zero-filled (according to SPEC). */ - while (nfrms < CFHSI_MAX_PKTS) { - desc->cffrm_len[nfrms] = 0x0000; - nfrms++; - } - - /* Check if we can piggy-back another descriptor. */ - if (cfhsi_can_send_aggregate(cfhsi)) - desc->header |= CFHSI_PIGGY_DESC; - else - desc->header &= ~CFHSI_PIGGY_DESC; - - return CFHSI_DESC_SZ + pld_len; -} - -static void cfhsi_start_tx(struct cfhsi *cfhsi) -{ - struct cfhsi_desc *desc = (struct cfhsi_desc *)cfhsi->tx_buf; - int len, res; - - netdev_dbg(cfhsi->ndev, "%s.\n", __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - do { - /* Create HSI frame. */ - len = cfhsi_tx_frm(desc, cfhsi); - if (!len) { - spin_lock_bh(&cfhsi->lock); - if (unlikely(cfhsi_tx_queue_len(cfhsi))) { - spin_unlock_bh(&cfhsi->lock); - res = -EAGAIN; - continue; - } - cfhsi->tx_state = CFHSI_TX_STATE_IDLE; - /* Start inactivity timer. */ - mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->cfg.inactivity_timeout); - spin_unlock_bh(&cfhsi->lock); - break; - } - - /* Set up new transfer. */ - res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); - if (WARN_ON(res < 0)) - netdev_err(cfhsi->ndev, "%s: TX error %d.\n", - __func__, res); - } while (res < 0); -} - -static void cfhsi_tx_done(struct cfhsi *cfhsi) -{ - netdev_dbg(cfhsi->ndev, "%s.\n", __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - /* - * Send flow on if flow off has been previously signalled - * and number of packets is below low water mark. - */ - spin_lock_bh(&cfhsi->lock); - if (cfhsi->flow_off_sent && - cfhsi_tx_queue_len(cfhsi) <= cfhsi->cfg.q_low_mark && - cfhsi->cfdev.flowctrl) { - - cfhsi->flow_off_sent = 0; - cfhsi->cfdev.flowctrl(cfhsi->ndev, ON); - } - - if (cfhsi_can_send_aggregate(cfhsi)) { - spin_unlock_bh(&cfhsi->lock); - cfhsi_start_tx(cfhsi); - } else { - mod_timer(&cfhsi->aggregation_timer, - jiffies + cfhsi->cfg.aggregation_timeout); - spin_unlock_bh(&cfhsi->lock); - } - - return; -} - -static void cfhsi_tx_done_cb(struct cfhsi_cb_ops *cb_ops) -{ - struct cfhsi *cfhsi; - - cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - cfhsi_tx_done(cfhsi); -} - -static int cfhsi_rx_desc(struct cfhsi_desc *desc, struct cfhsi *cfhsi) -{ - int xfer_sz = 0; - int nfrms = 0; - u16 *plen = NULL; - u8 *pfrm = NULL; - - if ((desc->header & ~CFHSI_PIGGY_DESC) || - (desc->offset > CFHSI_MAX_EMB_FRM_SZ)) { - netdev_err(cfhsi->ndev, "%s: Invalid descriptor.\n", - __func__); - return -EPROTO; - } - - /* Check for embedded CAIF frame. */ - if (desc->offset) { - struct sk_buff *skb; - int len = 0; - pfrm = ((u8 *)desc) + desc->offset; - - /* Remove offset padding. */ - pfrm += *pfrm + 1; - - /* Read length of CAIF frame (little endian). */ - len = *pfrm; - len |= ((*(pfrm+1)) << 8) & 0xFF00; - len += 2; /* Add FCS fields. */ - - /* Sanity check length of CAIF frame. */ - if (unlikely(len > CFHSI_MAX_CAIF_FRAME_SZ)) { - netdev_err(cfhsi->ndev, "%s: Invalid length.\n", - __func__); - return -EPROTO; - } - - /* Allocate SKB (OK even in IRQ context). */ - skb = alloc_skb(len + 1, GFP_ATOMIC); - if (!skb) { - netdev_err(cfhsi->ndev, "%s: Out of memory !\n", - __func__); - return -ENOMEM; - } - caif_assert(skb != NULL); - - skb_put_data(skb, pfrm, len); - - skb->protocol = htons(ETH_P_CAIF); - skb_reset_mac_header(skb); - skb->dev = cfhsi->ndev; - - netif_rx_any_context(skb); - - /* Update network statistics. */ - cfhsi->ndev->stats.rx_packets++; - cfhsi->ndev->stats.rx_bytes += len; - } - - /* Calculate transfer length. */ - plen = desc->cffrm_len; - while (nfrms < CFHSI_MAX_PKTS && *plen) { - xfer_sz += *plen; - plen++; - nfrms++; - } - - /* Check for piggy-backed descriptor. */ - if (desc->header & CFHSI_PIGGY_DESC) - xfer_sz += CFHSI_DESC_SZ; - - if ((xfer_sz % 4) || (xfer_sz > (CFHSI_BUF_SZ_RX - CFHSI_DESC_SZ))) { - netdev_err(cfhsi->ndev, - "%s: Invalid payload len: %d, ignored.\n", - __func__, xfer_sz); - return -EPROTO; - } - return xfer_sz; -} - -static int cfhsi_rx_desc_len(struct cfhsi_desc *desc) -{ - int xfer_sz = 0; - int nfrms = 0; - u16 *plen; - - if ((desc->header & ~CFHSI_PIGGY_DESC) || - (desc->offset > CFHSI_MAX_EMB_FRM_SZ)) { - - pr_err("Invalid descriptor. %x %x\n", desc->header, - desc->offset); - return -EPROTO; - } - - /* Calculate transfer length. */ - plen = desc->cffrm_len; - while (nfrms < CFHSI_MAX_PKTS && *plen) { - xfer_sz += *plen; - plen++; - nfrms++; - } - - if (xfer_sz % 4) { - pr_err("Invalid payload len: %d, ignored.\n", xfer_sz); - return -EPROTO; - } - return xfer_sz; -} - -static int cfhsi_rx_pld(struct cfhsi_desc *desc, struct cfhsi *cfhsi) -{ - int rx_sz = 0; - int nfrms = 0; - u16 *plen = NULL; - u8 *pfrm = NULL; - - /* Sanity check header and offset. */ - if (WARN_ON((desc->header & ~CFHSI_PIGGY_DESC) || - (desc->offset > CFHSI_MAX_EMB_FRM_SZ))) { - netdev_err(cfhsi->ndev, "%s: Invalid descriptor.\n", - __func__); - return -EPROTO; - } - - /* Set frame pointer to start of payload. */ - pfrm = desc->emb_frm + CFHSI_MAX_EMB_FRM_SZ; - plen = desc->cffrm_len; - - /* Skip already processed frames. */ - while (nfrms < cfhsi->rx_state.nfrms) { - pfrm += *plen; - rx_sz += *plen; - plen++; - nfrms++; - } - - /* Parse payload. */ - while (nfrms < CFHSI_MAX_PKTS && *plen) { - struct sk_buff *skb; - u8 *pcffrm = NULL; - int len; - - /* CAIF frame starts after head padding. */ - pcffrm = pfrm + *pfrm + 1; - - /* Read length of CAIF frame (little endian). */ - len = *pcffrm; - len |= ((*(pcffrm + 1)) << 8) & 0xFF00; - len += 2; /* Add FCS fields. */ - - /* Sanity check length of CAIF frames. */ - if (unlikely(len > CFHSI_MAX_CAIF_FRAME_SZ)) { - netdev_err(cfhsi->ndev, "%s: Invalid length.\n", - __func__); - return -EPROTO; - } - - /* Allocate SKB (OK even in IRQ context). */ - skb = alloc_skb(len + 1, GFP_ATOMIC); - if (!skb) { - netdev_err(cfhsi->ndev, "%s: Out of memory !\n", - __func__); - cfhsi->rx_state.nfrms = nfrms; - return -ENOMEM; - } - caif_assert(skb != NULL); - - skb_put_data(skb, pcffrm, len); - - skb->protocol = htons(ETH_P_CAIF); - skb_reset_mac_header(skb); - skb->dev = cfhsi->ndev; - - netif_rx_any_context(skb); - - /* Update network statistics. */ - cfhsi->ndev->stats.rx_packets++; - cfhsi->ndev->stats.rx_bytes += len; - - pfrm += *plen; - rx_sz += *plen; - plen++; - nfrms++; - } - - return rx_sz; -} - -static void cfhsi_rx_done(struct cfhsi *cfhsi) -{ - int res; - int desc_pld_len = 0, rx_len, rx_state; - struct cfhsi_desc *desc = NULL; - u8 *rx_ptr, *rx_buf; - struct cfhsi_desc *piggy_desc = NULL; - - desc = (struct cfhsi_desc *)cfhsi->rx_buf; - - netdev_dbg(cfhsi->ndev, "%s\n", __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - /* Update inactivity timer if pending. */ - spin_lock_bh(&cfhsi->lock); - mod_timer_pending(&cfhsi->inactivity_timer, - jiffies + cfhsi->cfg.inactivity_timeout); - spin_unlock_bh(&cfhsi->lock); - - if (cfhsi->rx_state.state == CFHSI_RX_STATE_DESC) { - desc_pld_len = cfhsi_rx_desc_len(desc); - - if (desc_pld_len < 0) - goto out_of_sync; - - rx_buf = cfhsi->rx_buf; - rx_len = desc_pld_len; - if (desc_pld_len > 0 && (desc->header & CFHSI_PIGGY_DESC)) - rx_len += CFHSI_DESC_SZ; - if (desc_pld_len == 0) - rx_buf = cfhsi->rx_flip_buf; - } else { - rx_buf = cfhsi->rx_flip_buf; - - rx_len = CFHSI_DESC_SZ; - if (cfhsi->rx_state.pld_len > 0 && - (desc->header & CFHSI_PIGGY_DESC)) { - - piggy_desc = (struct cfhsi_desc *) - (desc->emb_frm + CFHSI_MAX_EMB_FRM_SZ + - cfhsi->rx_state.pld_len); - - cfhsi->rx_state.piggy_desc = true; - - /* Extract payload len from piggy-backed descriptor. */ - desc_pld_len = cfhsi_rx_desc_len(piggy_desc); - if (desc_pld_len < 0) - goto out_of_sync; - - if (desc_pld_len > 0) { - rx_len = desc_pld_len; - if (piggy_desc->header & CFHSI_PIGGY_DESC) - rx_len += CFHSI_DESC_SZ; - } - - /* - * Copy needed information from the piggy-backed - * descriptor to the descriptor in the start. - */ - memcpy(rx_buf, (u8 *)piggy_desc, - CFHSI_DESC_SHORT_SZ); - } - } - - if (desc_pld_len) { - rx_state = CFHSI_RX_STATE_PAYLOAD; - rx_ptr = rx_buf + CFHSI_DESC_SZ; - } else { - rx_state = CFHSI_RX_STATE_DESC; - rx_ptr = rx_buf; - rx_len = CFHSI_DESC_SZ; - } - - /* Initiate next read */ - if (test_bit(CFHSI_AWAKE, &cfhsi->bits)) { - /* Set up new transfer. */ - netdev_dbg(cfhsi->ndev, "%s: Start RX.\n", - __func__); - - res = cfhsi->ops->cfhsi_rx(rx_ptr, rx_len, - cfhsi->ops); - if (WARN_ON(res < 0)) { - netdev_err(cfhsi->ndev, "%s: RX error %d.\n", - __func__, res); - cfhsi->ndev->stats.rx_errors++; - cfhsi->ndev->stats.rx_dropped++; - } - } - - if (cfhsi->rx_state.state == CFHSI_RX_STATE_DESC) { - /* Extract payload from descriptor */ - if (cfhsi_rx_desc(desc, cfhsi) < 0) - goto out_of_sync; - } else { - /* Extract payload */ - if (cfhsi_rx_pld(desc, cfhsi) < 0) - goto out_of_sync; - if (piggy_desc) { - /* Extract any payload in piggyback descriptor. */ - if (cfhsi_rx_desc(piggy_desc, cfhsi) < 0) - goto out_of_sync; - /* Mark no embedded frame after extracting it */ - piggy_desc->offset = 0; - } - } - - /* Update state info */ - memset(&cfhsi->rx_state, 0, sizeof(cfhsi->rx_state)); - cfhsi->rx_state.state = rx_state; - cfhsi->rx_ptr = rx_ptr; - cfhsi->rx_len = rx_len; - cfhsi->rx_state.pld_len = desc_pld_len; - cfhsi->rx_state.piggy_desc = desc->header & CFHSI_PIGGY_DESC; - - if (rx_buf != cfhsi->rx_buf) - swap(cfhsi->rx_buf, cfhsi->rx_flip_buf); - return; - -out_of_sync: - netdev_err(cfhsi->ndev, "%s: Out of sync.\n", __func__); - print_hex_dump_bytes("--> ", DUMP_PREFIX_NONE, - cfhsi->rx_buf, CFHSI_DESC_SZ); - schedule_work(&cfhsi->out_of_sync_work); -} - -static void cfhsi_rx_slowpath(struct timer_list *t) -{ - struct cfhsi *cfhsi = from_timer(cfhsi, t, rx_slowpath_timer); - - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - cfhsi_rx_done(cfhsi); -} - -static void cfhsi_rx_done_cb(struct cfhsi_cb_ops *cb_ops) -{ - struct cfhsi *cfhsi; - - cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - if (test_and_clear_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits)) - wake_up_interruptible(&cfhsi->flush_fifo_wait); - else - cfhsi_rx_done(cfhsi); -} - -static void cfhsi_wake_up(struct work_struct *work) -{ - struct cfhsi *cfhsi = NULL; - int res; - int len; - long ret; - - cfhsi = container_of(work, struct cfhsi, wake_up_work); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - if (unlikely(test_bit(CFHSI_AWAKE, &cfhsi->bits))) { - /* It happenes when wakeup is requested by - * both ends at the same time. */ - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - clear_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - return; - } - - /* Activate wake line. */ - cfhsi->ops->cfhsi_wake_up(cfhsi->ops); - - netdev_dbg(cfhsi->ndev, "%s: Start waiting.\n", - __func__); - - /* Wait for acknowledge. */ - ret = CFHSI_WAKE_TOUT; - ret = wait_event_interruptible_timeout(cfhsi->wake_up_wait, - test_and_clear_bit(CFHSI_WAKE_UP_ACK, - &cfhsi->bits), ret); - if (unlikely(ret < 0)) { - /* Interrupted by signal. */ - netdev_err(cfhsi->ndev, "%s: Signalled: %ld.\n", - __func__, ret); - - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - cfhsi->ops->cfhsi_wake_down(cfhsi->ops); - return; - } else if (!ret) { - bool ca_wake = false; - size_t fifo_occupancy = 0; - - /* Wakeup timeout */ - netdev_dbg(cfhsi->ndev, "%s: Timeout.\n", - __func__); - - /* Check FIFO to check if modem has sent something. */ - WARN_ON(cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, - &fifo_occupancy)); - - netdev_dbg(cfhsi->ndev, "%s: Bytes in FIFO: %u.\n", - __func__, (unsigned) fifo_occupancy); - - /* Check if we misssed the interrupt. */ - WARN_ON(cfhsi->ops->cfhsi_get_peer_wake(cfhsi->ops, - &ca_wake)); - - if (ca_wake) { - netdev_err(cfhsi->ndev, "%s: CA Wake missed !.\n", - __func__); - - /* Clear the CFHSI_WAKE_UP_ACK bit to prevent race. */ - clear_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - - /* Continue execution. */ - goto wake_ack; - } - - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - cfhsi->ops->cfhsi_wake_down(cfhsi->ops); - return; - } -wake_ack: - netdev_dbg(cfhsi->ndev, "%s: Woken.\n", - __func__); - - /* Clear power up bit. */ - set_bit(CFHSI_AWAKE, &cfhsi->bits); - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - - /* Resume read operation. */ - netdev_dbg(cfhsi->ndev, "%s: Start RX.\n", __func__); - res = cfhsi->ops->cfhsi_rx(cfhsi->rx_ptr, cfhsi->rx_len, cfhsi->ops); - - if (WARN_ON(res < 0)) - netdev_err(cfhsi->ndev, "%s: RX err %d.\n", __func__, res); - - /* Clear power up acknowledment. */ - clear_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - - spin_lock_bh(&cfhsi->lock); - - /* Resume transmit if queues are not empty. */ - if (!cfhsi_tx_queue_len(cfhsi)) { - netdev_dbg(cfhsi->ndev, "%s: Peer wake, start timer.\n", - __func__); - /* Start inactivity timer. */ - mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->cfg.inactivity_timeout); - spin_unlock_bh(&cfhsi->lock); - return; - } - - netdev_dbg(cfhsi->ndev, "%s: Host wake.\n", - __func__); - - spin_unlock_bh(&cfhsi->lock); - - /* Create HSI frame. */ - len = cfhsi_tx_frm((struct cfhsi_desc *)cfhsi->tx_buf, cfhsi); - - if (likely(len > 0)) { - /* Set up new transfer. */ - res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); - if (WARN_ON(res < 0)) { - netdev_err(cfhsi->ndev, "%s: TX error %d.\n", - __func__, res); - cfhsi_abort_tx(cfhsi); - } - } else { - netdev_err(cfhsi->ndev, - "%s: Failed to create HSI frame: %d.\n", - __func__, len); - } -} - -static void cfhsi_wake_down(struct work_struct *work) -{ - long ret; - struct cfhsi *cfhsi = NULL; - size_t fifo_occupancy = 0; - int retry = CFHSI_WAKE_TOUT; - - cfhsi = container_of(work, struct cfhsi, wake_down_work); - netdev_dbg(cfhsi->ndev, "%s.\n", __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - /* Deactivate wake line. */ - cfhsi->ops->cfhsi_wake_down(cfhsi->ops); - - /* Wait for acknowledge. */ - ret = CFHSI_WAKE_TOUT; - ret = wait_event_interruptible_timeout(cfhsi->wake_down_wait, - test_and_clear_bit(CFHSI_WAKE_DOWN_ACK, - &cfhsi->bits), ret); - if (ret < 0) { - /* Interrupted by signal. */ - netdev_err(cfhsi->ndev, "%s: Signalled: %ld.\n", - __func__, ret); - return; - } else if (!ret) { - bool ca_wake = true; - - /* Timeout */ - netdev_err(cfhsi->ndev, "%s: Timeout.\n", __func__); - - /* Check if we misssed the interrupt. */ - WARN_ON(cfhsi->ops->cfhsi_get_peer_wake(cfhsi->ops, - &ca_wake)); - if (!ca_wake) - netdev_err(cfhsi->ndev, "%s: CA Wake missed !.\n", - __func__); - } - - /* Check FIFO occupancy. */ - while (retry) { - WARN_ON(cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, - &fifo_occupancy)); - - if (!fifo_occupancy) - break; - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - retry--; - } - - if (!retry) - netdev_err(cfhsi->ndev, "%s: FIFO Timeout.\n", __func__); - - /* Clear AWAKE condition. */ - clear_bit(CFHSI_AWAKE, &cfhsi->bits); - - /* Cancel pending RX requests. */ - cfhsi->ops->cfhsi_rx_cancel(cfhsi->ops); -} - -static void cfhsi_out_of_sync(struct work_struct *work) -{ - struct cfhsi *cfhsi = NULL; - - cfhsi = container_of(work, struct cfhsi, out_of_sync_work); - - rtnl_lock(); - dev_close(cfhsi->ndev); - rtnl_unlock(); -} - -static void cfhsi_wake_up_cb(struct cfhsi_cb_ops *cb_ops) -{ - struct cfhsi *cfhsi = NULL; - - cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - set_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - wake_up_interruptible(&cfhsi->wake_up_wait); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - /* Schedule wake up work queue if the peer initiates. */ - if (!test_and_set_bit(CFHSI_WAKE_UP, &cfhsi->bits)) - queue_work(cfhsi->wq, &cfhsi->wake_up_work); -} - -static void cfhsi_wake_down_cb(struct cfhsi_cb_ops *cb_ops) -{ - struct cfhsi *cfhsi = NULL; - - cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - /* Initiating low power is only permitted by the host (us). */ - set_bit(CFHSI_WAKE_DOWN_ACK, &cfhsi->bits); - wake_up_interruptible(&cfhsi->wake_down_wait); -} - -static void cfhsi_aggregation_tout(struct timer_list *t) -{ - struct cfhsi *cfhsi = from_timer(cfhsi, t, aggregation_timer); - - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - cfhsi_start_tx(cfhsi); -} - -static netdev_tx_t cfhsi_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct cfhsi *cfhsi = NULL; - int start_xfer = 0; - int timer_active; - int prio; - - if (!dev) - return -EINVAL; - - cfhsi = netdev_priv(dev); - - switch (skb->priority) { - case TC_PRIO_BESTEFFORT: - case TC_PRIO_FILLER: - case TC_PRIO_BULK: - prio = CFHSI_PRIO_BEBK; - break; - case TC_PRIO_INTERACTIVE_BULK: - prio = CFHSI_PRIO_VI; - break; - case TC_PRIO_INTERACTIVE: - prio = CFHSI_PRIO_VO; - break; - case TC_PRIO_CONTROL: - default: - prio = CFHSI_PRIO_CTL; - break; - } - - spin_lock_bh(&cfhsi->lock); - - /* Update aggregation statistics */ - cfhsi_update_aggregation_stats(cfhsi, skb, 1); - - /* Queue the SKB */ - skb_queue_tail(&cfhsi->qhead[prio], skb); - - /* Sanity check; xmit should not be called after unregister_netdev */ - if (WARN_ON(test_bit(CFHSI_SHUTDOWN, &cfhsi->bits))) { - spin_unlock_bh(&cfhsi->lock); - cfhsi_abort_tx(cfhsi); - return -EINVAL; - } - - /* Send flow off if number of packets is above high water mark. */ - if (!cfhsi->flow_off_sent && - cfhsi_tx_queue_len(cfhsi) > cfhsi->cfg.q_high_mark && - cfhsi->cfdev.flowctrl) { - cfhsi->flow_off_sent = 1; - cfhsi->cfdev.flowctrl(cfhsi->ndev, OFF); - } - - if (cfhsi->tx_state == CFHSI_TX_STATE_IDLE) { - cfhsi->tx_state = CFHSI_TX_STATE_XFER; - start_xfer = 1; - } - - if (!start_xfer) { - /* Send aggregate if it is possible */ - bool aggregate_ready = - cfhsi_can_send_aggregate(cfhsi) && - del_timer(&cfhsi->aggregation_timer) > 0; - spin_unlock_bh(&cfhsi->lock); - if (aggregate_ready) - cfhsi_start_tx(cfhsi); - return NETDEV_TX_OK; - } - - /* Delete inactivity timer if started. */ - timer_active = del_timer_sync(&cfhsi->inactivity_timer); - - spin_unlock_bh(&cfhsi->lock); - - if (timer_active) { - struct cfhsi_desc *desc = (struct cfhsi_desc *)cfhsi->tx_buf; - int len; - int res; - - /* Create HSI frame. */ - len = cfhsi_tx_frm(desc, cfhsi); - WARN_ON(!len); - - /* Set up new transfer. */ - res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); - if (WARN_ON(res < 0)) { - netdev_err(cfhsi->ndev, "%s: TX error %d.\n", - __func__, res); - cfhsi_abort_tx(cfhsi); - } - } else { - /* Schedule wake up work queue if the we initiate. */ - if (!test_and_set_bit(CFHSI_WAKE_UP, &cfhsi->bits)) - queue_work(cfhsi->wq, &cfhsi->wake_up_work); - } - - return NETDEV_TX_OK; -} - -static const struct net_device_ops cfhsi_netdevops; - -static void cfhsi_setup(struct net_device *dev) -{ - int i; - struct cfhsi *cfhsi = netdev_priv(dev); - dev->features = 0; - dev->type = ARPHRD_CAIF; - dev->flags = IFF_POINTOPOINT | IFF_NOARP; - dev->mtu = CFHSI_MAX_CAIF_FRAME_SZ; - dev->priv_flags |= IFF_NO_QUEUE; - dev->needs_free_netdev = true; - dev->netdev_ops = &cfhsi_netdevops; - for (i = 0; i < CFHSI_PRIO_LAST; ++i) - skb_queue_head_init(&cfhsi->qhead[i]); - cfhsi->cfdev.link_select = CAIF_LINK_HIGH_BANDW; - cfhsi->cfdev.use_frag = false; - cfhsi->cfdev.use_stx = false; - cfhsi->cfdev.use_fcs = false; - cfhsi->ndev = dev; - cfhsi->cfg = hsi_default_config; -} - -static int cfhsi_open(struct net_device *ndev) -{ - struct cfhsi *cfhsi = netdev_priv(ndev); - int res; - - clear_bit(CFHSI_SHUTDOWN, &cfhsi->bits); - - /* Initialize state vaiables. */ - cfhsi->tx_state = CFHSI_TX_STATE_IDLE; - cfhsi->rx_state.state = CFHSI_RX_STATE_DESC; - - /* Set flow info */ - cfhsi->flow_off_sent = 0; - - /* - * Allocate a TX buffer with the size of a HSI packet descriptors - * and the necessary room for CAIF payload frames. - */ - cfhsi->tx_buf = kzalloc(CFHSI_BUF_SZ_TX, GFP_KERNEL); - if (!cfhsi->tx_buf) { - res = -ENODEV; - goto err_alloc_tx; - } - - /* - * Allocate a RX buffer with the size of two HSI packet descriptors and - * the necessary room for CAIF payload frames. - */ - cfhsi->rx_buf = kzalloc(CFHSI_BUF_SZ_RX, GFP_KERNEL); - if (!cfhsi->rx_buf) { - res = -ENODEV; - goto err_alloc_rx; - } - - cfhsi->rx_flip_buf = kzalloc(CFHSI_BUF_SZ_RX, GFP_KERNEL); - if (!cfhsi->rx_flip_buf) { - res = -ENODEV; - goto err_alloc_rx_flip; - } - - /* Initialize aggregation timeout */ - cfhsi->cfg.aggregation_timeout = hsi_default_config.aggregation_timeout; - - /* Initialize recieve vaiables. */ - cfhsi->rx_ptr = cfhsi->rx_buf; - cfhsi->rx_len = CFHSI_DESC_SZ; - - /* Initialize spin locks. */ - spin_lock_init(&cfhsi->lock); - - /* Set up the driver. */ - cfhsi->cb_ops.tx_done_cb = cfhsi_tx_done_cb; - cfhsi->cb_ops.rx_done_cb = cfhsi_rx_done_cb; - cfhsi->cb_ops.wake_up_cb = cfhsi_wake_up_cb; - cfhsi->cb_ops.wake_down_cb = cfhsi_wake_down_cb; - - /* Initialize the work queues. */ - INIT_WORK(&cfhsi->wake_up_work, cfhsi_wake_up); - INIT_WORK(&cfhsi->wake_down_work, cfhsi_wake_down); - INIT_WORK(&cfhsi->out_of_sync_work, cfhsi_out_of_sync); - - /* Clear all bit fields. */ - clear_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - clear_bit(CFHSI_WAKE_DOWN_ACK, &cfhsi->bits); - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - clear_bit(CFHSI_AWAKE, &cfhsi->bits); - - /* Create work thread. */ - cfhsi->wq = alloc_ordered_workqueue(cfhsi->ndev->name, WQ_MEM_RECLAIM); - if (!cfhsi->wq) { - netdev_err(cfhsi->ndev, "%s: Failed to create work queue.\n", - __func__); - res = -ENODEV; - goto err_create_wq; - } - - /* Initialize wait queues. */ - init_waitqueue_head(&cfhsi->wake_up_wait); - init_waitqueue_head(&cfhsi->wake_down_wait); - init_waitqueue_head(&cfhsi->flush_fifo_wait); - - /* Setup the inactivity timer. */ - timer_setup(&cfhsi->inactivity_timer, cfhsi_inactivity_tout, 0); - /* Setup the slowpath RX timer. */ - timer_setup(&cfhsi->rx_slowpath_timer, cfhsi_rx_slowpath, 0); - /* Setup the aggregation timer. */ - timer_setup(&cfhsi->aggregation_timer, cfhsi_aggregation_tout, 0); - - /* Activate HSI interface. */ - res = cfhsi->ops->cfhsi_up(cfhsi->ops); - if (res) { - netdev_err(cfhsi->ndev, - "%s: can't activate HSI interface: %d.\n", - __func__, res); - goto err_activate; - } - - /* Flush FIFO */ - res = cfhsi_flush_fifo(cfhsi); - if (res) { - netdev_err(cfhsi->ndev, "%s: Can't flush FIFO: %d.\n", - __func__, res); - goto err_net_reg; - } - return res; - - err_net_reg: - cfhsi->ops->cfhsi_down(cfhsi->ops); - err_activate: - destroy_workqueue(cfhsi->wq); - err_create_wq: - kfree(cfhsi->rx_flip_buf); - err_alloc_rx_flip: - kfree(cfhsi->rx_buf); - err_alloc_rx: - kfree(cfhsi->tx_buf); - err_alloc_tx: - return res; -} - -static int cfhsi_close(struct net_device *ndev) -{ - struct cfhsi *cfhsi = netdev_priv(ndev); - u8 *tx_buf, *rx_buf, *flip_buf; - - /* going to shutdown driver */ - set_bit(CFHSI_SHUTDOWN, &cfhsi->bits); - - /* Delete timers if pending */ - del_timer_sync(&cfhsi->inactivity_timer); - del_timer_sync(&cfhsi->rx_slowpath_timer); - del_timer_sync(&cfhsi->aggregation_timer); - - /* Cancel pending RX request (if any) */ - cfhsi->ops->cfhsi_rx_cancel(cfhsi->ops); - - /* Destroy workqueue */ - destroy_workqueue(cfhsi->wq); - - /* Store bufferes: will be freed later. */ - tx_buf = cfhsi->tx_buf; - rx_buf = cfhsi->rx_buf; - flip_buf = cfhsi->rx_flip_buf; - /* Flush transmit queues. */ - cfhsi_abort_tx(cfhsi); - - /* Deactivate interface */ - cfhsi->ops->cfhsi_down(cfhsi->ops); - - /* Free buffers. */ - kfree(tx_buf); - kfree(rx_buf); - kfree(flip_buf); - return 0; -} - -static void cfhsi_uninit(struct net_device *dev) -{ - struct cfhsi *cfhsi = netdev_priv(dev); - ASSERT_RTNL(); - symbol_put(cfhsi_get_device); - list_del(&cfhsi->list); -} - -static const struct net_device_ops cfhsi_netdevops = { - .ndo_uninit = cfhsi_uninit, - .ndo_open = cfhsi_open, - .ndo_stop = cfhsi_close, - .ndo_start_xmit = cfhsi_xmit -}; - -static void cfhsi_netlink_parms(struct nlattr *data[], struct cfhsi *cfhsi) -{ - int i; - - if (!data) { - pr_debug("no params data found\n"); - return; - } - - i = __IFLA_CAIF_HSI_INACTIVITY_TOUT; - /* - * Inactivity timeout in millisecs. Lowest possible value is 1, - * and highest possible is NEXT_TIMER_MAX_DELTA. - */ - if (data[i]) { - u32 inactivity_timeout = nla_get_u32(data[i]); - /* Pre-calculate inactivity timeout. */ - cfhsi->cfg.inactivity_timeout = inactivity_timeout * HZ / 1000; - if (cfhsi->cfg.inactivity_timeout == 0) - cfhsi->cfg.inactivity_timeout = 1; - else if (cfhsi->cfg.inactivity_timeout > NEXT_TIMER_MAX_DELTA) - cfhsi->cfg.inactivity_timeout = NEXT_TIMER_MAX_DELTA; - } - - i = __IFLA_CAIF_HSI_AGGREGATION_TOUT; - if (data[i]) - cfhsi->cfg.aggregation_timeout = nla_get_u32(data[i]); - - i = __IFLA_CAIF_HSI_HEAD_ALIGN; - if (data[i]) - cfhsi->cfg.head_align = nla_get_u32(data[i]); - - i = __IFLA_CAIF_HSI_TAIL_ALIGN; - if (data[i]) - cfhsi->cfg.tail_align = nla_get_u32(data[i]); - - i = __IFLA_CAIF_HSI_QHIGH_WATERMARK; - if (data[i]) - cfhsi->cfg.q_high_mark = nla_get_u32(data[i]); - - i = __IFLA_CAIF_HSI_QLOW_WATERMARK; - if (data[i]) - cfhsi->cfg.q_low_mark = nla_get_u32(data[i]); -} - -static int caif_hsi_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[], - struct netlink_ext_ack *extack) -{ - cfhsi_netlink_parms(data, netdev_priv(dev)); - netdev_state_change(dev); - return 0; -} - -static const struct nla_policy caif_hsi_policy[__IFLA_CAIF_HSI_MAX + 1] = { - [__IFLA_CAIF_HSI_INACTIVITY_TOUT] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_AGGREGATION_TOUT] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_HEAD_ALIGN] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_TAIL_ALIGN] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_QHIGH_WATERMARK] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_QLOW_WATERMARK] = { .type = NLA_U32, .len = 4 }, -}; - -static size_t caif_hsi_get_size(const struct net_device *dev) -{ - int i; - size_t s = 0; - for (i = __IFLA_CAIF_HSI_UNSPEC + 1; i < __IFLA_CAIF_HSI_MAX; i++) - s += nla_total_size(caif_hsi_policy[i].len); - return s; -} - -static int caif_hsi_fill_info(struct sk_buff *skb, const struct net_device *dev) -{ - struct cfhsi *cfhsi = netdev_priv(dev); - - if (nla_put_u32(skb, __IFLA_CAIF_HSI_INACTIVITY_TOUT, - cfhsi->cfg.inactivity_timeout) || - nla_put_u32(skb, __IFLA_CAIF_HSI_AGGREGATION_TOUT, - cfhsi->cfg.aggregation_timeout) || - nla_put_u32(skb, __IFLA_CAIF_HSI_HEAD_ALIGN, - cfhsi->cfg.head_align) || - nla_put_u32(skb, __IFLA_CAIF_HSI_TAIL_ALIGN, - cfhsi->cfg.tail_align) || - nla_put_u32(skb, __IFLA_CAIF_HSI_QHIGH_WATERMARK, - cfhsi->cfg.q_high_mark) || - nla_put_u32(skb, __IFLA_CAIF_HSI_QLOW_WATERMARK, - cfhsi->cfg.q_low_mark)) - return -EMSGSIZE; - - return 0; -} - -static int caif_hsi_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) -{ - struct cfhsi *cfhsi = NULL; - struct cfhsi_ops *(*get_ops)(void); - - ASSERT_RTNL(); - - cfhsi = netdev_priv(dev); - cfhsi_netlink_parms(data, cfhsi); - - get_ops = symbol_get(cfhsi_get_ops); - if (!get_ops) { - pr_err("%s: failed to get the cfhsi_ops\n", __func__); - return -ENODEV; - } - - /* Assign the HSI device. */ - cfhsi->ops = (*get_ops)(); - if (!cfhsi->ops) { - pr_err("%s: failed to get the cfhsi_ops\n", __func__); - goto err; - } - - /* Assign the driver to this HSI device. */ - cfhsi->ops->cb_ops = &cfhsi->cb_ops; - if (register_netdevice(dev)) { - pr_warn("%s: caif_hsi device registration failed\n", __func__); - goto err; - } - /* Add CAIF HSI device to list. */ - list_add_tail(&cfhsi->list, &cfhsi_list); - - return 0; -err: - symbol_put(cfhsi_get_ops); - return -ENODEV; -} - -static struct rtnl_link_ops caif_hsi_link_ops __read_mostly = { - .kind = "cfhsi", - .priv_size = sizeof(struct cfhsi), - .setup = cfhsi_setup, - .maxtype = __IFLA_CAIF_HSI_MAX, - .policy = caif_hsi_policy, - .newlink = caif_hsi_newlink, - .changelink = caif_hsi_changelink, - .get_size = caif_hsi_get_size, - .fill_info = caif_hsi_fill_info, -}; - -static void __exit cfhsi_exit_module(void) -{ - struct list_head *list_node; - struct list_head *n; - struct cfhsi *cfhsi; - - rtnl_link_unregister(&caif_hsi_link_ops); - - rtnl_lock(); - list_for_each_safe(list_node, n, &cfhsi_list) { - cfhsi = list_entry(list_node, struct cfhsi, list); - unregister_netdevice(cfhsi->ndev); - } - rtnl_unlock(); -} - -static int __init cfhsi_init_module(void) -{ - return rtnl_link_register(&caif_hsi_link_ops); -} - -module_init(cfhsi_init_module); -module_exit(cfhsi_exit_module); diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h deleted file mode 100644 index 552cf68d28d2..000000000000 --- a/include/net/caif/caif_hsi.h +++ /dev/null @@ -1,200 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Daniel Martensson / daniel.martensson@stericsson.com - * Dmitry.Tarnyagin / dmitry.tarnyagin@stericsson.com - */ - -#ifndef CAIF_HSI_H_ -#define CAIF_HSI_H_ - -#include -#include -#include - -/* - * Maximum number of CAIF frames that can reside in the same HSI frame. - */ -#define CFHSI_MAX_PKTS 15 - -/* - * Maximum number of bytes used for the frame that can be embedded in the - * HSI descriptor. - */ -#define CFHSI_MAX_EMB_FRM_SZ 96 - -/* - * Decides if HSI buffers should be prefilled with 0xFF pattern for easier - * debugging. Both TX and RX buffers will be filled before the transfer. - */ -#define CFHSI_DBG_PREFILL 0 - -/* Structure describing a HSI packet descriptor. */ -#pragma pack(1) /* Byte alignment. */ -struct cfhsi_desc { - u8 header; - u8 offset; - u16 cffrm_len[CFHSI_MAX_PKTS]; - u8 emb_frm[CFHSI_MAX_EMB_FRM_SZ]; -}; -#pragma pack() /* Default alignment. */ - -/* Size of the complete HSI packet descriptor. */ -#define CFHSI_DESC_SZ (sizeof(struct cfhsi_desc)) - -/* - * Size of the complete HSI packet descriptor excluding the optional embedded - * CAIF frame. - */ -#define CFHSI_DESC_SHORT_SZ (CFHSI_DESC_SZ - CFHSI_MAX_EMB_FRM_SZ) - -/* - * Maximum bytes transferred in one transfer. - */ -#define CFHSI_MAX_CAIF_FRAME_SZ 4096 - -#define CFHSI_MAX_PAYLOAD_SZ (CFHSI_MAX_PKTS * CFHSI_MAX_CAIF_FRAME_SZ) - -/* Size of the complete HSI TX buffer. */ -#define CFHSI_BUF_SZ_TX (CFHSI_DESC_SZ + CFHSI_MAX_PAYLOAD_SZ) - -/* Size of the complete HSI RX buffer. */ -#define CFHSI_BUF_SZ_RX ((2 * CFHSI_DESC_SZ) + CFHSI_MAX_PAYLOAD_SZ) - -/* Bitmasks for the HSI descriptor. */ -#define CFHSI_PIGGY_DESC (0x01 << 7) - -#define CFHSI_TX_STATE_IDLE 0 -#define CFHSI_TX_STATE_XFER 1 - -#define CFHSI_RX_STATE_DESC 0 -#define CFHSI_RX_STATE_PAYLOAD 1 - -/* Bitmasks for power management. */ -#define CFHSI_WAKE_UP 0 -#define CFHSI_WAKE_UP_ACK 1 -#define CFHSI_WAKE_DOWN_ACK 2 -#define CFHSI_AWAKE 3 -#define CFHSI_WAKELOCK_HELD 4 -#define CFHSI_SHUTDOWN 5 -#define CFHSI_FLUSH_FIFO 6 - -#ifndef CFHSI_INACTIVITY_TOUT -#define CFHSI_INACTIVITY_TOUT (1 * HZ) -#endif /* CFHSI_INACTIVITY_TOUT */ - -#ifndef CFHSI_WAKE_TOUT -#define CFHSI_WAKE_TOUT (3 * HZ) -#endif /* CFHSI_WAKE_TOUT */ - -#ifndef CFHSI_MAX_RX_RETRIES -#define CFHSI_MAX_RX_RETRIES (10 * HZ) -#endif - -/* Structure implemented by the CAIF HSI driver. */ -struct cfhsi_cb_ops { - void (*tx_done_cb) (struct cfhsi_cb_ops *drv); - void (*rx_done_cb) (struct cfhsi_cb_ops *drv); - void (*wake_up_cb) (struct cfhsi_cb_ops *drv); - void (*wake_down_cb) (struct cfhsi_cb_ops *drv); -}; - -/* Structure implemented by HSI device. */ -struct cfhsi_ops { - int (*cfhsi_up) (struct cfhsi_ops *dev); - int (*cfhsi_down) (struct cfhsi_ops *dev); - int (*cfhsi_tx) (u8 *ptr, int len, struct cfhsi_ops *dev); - int (*cfhsi_rx) (u8 *ptr, int len, struct cfhsi_ops *dev); - int (*cfhsi_wake_up) (struct cfhsi_ops *dev); - int (*cfhsi_wake_down) (struct cfhsi_ops *dev); - int (*cfhsi_get_peer_wake) (struct cfhsi_ops *dev, bool *status); - int (*cfhsi_fifo_occupancy) (struct cfhsi_ops *dev, size_t *occupancy); - int (*cfhsi_rx_cancel)(struct cfhsi_ops *dev); - struct cfhsi_cb_ops *cb_ops; -}; - -/* Structure holds status of received CAIF frames processing */ -struct cfhsi_rx_state { - int state; - int nfrms; - int pld_len; - int retries; - bool piggy_desc; -}; - -/* Priority mapping */ -enum { - CFHSI_PRIO_CTL = 0, - CFHSI_PRIO_VI, - CFHSI_PRIO_VO, - CFHSI_PRIO_BEBK, - CFHSI_PRIO_LAST, -}; - -struct cfhsi_config { - u32 inactivity_timeout; - u32 aggregation_timeout; - u32 head_align; - u32 tail_align; - u32 q_high_mark; - u32 q_low_mark; -}; - -/* Structure implemented by CAIF HSI drivers. */ -struct cfhsi { - struct caif_dev_common cfdev; - struct net_device *ndev; - struct platform_device *pdev; - struct sk_buff_head qhead[CFHSI_PRIO_LAST]; - struct cfhsi_cb_ops cb_ops; - struct cfhsi_ops *ops; - int tx_state; - struct cfhsi_rx_state rx_state; - struct cfhsi_config cfg; - int rx_len; - u8 *rx_ptr; - u8 *tx_buf; - u8 *rx_buf; - u8 *rx_flip_buf; - spinlock_t lock; - int flow_off_sent; - struct list_head list; - struct work_struct wake_up_work; - struct work_struct wake_down_work; - struct work_struct out_of_sync_work; - struct workqueue_struct *wq; - wait_queue_head_t wake_up_wait; - wait_queue_head_t wake_down_wait; - wait_queue_head_t flush_fifo_wait; - struct timer_list inactivity_timer; - struct timer_list rx_slowpath_timer; - - /* TX aggregation */ - int aggregation_len; - struct timer_list aggregation_timer; - - unsigned long bits; -}; -extern struct platform_driver cfhsi_driver; - -/** - * enum ifla_caif_hsi - CAIF HSI NetlinkRT parameters. - * @IFLA_CAIF_HSI_INACTIVITY_TOUT: Inactivity timeout before - * taking the HSI wakeline down, in milliseconds. - * When using RT Netlink to create, destroy or configure a CAIF HSI interface, - * enum ifla_caif_hsi is used to specify the configuration attributes. - */ -enum ifla_caif_hsi { - __IFLA_CAIF_HSI_UNSPEC, - __IFLA_CAIF_HSI_INACTIVITY_TOUT, - __IFLA_CAIF_HSI_AGGREGATION_TOUT, - __IFLA_CAIF_HSI_HEAD_ALIGN, - __IFLA_CAIF_HSI_TAIL_ALIGN, - __IFLA_CAIF_HSI_QHIGH_WATERMARK, - __IFLA_CAIF_HSI_QLOW_WATERMARK, - __IFLA_CAIF_HSI_MAX -}; - -struct cfhsi_ops *cfhsi_get_ops(void); - -#endif /* CAIF_HSI_H_ */ -- cgit v1.2.3 From 71158bb1f2d2da61385c58fc1114e1a1c19984ba Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 30 Jun 2021 13:42:13 +0200 Subject: tcp: consistently disable header prediction for mptcp The MPTCP receive path is hooked only into the TCP slow-path. The DSS presence allows plain MPTCP traffic to hit that consistently. Since commit e1ff9e82e2ea ("net: mptcp: improve fallback to TCP"), when an MPTCP socket falls back to TCP, it can hit the TCP receive fast-path, and delay or stop triggering the event notification. Address the issue explicitly disabling the header prediction for MPTCP sockets. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/200 Fixes: e1ff9e82e2ea ("net: mptcp: improve fallback to TCP") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index e668f1bf780d..17df9b047ee4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -686,6 +686,10 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { + /* mptcp hooks are only on the slow path */ + if (sk_is_mptcp((struct sock *)tp)) + return; + tp->pred_flags = htonl((tp->tcp_header_len << 26) | ntohl(TCP_FLAG_ACK) | snd_wnd); -- cgit v1.2.3 From a23f89a9990684a0ca0cac4a2857c15d338ebe2d Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 1 Jul 2021 08:02:24 +0300 Subject: netfilter: conntrack: nf_ct_gre_keymap_flush() removal nf_ct_gre_keymap_flush() is useless. It is called from nf_conntrack_cleanup_net_list() only and tries to remove nf_ct_gre_keymap entries from pernet gre keymap list. Though: a) at this point the list should already be empty, all its entries were deleted during the conntracks cleanup, because nf_conntrack_cleanup_net_list() executes nf_ct_iterate_cleanup(kill_all) before nf_conntrack_proto_pernet_fini(): nf_conntrack_cleanup_net_list +- nf_ct_iterate_cleanup | nf_ct_put | nf_conntrack_put | nf_conntrack_destroy | destroy_conntrack | destroy_gre_conntrack | nf_ct_gre_keymap_destroy `- nf_conntrack_proto_pernet_fini nf_ct_gre_keymap_flush b) Let's say we find that the keymap list is not empty. This means netns still has a conntrack associated with gre, in which case we should not free its memory, because this will lead to a double free and related crashes. However I doubt it could have gone unnoticed for years, obviously this does not happen in real life. So I think we can remove both nf_ct_gre_keymap_flush() and nf_conntrack_proto_pernet_fini(). Signed-off-by: Vasily Averin Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 1 - net/netfilter/nf_conntrack_core.c | 1 - net/netfilter/nf_conntrack_proto.c | 7 ------- net/netfilter/nf_conntrack_proto_gre.c | 13 ------------- 4 files changed, 22 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 09f2efea0b97..13807ea94cd2 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -30,7 +30,6 @@ void nf_conntrack_cleanup_net(struct net *net); void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list); void nf_conntrack_proto_pernet_init(struct net *net); -void nf_conntrack_proto_pernet_fini(struct net *net); int nf_conntrack_proto_init(void); void nf_conntrack_proto_fini(void); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 96ba19fc8155..085a11f1eb43 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2457,7 +2457,6 @@ i_see_dead_people: } list_for_each_entry(net, net_exit_list, exit_list) { - nf_conntrack_proto_pernet_fini(net); nf_conntrack_ecache_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); free_percpu(net->ct.stat); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 55647409a9be..8f7a9837349c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -697,13 +697,6 @@ void nf_conntrack_proto_pernet_init(struct net *net) #endif } -void nf_conntrack_proto_pernet_fini(struct net *net) -{ -#ifdef CONFIG_NF_CT_PROTO_GRE - nf_ct_gre_keymap_flush(net); -#endif -} - module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index db11e403d818..728eeb0aea87 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -55,19 +55,6 @@ static inline struct nf_gre_net *gre_pernet(struct net *net) return &net->ct.nf_ct_proto.gre; } -void nf_ct_gre_keymap_flush(struct net *net) -{ - struct nf_gre_net *net_gre = gre_pernet(net); - struct nf_ct_gre_keymap *km, *tmp; - - spin_lock_bh(&keymap_lock); - list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) { - list_del_rcu(&km->list); - kfree_rcu(km, rcu); - } - spin_unlock_bh(&keymap_lock); -} - static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, const struct nf_conntrack_tuple *t) { -- cgit v1.2.3 From 2580d3f40022642452dd8422bfb8c22e54cf84bb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 28 Jun 2021 15:34:28 +0200 Subject: xfrm: Fix RCU vs hash_resize_mutex lock inversion xfrm_bydst_resize() calls synchronize_rcu() while holding hash_resize_mutex. But then on PREEMPT_RT configurations, xfrm_policy_lookup_bytype() may acquire that mutex while running in an RCU read side critical section. This results in a deadlock. In fact the scope of hash_resize_mutex is way beyond the purpose of xfrm_policy_lookup_bytype() to just fetch a coherent and stable policy for a given destination/direction, along with other details. The lower level net->xfrm.xfrm_policy_lock, which among other things protects per destination/direction references to policy entries, is enough to serialize and benefit from priority inheritance against the write side. As a bonus, it makes it officially a per network namespace synchronization business where a policy table resize on namespace A shouldn't block a policy lookup on namespace B. Fixes: 77cc278f7b20 (xfrm: policy: Use sequence counters with associated lock) Cc: stable@vger.kernel.org Cc: Ahmed S. Darwish Cc: Peter Zijlstra (Intel) Cc: Varad Gautam Cc: Steffen Klassert Cc: Herbert Xu Cc: David S. Miller Signed-off-by: Frederic Weisbecker Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_policy.c | 17 ++++++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index e816b6a3ef2b..9b376b87bd54 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -74,6 +74,7 @@ struct netns_xfrm { #endif spinlock_t xfrm_state_lock; seqcount_spinlock_t xfrm_state_hash_generation; + seqcount_spinlock_t xfrm_policy_hash_generation; spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ce500f847b99..46a6d15b66d6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -155,7 +155,6 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; static struct kmem_cache *xfrm_dst_cache __ro_after_init; -static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation; static struct rhashtable xfrm_policy_inexact_table; static const struct rhashtable_params xfrm_pol_inexact_params; @@ -585,7 +584,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) return; spin_lock_bh(&net->xfrm.xfrm_policy_lock); - write_seqcount_begin(&xfrm_policy_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)); @@ -596,7 +595,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; - write_seqcount_end(&xfrm_policy_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); @@ -1245,7 +1244,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); - write_seqcount_begin(&xfrm_policy_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); /* make sure that we can insert the indirect policies again before * we start with destructive action. @@ -1354,7 +1353,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) out_unlock: __xfrm_policy_inexact_flush(net); - write_seqcount_end(&xfrm_policy_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); @@ -2095,9 +2094,9 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, rcu_read_lock(); retry: do { - sequence = read_seqcount_begin(&xfrm_policy_hash_generation); + sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); chain = policy_hash_direct(net, daddr, saddr, family, dir); - } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)); + } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)); ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { @@ -2128,7 +2127,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } skip_inexact: - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) + if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)) goto retry; if (ret && !xfrm_pol_hold_rcu(ret)) @@ -4084,6 +4083,7 @@ static int __net_init xfrm_net_init(struct net *net) /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); spin_lock_init(&net->xfrm.xfrm_policy_lock); + seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); rv = xfrm_statistics_init(net); @@ -4128,7 +4128,6 @@ void __init xfrm_init(void) { register_pernet_subsys(&xfrm_net_ops); xfrm_dev_init(); - seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex); xfrm_input_init(); #ifdef CONFIG_XFRM_ESPINTCP -- cgit v1.2.3 From 40fc3054b45820c28ea3c65e2c86d041dc244a8a Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Fri, 2 Jul 2021 02:47:00 +0300 Subject: net: ipv6: fix return value of ip6_skb_dst_mtu Commit 628a5c561890 ("[INET]: Add IP(V6)_PMTUDISC_RPOBE") introduced ip6_skb_dst_mtu with return value of signed int which is inconsistent with actually returned values. Also 2 users of this function actually assign its value to unsigned int variable and only __xfrm6_output assigns result of this function to signed variable but actually uses as unsigned in further comparisons and calls. Change this function to return unsigned int value. Fixes: 628a5c561890 ("[INET]: Add IP(V6)_PMTUDISC_RPOBE") Reviewed-by: David Ahern Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- net/ipv6/xfrm6_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f14149df5a65..625a38ccb5d9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -263,7 +263,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); -static inline int ip6_skb_dst_mtu(struct sk_buff *skb) +static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) { int mtu; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 57fa27c1cdf9..d0d280077721 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -49,7 +49,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; - int mtu; + unsigned int mtu; bool toobig; #ifdef CONFIG_NETFILTER -- cgit v1.2.3 From 1da4cd82dd180224503e745ccf3220e3490d8897 Mon Sep 17 00:00:00 2001 From: Ali Abdallah Date: Thu, 27 May 2021 09:19:06 +0200 Subject: netfilter: conntrack: add new sysctl to disable RST check This patch adds a new sysctl tcp_ignore_invalid_rst to disable marking out of segments RSTs as INVALID. Signed-off-by: Ali Abdallah Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.rst | 6 ++++++ include/net/netns/conntrack.h | 1 + net/netfilter/nf_conntrack_proto_tcp.c | 6 +++++- net/netfilter/nf_conntrack_standalone.c | 10 ++++++++++ 4 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index 0467b30e4abe..d31ed6c1cb0d 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -110,6 +110,12 @@ nf_conntrack_tcp_be_liberal - BOOLEAN Be conservative in what you do, be liberal in what you accept from others. If it's non-zero, we mark only out of window RST segments as INVALID. +nf_conntrack_tcp_ignore_invalid_rst - BOOLEAN + - 0 - disabled (default) + - 1 - enabled + + If it's 1, we don't mark out of window RST segments as INVALID. + nf_conntrack_tcp_loose - BOOLEAN - 0 - disabled - not 0 - enabled (default) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index c3094b83a525..37e5300c7e5a 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -27,6 +27,7 @@ struct nf_tcp_net { u8 tcp_loose; u8 tcp_be_liberal; u8 tcp_max_retrans; + u8 tcp_ignore_invalid_rst; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) unsigned int offload_timeout; unsigned int offload_pickup; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b8ff67671e93..3259416f2ea4 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1068,7 +1068,8 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, if (seq == 0 && !nf_conntrack_tcp_established(ct)) break; - if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) { + if (before(seq, ct->proto.tcp.seen[!dir].td_maxack) && + !tn->tcp_ignore_invalid_rst) { /* Invalid RST */ spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst"); @@ -1466,6 +1467,9 @@ void nf_conntrack_tcp_init_net(struct net *net) */ tn->tcp_be_liberal = 0; + /* If it's non-zero, we turn off RST sequence number check */ + tn->tcp_ignore_invalid_rst = 0; + /* Max number of the retransmitted packets without receiving an (acceptable) * ACK from the destination. If this number is reached, a shorter timer * will be started. diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index f57a951c9b5e..214d9f9e499b 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -579,6 +579,7 @@ enum nf_ct_sysctl_index { #endif NF_SYSCTL_CT_PROTO_TCP_LOOSE, NF_SYSCTL_CT_PROTO_TCP_LIBERAL, + NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST, NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS, NF_SYSCTL_CT_PROTO_TIMEOUT_UDP, NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM, @@ -798,6 +799,14 @@ static struct ctl_table nf_ct_sysctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + [NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST] = { + .procname = "nf_conntrack_tcp_ignore_invalid_rst", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { .procname = "nf_conntrack_tcp_max_retrans", .maxlen = sizeof(u8), @@ -1004,6 +1013,7 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, XASSIGN(LOOSE, &tn->tcp_loose); XASSIGN(LIBERAL, &tn->tcp_be_liberal); XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans); + XASSIGN(IGNORE_INVALID_RST, &tn->tcp_ignore_invalid_rst); #undef XASSIGN #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) -- cgit v1.2.3 From 9a5605505d9c7dbfdb89cc29a8f5fc5cf9fd2334 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 5 Jul 2021 15:38:12 +0000 Subject: bonding: Add struct bond_ipesc to manage SA bonding has been supporting ipsec offload. When SA is added, bonding just passes SA to its own active real interface. But it doesn't manage SA. So, when events(add/del real interface, active real interface change, etc) occur, bonding can't handle that well because It doesn't manage SA. So some problems(panic, UAF, refcnt leak)occur. In order to make it stable, it should manage SA. That's the reason why struct bond_ipsec is added. When a new SA is added to bonding interface, it is stored in the bond_ipsec list. And the SA is passed to a current active real interface. If events occur, it uses bond_ipsec data to handle these events. bond->ipsec_list is protected by bond->ipsec_lock. If a current active real interface is changed, the following logic works. 1. delete all SAs from old active real interface 2. Add all SAs to the new active real interface. 3. If a new active real interface doesn't support ipsec offload or SA's option, it sets real_dev to NULL. Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 139 ++++++++++++++++++++++++++++++++++------ include/net/bonding.h | 9 ++- 2 files changed, 127 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f7b89743fab9..165fa55cfb38 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -401,6 +401,7 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, static int bond_ipsec_add_sa(struct xfrm_state *xs) { struct net_device *bond_dev = xs->xso.dev; + struct bond_ipsec *ipsec; struct bonding *bond; struct slave *slave; int err; @@ -416,9 +417,6 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) return -ENODEV; } - xs->xso.real_dev = slave->dev; - bond->xs = xs; - if (!slave->dev->xfrmdev_ops || !slave->dev->xfrmdev_ops->xdo_dev_state_add || netif_is_bond_master(slave->dev)) { @@ -427,11 +425,63 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) return -EINVAL; } + ipsec = kmalloc(sizeof(*ipsec), GFP_ATOMIC); + if (!ipsec) { + rcu_read_unlock(); + return -ENOMEM; + } + xs->xso.real_dev = slave->dev; + err = slave->dev->xfrmdev_ops->xdo_dev_state_add(xs); + if (!err) { + ipsec->xs = xs; + INIT_LIST_HEAD(&ipsec->list); + spin_lock_bh(&bond->ipsec_lock); + list_add(&ipsec->list, &bond->ipsec_list); + spin_unlock_bh(&bond->ipsec_lock); + } else { + kfree(ipsec); + } rcu_read_unlock(); return err; } +static void bond_ipsec_add_sa_all(struct bonding *bond) +{ + struct net_device *bond_dev = bond->dev; + struct bond_ipsec *ipsec; + struct slave *slave; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (!slave) + goto out; + + if (!slave->dev->xfrmdev_ops || + !slave->dev->xfrmdev_ops->xdo_dev_state_add || + netif_is_bond_master(slave->dev)) { + spin_lock_bh(&bond->ipsec_lock); + if (!list_empty(&bond->ipsec_list)) + slave_warn(bond_dev, slave->dev, + "%s: no slave xdo_dev_state_add\n", + __func__); + spin_unlock_bh(&bond->ipsec_lock); + goto out; + } + + spin_lock_bh(&bond->ipsec_lock); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + ipsec->xs->xso.real_dev = slave->dev; + if (slave->dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs)) { + slave_warn(bond_dev, slave->dev, "%s: failed to add SA\n", __func__); + ipsec->xs->xso.real_dev = NULL; + } + } + spin_unlock_bh(&bond->ipsec_lock); +out: + rcu_read_unlock(); +} + /** * bond_ipsec_del_sa - clear out this specific SA * @xs: pointer to transformer state struct @@ -439,6 +489,7 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) static void bond_ipsec_del_sa(struct xfrm_state *xs) { struct net_device *bond_dev = xs->xso.dev; + struct bond_ipsec *ipsec; struct bonding *bond; struct slave *slave; @@ -452,7 +503,10 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) if (!slave) goto out; - xs->xso.real_dev = slave->dev; + if (!xs->xso.real_dev) + goto out; + + WARN_ON(xs->xso.real_dev != slave->dev); if (!slave->dev->xfrmdev_ops || !slave->dev->xfrmdev_ops->xdo_dev_state_delete || @@ -463,6 +517,48 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) slave->dev->xfrmdev_ops->xdo_dev_state_delete(xs); out: + spin_lock_bh(&bond->ipsec_lock); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + if (ipsec->xs == xs) { + list_del(&ipsec->list); + kfree(ipsec); + break; + } + } + spin_unlock_bh(&bond->ipsec_lock); + rcu_read_unlock(); +} + +static void bond_ipsec_del_sa_all(struct bonding *bond) +{ + struct net_device *bond_dev = bond->dev; + struct bond_ipsec *ipsec; + struct slave *slave; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (!slave) { + rcu_read_unlock(); + return; + } + + spin_lock_bh(&bond->ipsec_lock); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + if (!ipsec->xs->xso.real_dev) + continue; + + if (!slave->dev->xfrmdev_ops || + !slave->dev->xfrmdev_ops->xdo_dev_state_delete || + netif_is_bond_master(slave->dev)) { + slave_warn(bond_dev, slave->dev, + "%s: no slave xdo_dev_state_delete\n", + __func__); + } else { + slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); + } + ipsec->xs->xso.real_dev = NULL; + } + spin_unlock_bh(&bond->ipsec_lock); rcu_read_unlock(); } @@ -474,22 +570,27 @@ out: static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) { struct net_device *bond_dev = xs->xso.dev; - struct bonding *bond = netdev_priv(bond_dev); - struct slave *curr_active = rcu_dereference(bond->curr_active_slave); - struct net_device *slave_dev = curr_active->dev; + struct net_device *real_dev; + struct slave *curr_active; + struct bonding *bond; + + bond = netdev_priv(bond_dev); + curr_active = rcu_dereference(bond->curr_active_slave); + real_dev = curr_active->dev; if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) return true; - if (!slave_dev->xfrmdev_ops || - !slave_dev->xfrmdev_ops->xdo_dev_offload_ok || - netif_is_bond_master(slave_dev)) { - slave_warn(bond_dev, slave_dev, "%s: no slave xdo_dev_offload_ok\n", __func__); + if (!xs->xso.real_dev) + return false; + + if (!real_dev->xfrmdev_ops || + !real_dev->xfrmdev_ops->xdo_dev_offload_ok || + netif_is_bond_master(real_dev)) { return false; } - xs->xso.real_dev = slave_dev; - return slave_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); + return real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); } static const struct xfrmdev_ops bond_xfrmdev_ops = { @@ -1006,8 +1107,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) return; #ifdef CONFIG_XFRM_OFFLOAD - if (old_active && bond->xs) - bond_ipsec_del_sa(bond->xs); + bond_ipsec_del_sa_all(bond); #endif /* CONFIG_XFRM_OFFLOAD */ if (new_active) { @@ -1082,10 +1182,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } #ifdef CONFIG_XFRM_OFFLOAD - if (new_active && bond->xs) { - xfrm_dev_state_flush(dev_net(bond->dev), bond->dev, true); - bond_ipsec_add_sa(bond->xs); - } + bond_ipsec_add_sa_all(bond); #endif /* CONFIG_XFRM_OFFLOAD */ /* resend IGMP joins since active slave has changed or @@ -3343,6 +3440,7 @@ static int bond_master_netdev_event(unsigned long event, return bond_event_changename(event_bond); case NETDEV_UNREGISTER: bond_remove_proc_entry(event_bond); + xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true); break; case NETDEV_REGISTER: bond_create_proc_entry(event_bond); @@ -4910,7 +5008,8 @@ void bond_setup(struct net_device *bond_dev) #ifdef CONFIG_XFRM_OFFLOAD /* set up xfrm device ops (only supported in active-backup right now) */ bond_dev->xfrmdev_ops = &bond_xfrmdev_ops; - bond->xs = NULL; + INIT_LIST_HEAD(&bond->ipsec_list); + spin_lock_init(&bond->ipsec_lock); #endif /* CONFIG_XFRM_OFFLOAD */ /* don't acquire bond device's netif_tx_lock when transmitting */ diff --git a/include/net/bonding.h b/include/net/bonding.h index 15335732e166..625d9c72dee3 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -201,6 +201,11 @@ struct bond_up_slave { */ #define BOND_LINK_NOCHANGE -1 +struct bond_ipsec { + struct list_head list; + struct xfrm_state *xs; +}; + /* * Here are the locking policies for the two bonding locks: * Get rcu_read_lock when reading or RTNL when writing slave list. @@ -249,7 +254,9 @@ struct bonding { #endif /* CONFIG_DEBUG_FS */ struct rtnl_link_stats64 bond_stats; #ifdef CONFIG_XFRM_OFFLOAD - struct xfrm_state *xs; + struct list_head ipsec_list; + /* protecting ipsec_list */ + spinlock_t ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ }; -- cgit v1.2.3 From 7445cf31d2e25e3f8ad7b1c5342e624c09ab23a2 Mon Sep 17 00:00:00 2001 From: Zvi Effron Date: Wed, 7 Jul 2021 22:16:54 +0000 Subject: bpf: Add function for XDP meta data length check This commit prepares to use the XDP meta data length check in multiple places by making it into a static inline function instead of a literal. Co-developed-by: Cody Haas Co-developed-by: Lisa Watanabe Signed-off-by: Cody Haas Signed-off-by: Lisa Watanabe Signed-off-by: Zvi Effron Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210707221657.3985075-2-zeffron@riotgames.com --- include/net/xdp.h | 5 +++++ net/core/filter.c | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/xdp.h b/include/net/xdp.h index 5533f0ab2afc..ad5b02dcb6f4 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -276,6 +276,11 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp) return unlikely(xdp->data_meta > xdp->data); } +static inline bool xdp_metalen_invalid(unsigned long metalen) +{ + return (metalen & (sizeof(__u32) - 1)) || (metalen > 32); +} + struct xdp_attachment_info { struct bpf_prog *prog; u32 flags; diff --git a/net/core/filter.c b/net/core/filter.c index d70187ce851b..f2c15b2a057a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -77,6 +77,7 @@ #include #include #include +#include static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -3880,8 +3881,7 @@ BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) if (unlikely(meta < xdp_frame_end || meta > xdp->data)) return -EINVAL; - if (unlikely((metalen & (sizeof(__u32) - 1)) || - (metalen > 32))) + if (unlikely(xdp_metalen_invalid(metalen))) return -EACCES; xdp->data_meta = meta; -- cgit v1.2.3 From 67a9c94317402b826fc3db32afc8f39336803d97 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 9 Jul 2021 17:35:18 +0000 Subject: net: validate lwtstate->data before returning from skb_tunnel_info() skb_tunnel_info() returns pointer of lwtstate->data as ip_tunnel_info type without validation. lwtstate->data can have various types such as mpls_iptunnel_encap, etc and these are not compatible. So skb_tunnel_info() should validate before returning that pointer. Splat looks like: BUG: KASAN: slab-out-of-bounds in vxlan_get_route+0x418/0x4b0 [vxlan] Read of size 2 at addr ffff888106ec2698 by task ping/811 CPU: 1 PID: 811 Comm: ping Not tainted 5.13.0+ #1195 Call Trace: dump_stack_lvl+0x56/0x7b print_address_description.constprop.8.cold.13+0x13/0x2ee ? vxlan_get_route+0x418/0x4b0 [vxlan] ? vxlan_get_route+0x418/0x4b0 [vxlan] kasan_report.cold.14+0x83/0xdf ? vxlan_get_route+0x418/0x4b0 [vxlan] vxlan_get_route+0x418/0x4b0 [vxlan] [ ... ] vxlan_xmit_one+0x148b/0x32b0 [vxlan] [ ... ] vxlan_xmit+0x25c5/0x4780 [vxlan] [ ... ] dev_hard_start_xmit+0x1ae/0x6e0 __dev_queue_xmit+0x1f39/0x31a0 [ ... ] neigh_xmit+0x2f9/0x940 mpls_xmit+0x911/0x1600 [mpls_iptunnel] lwtunnel_xmit+0x18f/0x450 ip_finish_output2+0x867/0x2040 [ ... ] Fixes: 61adedf3e3f1 ("route: move lwtunnel state to dst_entry") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 56cb3c38569a..14efa0ded75d 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -45,7 +45,9 @@ skb_tunnel_info(const struct sk_buff *skb) return &md_dst->u.tun_info; dst = skb_dst(skb); - if (dst && dst->lwtstate) + if (dst && dst->lwtstate && + (dst->lwtstate->type == LWTUNNEL_ENCAP_IP || + dst->lwtstate->type == LWTUNNEL_ENCAP_IP6)) return lwt_tun_info(dst->lwtstate); return NULL; -- cgit v1.2.3 From 6787b7e350d3552651a3422d3d8980fbc8d65368 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Fri, 9 Jul 2021 17:20:49 -0700 Subject: mptcp: avoid processing packet if a subflow reset If check_fully_established() causes a subflow reset, it should not continue to process the packet in tcp_data_queue(). Add a return value to mptcp_incoming_options(), and return false if a subflow has been reset, else return true. Then drop the packet in tcp_data_queue()/tcp_rcv_state_process() if mptcp_incoming_options() return false. Fixes: d582484726c4 ("mptcp: fix fallback for MP_JOIN subflows") Signed-off-by: Jianguo Wu Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/mptcp.h | 5 +++-- net/ipv4/tcp_input.c | 19 +++++++++++++++---- net/mptcp/options.c | 19 +++++++++++++------ 3 files changed, 31 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index cb580b06152f..8b5af683a818 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -105,7 +105,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts); -void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); +bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts); @@ -227,9 +227,10 @@ static inline bool mptcp_established_options(struct sock *sk, return false; } -static inline void mptcp_incoming_options(struct sock *sk, +static inline bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) { + return true; } static inline void mptcp_skb_ext_move(struct sk_buff *to, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a5a8d0a378b2..149ceb5c94ff 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4247,6 +4247,9 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) { trace_tcp_receive_reset(sk); + /* mptcp can't tell us to ignore reset pkts, + * so just ignore the return value of mptcp_incoming_options(). + */ if (sk_is_mptcp(sk)) mptcp_incoming_options(sk, skb); @@ -4941,8 +4944,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) bool fragstolen; int eaten; - if (sk_is_mptcp(sk)) - mptcp_incoming_options(sk, skb); + /* If a subflow has been reset, the packet should not continue + * to be processed, drop the packet. + */ + if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) { + __kfree_skb(skb); + return; + } if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { __kfree_skb(skb); @@ -6523,8 +6531,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_CLOSING: case TCP_LAST_ACK: if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - if (sk_is_mptcp(sk)) - mptcp_incoming_options(sk, skb); + /* If a subflow has been reset, the packet should not + * continue to be processed, drop the packet. + */ + if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) + goto discard; break; } fallthrough; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index b5850afea343..4452455aef7f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1035,7 +1035,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, return hmac == mp_opt->ahmac; } -void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) +/* Return false if a subflow has been reset, else return true */ +bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); @@ -1053,12 +1054,16 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) __mptcp_check_push(subflow->conn, sk); __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); - return; + return true; } mptcp_get_options(sk, skb, &mp_opt); + + /* The subflow can be in close state only if check_fully_established() + * just sent a reset. If so, tell the caller to ignore the current packet. + */ if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) - return; + return sk->sk_state != TCP_CLOSE; if (mp_opt.fastclose && msk->local_key == mp_opt.rcvr_key) { @@ -1100,7 +1105,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) } if (!mp_opt.dss) - return; + return true; /* we can't wait for recvmsg() to update the ack_seq, otherwise * monodirectional flows will stuck @@ -1119,12 +1124,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) schedule_work(&msk->work)) sock_hold(subflow->conn); - return; + return true; } mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) - return; + return true; memset(mpext, 0, sizeof(*mpext)); @@ -1153,6 +1158,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) if (mpext->csum_reqd) mpext->csum = mp_opt.csum; } + + return true; } static void mptcp_set_rwin(const struct tcp_sock *tp) -- cgit v1.2.3 From c63829182c37c2d6d0608976d15fa61ebebe9e6b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 4 Jul 2021 12:02:47 -0700 Subject: af_unix: Implement ->psock_update_sk_prot() Now we can implement unix_bpf_update_proto() to update sk_prot, especially prot->close(). Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210704190252.11866-7-xiyou.wangcong@gmail.com --- MAINTAINERS | 1 + include/net/af_unix.h | 10 ++++++++++ net/core/sock_map.c | 1 + net/unix/Makefile | 1 + net/unix/af_unix.c | 6 +++++- net/unix/unix_bpf.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 net/unix/unix_bpf.c (limited to 'include/net') diff --git a/MAINTAINERS b/MAINTAINERS index 88449b7a4c95..2c793df1d873 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10277,6 +10277,7 @@ F: net/core/skmsg.c F: net/core/sock_map.c F: net/ipv4/tcp_bpf.c F: net/ipv4/udp_bpf.c +F: net/unix/unix_bpf.c LANDLOCK SECURITY MODULE M: Mickaël Salaün diff --git a/include/net/af_unix.h b/include/net/af_unix.h index f42fdddecd41..cca645846af1 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -89,4 +89,14 @@ void unix_sysctl_unregister(struct net *net); static inline int unix_sysctl_register(struct net *net) { return 0; } static inline void unix_sysctl_unregister(struct net *net) {} #endif + +#ifdef CONFIG_BPF_SYSCALL +extern struct proto unix_proto; + +int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); +void __init unix_bpf_build_proto(void); +#else +static inline void __init unix_bpf_build_proto(void) +{} +#endif #endif diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 3c427e7e6df9..ae5fa4338d9c 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1517,6 +1517,7 @@ void sock_map_close(struct sock *sk, long timeout) release_sock(sk); saved_close(sk, timeout); } +EXPORT_SYMBOL_GPL(sock_map_close); static int sock_map_iter_attach_target(struct bpf_prog *prog, union bpf_iter_link_info *linfo, diff --git a/net/unix/Makefile b/net/unix/Makefile index 54e58cc4f945..20491825b4d0 100644 --- a/net/unix/Makefile +++ b/net/unix/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_UNIX) += unix.o unix-y := af_unix.o garbage.o unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o +unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o obj-$(CONFIG_UNIX_DIAG) += unix_diag.o unix_diag-y := diag.o diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 875eeaaddc07..573253c5b5c2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -788,11 +788,14 @@ static void unix_close(struct sock *sk, long timeout) */ } -static struct proto unix_proto = { +struct proto unix_proto = { .name = "UNIX", .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), .close = unix_close, +#ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = unix_bpf_update_proto, +#endif }; static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) @@ -2973,6 +2976,7 @@ static int __init af_unix_init(void) sock_register(&unix_family_ops); register_pernet_subsys(&unix_net_ops); + unix_bpf_build_proto(); out: return rc; } diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c new file mode 100644 index 000000000000..b1582a659427 --- /dev/null +++ b/net/unix/unix_bpf.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Cong Wang */ + +#include +#include +#include +#include + +static struct proto *unix_prot_saved __read_mostly; +static DEFINE_SPINLOCK(unix_prot_lock); +static struct proto unix_bpf_prot; + +static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base) +{ + *prot = *base; + prot->close = sock_map_close; +} + +static void unix_bpf_check_needs_rebuild(struct proto *ops) +{ + if (unlikely(ops != smp_load_acquire(&unix_prot_saved))) { + spin_lock_bh(&unix_prot_lock); + if (likely(ops != unix_prot_saved)) { + unix_bpf_rebuild_protos(&unix_bpf_prot, ops); + smp_store_release(&unix_prot_saved, ops); + } + spin_unlock_bh(&unix_prot_lock); + } +} + +int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) +{ + if (restore) { + sk->sk_write_space = psock->saved_write_space; + WRITE_ONCE(sk->sk_prot, psock->sk_proto); + return 0; + } + + unix_bpf_check_needs_rebuild(psock->sk_proto); + WRITE_ONCE(sk->sk_prot, &unix_bpf_prot); + return 0; +} + +void __init unix_bpf_build_proto(void) +{ + unix_bpf_rebuild_protos(&unix_bpf_prot, &unix_proto); +} -- cgit v1.2.3 From 9825d866ce0d11009513e06824885340062c166b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 4 Jul 2021 12:02:48 -0700 Subject: af_unix: Implement unix_dgram_bpf_recvmsg() We have to implement unix_dgram_bpf_recvmsg() to replace the original ->recvmsg() to retrieve skmsg from ingress_msg. AF_UNIX is again special here because the lack of sk_prot->recvmsg(). I simply add a special case inside unix_dgram_recvmsg() to call sk->sk_prot->recvmsg() directly. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210704190252.11866-8-xiyou.wangcong@gmail.com --- include/net/af_unix.h | 2 ++ net/unix/af_unix.c | 19 ++++++++++--- net/unix/unix_bpf.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index cca645846af1..435a2c3d5a6f 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -82,6 +82,8 @@ static inline struct unix_sock *unix_sk(const struct sock *sk) long unix_inq_len(struct sock *sk); long unix_outq_len(struct sock *sk); +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, + int flags); #ifdef CONFIG_SYSCTL int unix_sysctl_register(struct net *net); void unix_sysctl_unregister(struct net *net); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 573253c5b5c2..89927678c0dc 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2098,11 +2098,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) } } -static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, - size_t size, int flags) +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, + int flags) { struct scm_cookie scm; - struct sock *sk = sock->sk; + struct socket *sock = sk->sk_socket; struct unix_sock *u = unix_sk(sk); struct sk_buff *skb, *last; long timeo; @@ -2205,6 +2205,19 @@ out: return err; } +static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) +{ + struct sock *sk = sock->sk; + +#ifdef CONFIG_BPF_SYSCALL + if (sk->sk_prot != &unix_proto) + return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, + flags & ~MSG_DONTWAIT, NULL); +#endif + return __unix_dgram_recvmsg(sk, msg, size, flags); +} + static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor) { diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index b1582a659427..db0cda29fb2f 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -6,6 +6,80 @@ #include #include +#define unix_sk_has_data(__sk, __psock) \ + ({ !skb_queue_empty(&__sk->sk_receive_queue) || \ + !skb_queue_empty(&__psock->ingress_skb) || \ + !list_empty(&__psock->ingress_msg); \ + }) + +static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock, + long timeo) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct unix_sock *u = unix_sk(sk); + int ret = 0; + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 1; + + if (!timeo) + return ret; + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + if (!unix_sk_has_data(sk, psock)) { + mutex_unlock(&u->iolock); + wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + mutex_lock(&u->iolock); + ret = unix_sk_has_data(sk, psock); + } + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + return ret; +} + +static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int nonblock, int flags, + int *addr_len) +{ + struct unix_sock *u = unix_sk(sk); + struct sk_psock *psock; + int copied, ret; + + psock = sk_psock_get(sk); + if (unlikely(!psock)) + return __unix_dgram_recvmsg(sk, msg, len, flags); + + mutex_lock(&u->iolock); + if (!skb_queue_empty(&sk->sk_receive_queue) && + sk_psock_queue_empty(psock)) { + ret = __unix_dgram_recvmsg(sk, msg, len, flags); + goto out; + } + +msg_bytes_ready: + copied = sk_msg_recvmsg(sk, psock, msg, len, flags); + if (!copied) { + long timeo; + int data; + + timeo = sock_rcvtimeo(sk, nonblock); + data = unix_msg_wait_data(sk, psock, timeo); + if (data) { + if (!sk_psock_queue_empty(psock)) + goto msg_bytes_ready; + ret = __unix_dgram_recvmsg(sk, msg, len, flags); + goto out; + } + copied = -EAGAIN; + } + ret = copied; +out: + mutex_unlock(&u->iolock); + sk_psock_put(sk, psock); + return ret; +} + static struct proto *unix_prot_saved __read_mostly; static DEFINE_SPINLOCK(unix_prot_lock); static struct proto unix_bpf_prot; @@ -14,6 +88,7 @@ static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base { *prot = *base; prot->close = sock_map_close; + prot->recvmsg = unix_dgram_bpf_recvmsg; } static void unix_bpf_check_needs_rebuild(struct proto *ops) -- cgit v1.2.3 From 749468760b952e555529ca8a71256b991455101e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Jul 2021 02:20:28 -0700 Subject: net/tcp_fastopen: remove obsolete extern After cited commit, sysctl_tcp_fastopen_blackhole_timeout is no longer a global variable. Fixes: 3733be14a32b ("ipv4: Namespaceify tcp_fastopen_blackhole_timeout knob") Signed-off-by: Eric Dumazet Cc: Haishuang Yan Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Wei Wang Link: https://lore.kernel.org/r/20210719092028.3016745-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 17df9b047ee4..784d5c3ef1c5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1709,7 +1709,6 @@ struct tcp_fastopen_context { struct rcu_head rcu; }; -extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; void tcp_fastopen_active_disable(struct sock *sk); bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); -- cgit v1.2.3 From e93abb840a2c356ed2809c31fcedb058601ac2e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Jul 2021 03:11:07 -0700 Subject: net/tcp_fastopen: remove tcp_fastopen_ctx_lock Remove the (per netns) spinlock in favor of xchg() atomic operations. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Wei Wang Link: https://lore.kernel.org/r/20210719101107.3203943-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 1 - net/ipv4/tcp_fastopen.c | 17 +++-------------- net/ipv4/tcp_ipv4.c | 1 - 3 files changed, 3 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b8620519eace..2f65701a43c9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -174,7 +174,6 @@ struct netns_ipv4 { int sysctl_tcp_fastopen; const struct tcp_congestion_ops __rcu *tcp_congestion_control; struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; - spinlock_t tcp_fastopen_ctx_lock; unsigned int sysctl_tcp_fastopen_blackhole_timeout; atomic_t tfo_active_disable_times; unsigned long tfo_active_disable_stamp; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 47c32604d38f..1a9fbd5448a7 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -55,12 +55,7 @@ void tcp_fastopen_ctx_destroy(struct net *net) { struct tcp_fastopen_context *ctxt; - spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); - - ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, - lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); - rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL); - spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); + ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL); if (ctxt) call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); @@ -89,18 +84,12 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, ctx->num = 1; } - spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); if (sk) { q = &inet_csk(sk)->icsk_accept_queue.fastopenq; - octx = rcu_dereference_protected(q->ctx, - lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); - rcu_assign_pointer(q->ctx, ctx); + octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx); } else { - octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, - lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); - rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx); + octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx); } - spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); if (octx) call_rcu(&octx->rcu, tcp_fastopen_ctx_free); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b9dc2d6197be..e9321dd39cdb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2964,7 +2964,6 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC; net->ipv4.sysctl_tcp_comp_sack_nr = 44; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; - spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; atomic_set(&net->ipv4.tfo_active_disable_times, 0); -- cgit v1.2.3 From d7b1fd520d5d4271f4ab9b1671afbdcd868039d3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 19 Jul 2021 20:14:48 +0300 Subject: net: dsa: let the core manage the tag_8021q context The basic problem description is as follows: Be there 3 switches in a daisy chain topology: | sw0p0 sw0p1 sw0p2 sw0p3 sw0p4 [ user ] [ user ] [ user ] [ dsa ] [ cpu ] | +---------+ | sw1p0 sw1p1 sw1p2 sw1p3 sw1p4 [ user ] [ user ] [ user ] [ dsa ] [ dsa ] | +---------+ | sw2p0 sw2p1 sw2p2 sw2p3 sw2p4 [ user ] [ user ] [ user ] [ user ] [ dsa ] The CPU will not be able to ping through the user ports of the bottom-most switch (like for example sw2p0), simply because tag_8021q was not coded up for this scenario - it has always assumed DSA switch trees with a single switch. To add support for the topology above, we must admit that the RX VLAN of sw2p0 must be added on some ports of switches 0 and 1 as well. This is in fact a textbook example of thing that can use the cross-chip notifier framework that DSA has set up in switch.c. There is only one problem: core DSA (switch.c) is not able right now to make the connection between a struct dsa_switch *ds and a struct dsa_8021q_context *ctx. Right now, it is drivers who call into tag_8021q.c and always provide a struct dsa_8021q_context *ctx pointer, and tag_8021q.c calls them back with the .tag_8021q_vlan_{add,del} methods. But with cross-chip notifiers, it is possible for tag_8021q to call drivers without drivers having ever asked for anything. A good example is right above: when sw2p0 wants to set itself up for tag_8021q, the .tag_8021q_vlan_add method needs to be called for switches 1 and 0, so that they transport sw2p0's VLANs towards the CPU without dropping them. So instead of letting drivers manage the tag_8021q context, add a tag_8021q_ctx pointer inside of struct dsa_switch, which will be populated when dsa_tag_8021q_register() returns success. The patch is fairly long-winded because we are partly reverting commit 5899ee367ab3 ("net: dsa: tag_8021q: add a context structure") which made the driver-facing tag_8021q API use "ctx" instead of "ds". Now that we can access "ctx" directly from "ds", this is no longer needed. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 22 +++---- drivers/net/dsa/ocelot/felix.h | 1 - drivers/net/dsa/sja1105/sja1105.h | 1 - drivers/net/dsa/sja1105/sja1105_main.c | 40 +++++------- include/linux/dsa/8021q.h | 18 +++--- include/net/dsa.h | 3 + net/dsa/tag_8021q.c | 114 ++++++++++++++++++--------------- 7 files changed, 99 insertions(+), 100 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index b52cc381cdc1..9e4ae15aa4fb 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -425,14 +425,14 @@ static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu) ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_MC); ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_BC); - felix->dsa_8021q_ctx = dsa_tag_8021q_register(ds, &felix_tag_8021q_ops, - htons(ETH_P_8021AD)); - if (!felix->dsa_8021q_ctx) - return -ENOMEM; + err = dsa_tag_8021q_register(ds, &felix_tag_8021q_ops, + htons(ETH_P_8021AD)); + if (err) + return err; - err = dsa_8021q_setup(felix->dsa_8021q_ctx, true); + err = dsa_8021q_setup(ds, true); if (err) - goto out_free_dsa_8021_ctx; + goto out_tag_8021q_unregister; err = felix_setup_mmio_filtering(felix); if (err) @@ -441,9 +441,9 @@ static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu) return 0; out_teardown_dsa_8021q: - dsa_8021q_setup(felix->dsa_8021q_ctx, false); -out_free_dsa_8021_ctx: - dsa_tag_8021q_unregister(felix->dsa_8021q_ctx); + dsa_8021q_setup(ds, false); +out_tag_8021q_unregister: + dsa_tag_8021q_unregister(ds); return err; } @@ -458,11 +458,11 @@ static void felix_teardown_tag_8021q(struct dsa_switch *ds, int cpu) dev_err(ds->dev, "felix_teardown_mmio_filtering returned %d", err); - err = dsa_8021q_setup(felix->dsa_8021q_ctx, false); + err = dsa_8021q_setup(ds, false); if (err) dev_err(ds->dev, "dsa_8021q_setup returned %d", err); - dsa_tag_8021q_unregister(felix->dsa_8021q_ctx); + dsa_tag_8021q_unregister(ds); for (port = 0; port < ds->num_ports; port++) { if (dsa_is_unused_port(ds, port)) diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 4d96cad815d5..9da3c6a94c6e 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -60,7 +60,6 @@ struct felix { struct lynx_pcs **pcs; resource_size_t switch_base; resource_size_t imdio_base; - struct dsa_8021q_context *dsa_8021q_ctx; enum dsa_tag_protocol tag_proto; }; diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index 869b19c08fc0..068be8afd322 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -257,7 +257,6 @@ struct sja1105_private { * the switch doesn't confuse them with one another. */ struct mutex mgmt_lock; - struct dsa_8021q_context *dsa_8021q_ctx; struct devlink_region **regions; struct sja1105_cbs_entry *cbs; struct mii_bus *mdio_base_t1; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 689f46797d1c..ac4254690a8d 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1995,8 +1995,6 @@ static int sja1105_crosschip_bridge_join(struct dsa_switch *ds, int other_port, struct net_device *br) { struct dsa_switch *other_ds = dsa_switch_find(tree_index, sw_index); - struct sja1105_private *other_priv = other_ds->priv; - struct sja1105_private *priv = ds->priv; int port, rc; if (other_ds->ops != &sja1105_switch_ops) @@ -2008,17 +2006,13 @@ static int sja1105_crosschip_bridge_join(struct dsa_switch *ds, if (dsa_to_port(ds, port)->bridge_dev != br) continue; - rc = dsa_8021q_crosschip_bridge_join(priv->dsa_8021q_ctx, - port, - other_priv->dsa_8021q_ctx, + rc = dsa_8021q_crosschip_bridge_join(ds, port, other_ds, other_port); if (rc) return rc; - rc = dsa_8021q_crosschip_bridge_join(other_priv->dsa_8021q_ctx, - other_port, - priv->dsa_8021q_ctx, - port); + rc = dsa_8021q_crosschip_bridge_join(other_ds, other_port, + ds, port); if (rc) return rc; } @@ -2032,8 +2026,6 @@ static void sja1105_crosschip_bridge_leave(struct dsa_switch *ds, struct net_device *br) { struct dsa_switch *other_ds = dsa_switch_find(tree_index, sw_index); - struct sja1105_private *other_priv = other_ds->priv; - struct sja1105_private *priv = ds->priv; int port; if (other_ds->ops != &sja1105_switch_ops) @@ -2045,22 +2037,19 @@ static void sja1105_crosschip_bridge_leave(struct dsa_switch *ds, if (dsa_to_port(ds, port)->bridge_dev != br) continue; - dsa_8021q_crosschip_bridge_leave(priv->dsa_8021q_ctx, port, - other_priv->dsa_8021q_ctx, + dsa_8021q_crosschip_bridge_leave(ds, port, other_ds, other_port); - dsa_8021q_crosschip_bridge_leave(other_priv->dsa_8021q_ctx, - other_port, - priv->dsa_8021q_ctx, port); + dsa_8021q_crosschip_bridge_leave(other_ds, other_port, + ds, port); } } static int sja1105_setup_8021q_tagging(struct dsa_switch *ds, bool enabled) { - struct sja1105_private *priv = ds->priv; int rc; - rc = dsa_8021q_setup(priv->dsa_8021q_ctx, enabled); + rc = dsa_8021q_setup(ds, enabled); if (rc) return rc; @@ -2233,6 +2222,7 @@ static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify); static int sja1105_notify_crosschip_switches(struct sja1105_private *priv) { + struct dsa_8021q_context *ctx = priv->ds->tag_8021q_ctx; struct sja1105_crosschip_switch *s, *pos; struct list_head crosschip_switches; struct dsa_8021q_crosschip_link *c; @@ -2240,7 +2230,7 @@ static int sja1105_notify_crosschip_switches(struct sja1105_private *priv) INIT_LIST_HEAD(&crosschip_switches); - list_for_each_entry(c, &priv->dsa_8021q_ctx->crosschip_links, list) { + list_for_each_entry(c, &ctx->crosschip_links, list) { bool already_added = false; list_for_each_entry(s, &crosschip_switches, list) { @@ -3306,10 +3296,10 @@ static int sja1105_probe(struct spi_device *spi) mutex_init(&priv->ptp_data.lock); mutex_init(&priv->mgmt_lock); - priv->dsa_8021q_ctx = dsa_tag_8021q_register(ds, &sja1105_dsa_8021q_ops, - htons(ETH_P_8021Q)); - if (!priv->dsa_8021q_ctx) - return -ENOMEM; + rc = dsa_tag_8021q_register(ds, &sja1105_dsa_8021q_ops, + htons(ETH_P_8021Q)); + if (rc) + return rc; INIT_LIST_HEAD(&priv->bridge_vlans); INIT_LIST_HEAD(&priv->dsa_8021q_vlans); @@ -3373,7 +3363,7 @@ out_destroy_workers: out_unregister_switch: dsa_unregister_switch(ds); out_tag_8021q_unregister: - dsa_tag_8021q_unregister(priv->dsa_8021q_ctx); + dsa_tag_8021q_unregister(ds); return rc; } @@ -3384,7 +3374,7 @@ static int sja1105_remove(struct spi_device *spi) struct dsa_switch *ds = priv->ds; dsa_unregister_switch(ds); - dsa_tag_8021q_unregister(priv->dsa_8021q_ctx); + dsa_tag_8021q_unregister(ds); return 0; } diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 9945898a90c3..77939c0c8dd5 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -34,20 +34,20 @@ struct dsa_8021q_context { __be16 proto; }; -struct dsa_8021q_context *dsa_tag_8021q_register(struct dsa_switch *ds, - const struct dsa_8021q_ops *ops, - __be16 proto); +int dsa_tag_8021q_register(struct dsa_switch *ds, + const struct dsa_8021q_ops *ops, + __be16 proto); -void dsa_tag_8021q_unregister(struct dsa_8021q_context *ctx); +void dsa_tag_8021q_unregister(struct dsa_switch *ds); -int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled); +int dsa_8021q_setup(struct dsa_switch *ds, bool enabled); -int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, +int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, int other_port); -int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, +int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, int other_port); struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, diff --git a/include/net/dsa.h b/include/net/dsa.h index 33f40c1ec379..e213572f6341 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -352,6 +352,9 @@ struct dsa_switch { unsigned int ageing_time_min; unsigned int ageing_time_max; + /* Storage for drivers using tag_8021q */ + struct dsa_8021q_context *tag_8021q_ctx; + /* devlink used to represent this switch device */ struct devlink *devlink; diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 16eb2c7bcc8d..de46a551a486 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -113,10 +113,11 @@ EXPORT_SYMBOL_GPL(vid_is_dsa_8021q); * user explicitly configured this @vid through the bridge core, then the @vid * is installed again, but this time with the flags from the bridge layer. */ -static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid, +static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid, u16 flags, bool enabled) { - struct dsa_port *dp = dsa_to_port(ctx->ds, port); + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); if (enabled) return ctx->ops->vlan_add(ctx->ds, dp->index, vid, flags); @@ -176,29 +177,29 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid, * +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+ * swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3 */ -static int dsa_8021q_setup_port(struct dsa_8021q_context *ctx, int port, - bool enabled) +static int dsa_8021q_setup_port(struct dsa_switch *ds, int port, bool enabled) { - int upstream = dsa_upstream_port(ctx->ds, port); - u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port); - u16 tx_vid = dsa_8021q_tx_vid(ctx->ds, port); + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + int upstream = dsa_upstream_port(ds, port); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + u16 tx_vid = dsa_8021q_tx_vid(ds, port); struct net_device *master; int i, err; /* The CPU port is implicitly configured by * configuring the front-panel ports */ - if (!dsa_is_user_port(ctx->ds, port)) + if (!dsa_is_user_port(ds, port)) return 0; - master = dsa_to_port(ctx->ds, port)->cpu_dp->master; + master = dsa_to_port(ds, port)->cpu_dp->master; /* Add this user port's RX VID to the membership list of all others * (including itself). This is so that bridging will not be hindered. * L2 forwarding rules still take precedence when there are no VLAN * restrictions, so there are no concerns about leaking traffic. */ - for (i = 0; i < ctx->ds->num_ports; i++) { + for (i = 0; i < ds->num_ports; i++) { u16 flags; if (i == upstream) @@ -211,9 +212,9 @@ static int dsa_8021q_setup_port(struct dsa_8021q_context *ctx, int port, /* The RX VID is a regular VLAN on all others */ flags = BRIDGE_VLAN_INFO_UNTAGGED; - err = dsa_8021q_vid_apply(ctx, i, rx_vid, flags, enabled); + err = dsa_8021q_vid_apply(ds, i, rx_vid, flags, enabled); if (err) { - dev_err(ctx->ds->dev, + dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %pe\n", rx_vid, port, ERR_PTR(err)); return err; @@ -223,9 +224,9 @@ static int dsa_8021q_setup_port(struct dsa_8021q_context *ctx, int port, /* CPU port needs to see this port's RX VID * as tagged egress. */ - err = dsa_8021q_vid_apply(ctx, upstream, rx_vid, 0, enabled); + err = dsa_8021q_vid_apply(ds, upstream, rx_vid, 0, enabled); if (err) { - dev_err(ctx->ds->dev, + dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %pe\n", rx_vid, port, ERR_PTR(err)); return err; @@ -238,17 +239,17 @@ static int dsa_8021q_setup_port(struct dsa_8021q_context *ctx, int port, vlan_vid_del(master, ctx->proto, rx_vid); /* Finally apply the TX VID on this port and on the CPU port */ - err = dsa_8021q_vid_apply(ctx, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED, + err = dsa_8021q_vid_apply(ds, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED, enabled); if (err) { - dev_err(ctx->ds->dev, + dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %pe\n", tx_vid, port, ERR_PTR(err)); return err; } - err = dsa_8021q_vid_apply(ctx, upstream, tx_vid, 0, enabled); + err = dsa_8021q_vid_apply(ds, upstream, tx_vid, 0, enabled); if (err) { - dev_err(ctx->ds->dev, + dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %pe\n", tx_vid, upstream, ERR_PTR(err)); return err; @@ -257,16 +258,16 @@ static int dsa_8021q_setup_port(struct dsa_8021q_context *ctx, int port, return err; } -int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled) +int dsa_8021q_setup(struct dsa_switch *ds, bool enabled) { int err, port; ASSERT_RTNL(); - for (port = 0; port < ctx->ds->num_ports; port++) { - err = dsa_8021q_setup_port(ctx, port, enabled); + for (port = 0; port < ds->num_ports; port++) { + err = dsa_8021q_setup_port(ds, port, enabled); if (err < 0) { - dev_err(ctx->ds->dev, + dev_err(ds->dev, "Failed to setup VLAN tagging for port %d: %pe\n", port, ERR_PTR(err)); return err; @@ -277,24 +278,25 @@ int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled) } EXPORT_SYMBOL_GPL(dsa_8021q_setup); -static int dsa_8021q_crosschip_link_apply(struct dsa_8021q_context *ctx, - int port, - struct dsa_8021q_context *other_ctx, +static int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, int other_port, bool enabled) { - u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); /* @rx_vid of local @ds port @port goes to @other_port of * @other_ds */ - return dsa_8021q_vid_apply(other_ctx, other_port, rx_vid, + return dsa_8021q_vid_apply(other_ds, other_port, rx_vid, BRIDGE_VLAN_INFO_UNTAGGED, enabled); } -static int dsa_8021q_crosschip_link_add(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, +static int dsa_8021q_crosschip_link_add(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, int other_port) { + struct dsa_8021q_context *other_ctx = other_ds->tag_8021q_ctx; + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_8021q_crosschip_link *c; list_for_each_entry(c, &ctx->crosschip_links, list) { @@ -305,9 +307,9 @@ static int dsa_8021q_crosschip_link_add(struct dsa_8021q_context *ctx, int port, } } - dev_dbg(ctx->ds->dev, + dev_dbg(ds->dev, "adding crosschip link from port %d to %s port %d\n", - port, dev_name(other_ctx->ds->dev), other_port); + port, dev_name(other_ds->dev), other_port); c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) @@ -323,7 +325,7 @@ static int dsa_8021q_crosschip_link_add(struct dsa_8021q_context *ctx, int port, return 0; } -static void dsa_8021q_crosschip_link_del(struct dsa_8021q_context *ctx, +static void dsa_8021q_crosschip_link_del(struct dsa_switch *ds, struct dsa_8021q_crosschip_link *c, bool *keep) { @@ -332,7 +334,7 @@ static void dsa_8021q_crosschip_link_del(struct dsa_8021q_context *ctx, if (*keep) return; - dev_dbg(ctx->ds->dev, + dev_dbg(ds->dev, "deleting crosschip link from port %d to %s port %d\n", c->port, dev_name(c->other_ctx->ds->dev), c->other_port); @@ -347,8 +349,8 @@ static void dsa_8021q_crosschip_link_del(struct dsa_8021q_context *ctx, * or untagged: it doesn't matter, since it should never egress a frame having * our @rx_vid. */ -int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, +int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, int other_port) { /* @other_upstream is how @other_ds reaches us. If we are part @@ -356,49 +358,50 @@ int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port, * our CPU ports. If we're part of the same tree though, we should * probably use dsa_towards_port. */ - int other_upstream = dsa_upstream_port(other_ctx->ds, other_port); + int other_upstream = dsa_upstream_port(other_ds, other_port); int err; - err = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_port); + err = dsa_8021q_crosschip_link_add(ds, port, other_ds, other_port); if (err) return err; - err = dsa_8021q_crosschip_link_apply(ctx, port, other_ctx, + err = dsa_8021q_crosschip_link_apply(ds, port, other_ds, other_port, true); if (err) return err; - err = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_upstream); + err = dsa_8021q_crosschip_link_add(ds, port, other_ds, other_upstream); if (err) return err; - return dsa_8021q_crosschip_link_apply(ctx, port, other_ctx, + return dsa_8021q_crosschip_link_apply(ds, port, other_ds, other_upstream, true); } EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_join); -int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, +int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, int other_port) { - int other_upstream = dsa_upstream_port(other_ctx->ds, other_port); + struct dsa_8021q_context *other_ctx = other_ds->tag_8021q_ctx; + int other_upstream = dsa_upstream_port(other_ds, other_port); + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_8021q_crosschip_link *c, *n; list_for_each_entry_safe(c, n, &ctx->crosschip_links, list) { if (c->port == port && c->other_ctx == other_ctx && (c->other_port == other_port || c->other_port == other_upstream)) { - struct dsa_8021q_context *other_ctx = c->other_ctx; int other_port = c->other_port; bool keep; int err; - dsa_8021q_crosschip_link_del(ctx, c, &keep); + dsa_8021q_crosschip_link_del(ds, c, &keep); if (keep) continue; - err = dsa_8021q_crosschip_link_apply(ctx, port, - other_ctx, + err = dsa_8021q_crosschip_link_apply(ds, port, + other_ds, other_port, false); if (err) @@ -410,15 +413,15 @@ int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port, } EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_leave); -struct dsa_8021q_context *dsa_tag_8021q_register(struct dsa_switch *ds, - const struct dsa_8021q_ops *ops, - __be16 proto) +int dsa_tag_8021q_register(struct dsa_switch *ds, + const struct dsa_8021q_ops *ops, + __be16 proto) { struct dsa_8021q_context *ctx; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) - return NULL; + return -ENOMEM; ctx->ops = ops; ctx->proto = proto; @@ -426,12 +429,15 @@ struct dsa_8021q_context *dsa_tag_8021q_register(struct dsa_switch *ds, INIT_LIST_HEAD(&ctx->crosschip_links); - return ctx; + ds->tag_8021q_ctx = ctx; + + return 0; } EXPORT_SYMBOL_GPL(dsa_tag_8021q_register); -void dsa_tag_8021q_unregister(struct dsa_8021q_context *ctx) +void dsa_tag_8021q_unregister(struct dsa_switch *ds) { + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_8021q_crosschip_link *c, *n; list_for_each_entry_safe(c, n, &ctx->crosschip_links, list) { @@ -439,6 +445,8 @@ void dsa_tag_8021q_unregister(struct dsa_8021q_context *ctx) kfree(c); } + ds->tag_8021q_ctx = NULL; + kfree(ctx); } EXPORT_SYMBOL_GPL(dsa_tag_8021q_unregister); -- cgit v1.2.3 From 5da11eb407340233a6111c563419e19685a062a4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 19 Jul 2021 20:14:49 +0300 Subject: net: dsa: make tag_8021q operations part of the core Make tag_8021q a more central element of DSA and move the 2 driver specific operations outside of struct dsa_8021q_context (which is supposed to hold dynamic data and not really constant function pointers). Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 10 +++------- drivers/net/dsa/sja1105/sja1105_main.c | 10 +++------- include/linux/dsa/8021q.h | 10 +--------- include/net/dsa.h | 7 +++++++ net/dsa/tag_8021q.c | 10 +++------- 5 files changed, 17 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 9e4ae15aa4fb..b6ab28d2f155 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -231,11 +231,6 @@ static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) return 0; } -static const struct dsa_8021q_ops felix_tag_8021q_ops = { - .vlan_add = felix_tag_8021q_vlan_add, - .vlan_del = felix_tag_8021q_vlan_del, -}; - /* Alternatively to using the NPI functionality, that same hardware MAC * connected internally to the enetc or fman DSA master can be configured to * use the software-defined tag_8021q frame format. As far as the hardware is @@ -425,8 +420,7 @@ static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu) ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_MC); ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_BC); - err = dsa_tag_8021q_register(ds, &felix_tag_8021q_ops, - htons(ETH_P_8021AD)); + err = dsa_tag_8021q_register(ds, htons(ETH_P_8021AD)); if (err) return err; @@ -1675,6 +1669,8 @@ const struct dsa_switch_ops felix_switch_ops = { .port_mrp_del = felix_mrp_del, .port_mrp_add_ring_role = felix_mrp_add_ring_role, .port_mrp_del_ring_role = felix_mrp_del_ring_role, + .tag_8021q_vlan_add = felix_tag_8021q_vlan_add, + .tag_8021q_vlan_del = felix_tag_8021q_vlan_del, }; struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index ac4254690a8d..0c04f6caccdf 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2543,11 +2543,6 @@ static int sja1105_dsa_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) return sja1105_build_vlan_table(priv, true); } -static const struct dsa_8021q_ops sja1105_dsa_8021q_ops = { - .vlan_add = sja1105_dsa_8021q_vlan_add, - .vlan_del = sja1105_dsa_8021q_vlan_del, -}; - /* The programming model for the SJA1105 switch is "all-at-once" via static * configuration tables. Some of these can be dynamically modified at runtime, * but not the xMII mode parameters table. @@ -3153,6 +3148,8 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .crosschip_bridge_join = sja1105_crosschip_bridge_join, .crosschip_bridge_leave = sja1105_crosschip_bridge_leave, .devlink_info_get = sja1105_devlink_info_get, + .tag_8021q_vlan_add = sja1105_dsa_8021q_vlan_add, + .tag_8021q_vlan_del = sja1105_dsa_8021q_vlan_del, }; static const struct of_device_id sja1105_dt_ids[]; @@ -3296,8 +3293,7 @@ static int sja1105_probe(struct spi_device *spi) mutex_init(&priv->ptp_data.lock); mutex_init(&priv->mgmt_lock); - rc = dsa_tag_8021q_register(ds, &sja1105_dsa_8021q_ops, - htons(ETH_P_8021Q)); + rc = dsa_tag_8021q_register(ds, htons(ETH_P_8021Q)); if (rc) return rc; diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 77939c0c8dd5..0bda08fb2f16 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -21,22 +21,14 @@ struct dsa_8021q_crosschip_link { refcount_t refcount; }; -struct dsa_8021q_ops { - int (*vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags); - int (*vlan_del)(struct dsa_switch *ds, int port, u16 vid); -}; - struct dsa_8021q_context { - const struct dsa_8021q_ops *ops; struct dsa_switch *ds; struct list_head crosschip_links; /* EtherType of RX VID, used for filtering on master interface */ __be16 proto; }; -int dsa_tag_8021q_register(struct dsa_switch *ds, - const struct dsa_8021q_ops *ops, - __be16 proto); +int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); void dsa_tag_8021q_unregister(struct dsa_switch *ds); diff --git a/include/net/dsa.h b/include/net/dsa.h index e213572f6341..9e5593885357 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -872,6 +872,13 @@ struct dsa_switch_ops { const struct switchdev_obj_ring_role_mrp *mrp); int (*port_mrp_del_ring_role)(struct dsa_switch *ds, int port, const struct switchdev_obj_ring_role_mrp *mrp); + + /* + * tag_8021q operations + */ + int (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid, + u16 flags); + int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index de46a551a486..4a11c5004783 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -116,13 +116,12 @@ EXPORT_SYMBOL_GPL(vid_is_dsa_8021q); static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid, u16 flags, bool enabled) { - struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); if (enabled) - return ctx->ops->vlan_add(ctx->ds, dp->index, vid, flags); + return ds->ops->tag_8021q_vlan_add(ds, dp->index, vid, flags); - return ctx->ops->vlan_del(ctx->ds, dp->index, vid); + return ds->ops->tag_8021q_vlan_del(ds, dp->index, vid); } /* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single @@ -413,9 +412,7 @@ int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port, } EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_leave); -int dsa_tag_8021q_register(struct dsa_switch *ds, - const struct dsa_8021q_ops *ops, - __be16 proto) +int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto) { struct dsa_8021q_context *ctx; @@ -423,7 +420,6 @@ int dsa_tag_8021q_register(struct dsa_switch *ds, if (!ctx) return -ENOMEM; - ctx->ops = ops; ctx->proto = proto; ctx->ds = ds; -- cgit v1.2.3 From c6451cda100d4ebbc3f6819e1161ce0e38ce7746 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 19 Jul 2021 16:51:38 +0300 Subject: net: switchdev: introduce helper for checking dynamically learned FDB entries It is a bit difficult to understand what DSA checks when it tries to avoid installing dynamically learned addresses on foreign interfaces as local host addresses, so create a generic switchdev helper that can be reused and is generally more readable. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/switchdev.h | 6 ++++++ net/dsa/slave.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index e4cac9218ce1..745eb25fb8c4 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -238,6 +238,12 @@ switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info) return info->extack; } +static inline bool +switchdev_fdb_is_dynamically_learned(const struct switchdev_notifier_fdb_info *fdb_info) +{ + return !fdb_info->added_by_user && !fdb_info->is_local; +} + #ifdef CONFIG_NET_SWITCHDEV void switchdev_deferred_process(void); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ffbba1e71551..feb64f58faed 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2438,7 +2438,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, * On the other hand, FDB entries for local termination * should always be installed. */ - if (!fdb_info->added_by_user && !fdb_info->is_local && + if (switchdev_fdb_is_dynamically_learned(fdb_info) && !dp->ds->assisted_learning_on_cpu_port) return NOTIFY_DONE; -- cgit v1.2.3 From 8ca07176ab00a6d06a9b254dcbb2514b4d607e9c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 19 Jul 2021 16:51:39 +0300 Subject: net: switchdev: introduce a fanout helper for SWITCHDEV_FDB_{ADD,DEL}_TO_DEVICE Currently DSA has an issue with FDB entries pointing towards the bridge in the presence of br_fdb_replay() being called at port join and leave time. In particular, each bridge port will ask for a replay for the FDB entries pointing towards the bridge when it joins, and for another replay when it leaves. This means that for example, a bridge with 4 switch ports will notify DSA 4 times of the bridge MAC address. But if the MAC address of the bridge changes during the normal runtime of the system, the bridge notifies switchdev [ once ] of the deletion of the old MAC address as a local FDB towards the bridge, and of the insertion [ again once ] of the new MAC address as a local FDB. This is a problem, because DSA keeps the old MAC address as a host FDB entry with refcount 4 (4 ports asked for it using br_fdb_replay). So the old MAC address will not be deleted. Additionally, the new MAC address will only be installed with refcount 1, and when the first switch port leaves the bridge (leaving 3 others as still members), it will delete with it the new MAC address of the bridge from the local FDB entries kept by DSA (because the br_fdb_replay call on deletion will bring the entry's refcount from 1 to 0). So the problem, really, is that the number of br_fdb_replay() calls is not matched with the refcount that a host FDB is offloaded to DSA during normal runtime. An elegant way to solve the problem would be to make the switchdev notification emitted by br_fdb_change_mac_address() result in a host FDB kept by DSA which has a refcount exactly equal to the number of ports under that bridge. Then, no matter how many DSA ports join or leave that bridge, the host FDB entry will always be deleted when there are exactly zero remaining DSA switch ports members of the bridge. To implement the proposed solution, we remember that the switchdev objects and port attributes have some helpers provided by switchdev, which can be optionally called by drivers: switchdev_handle_port_obj_{add,del} and switchdev_handle_port_attr_set. These helpers: - fan out a switchdev object/attribute emitted for the bridge towards all the lower interfaces that pass the check_cb(). - fan out a switchdev object/attribute emitted for a bridge port that is a LAG towards all the lower interfaces that pass the check_cb(). In other words, this is the model we need for the FDB events too: something that will keep an FDB entry emitted towards a physical port as it is, but translate an FDB entry emitted towards the bridge into N FDB entries, one per physical port. Of course, there are many differences between fanning out a switchdev object (VLAN) on 3 lower interfaces of a LAG and fanning out an FDB entry on 3 lower interfaces of a LAG. Intuitively, an FDB entry towards a LAG should be treated specially, because FDB entries are unicast, we can't just install the same address towards 3 destinations. It is imaginable that drivers might want to treat this case specifically, so create some methods for this case and do not recurse into the LAG lower ports, just the bridge ports. DSA also listens for FDB entries on "foreign" interfaces, aka interfaces bridged with us which are not part of our hardware domain: think an Ethernet switch bridged with a Wi-Fi AP. For those addresses, DSA installs host FDB entries. However, there we have the same problem (those host FDB entries are installed with a refcount of only 1) and an even bigger one which we did not have with FDB entries towards the bridge: br_fdb_replay() is currently not called for FDB entries on foreign interfaces, just for the physical port and for the bridge itself. So when DSA sniffs an address learned by the software bridge towards a foreign interface like an e1000 port, and then that e1000 leaves the bridge, DSA remains with the dangling host FDB address. That will be fixed separately by replaying all FDB entries and not just the ones towards the port and the bridge. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/switchdev.h | 56 ++++++++++++++ net/switchdev/switchdev.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 745eb25fb8c4..6f57eb2e89cc 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -272,6 +272,30 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); +int switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)); + +int switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)); + int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), @@ -355,6 +379,38 @@ call_switchdev_blocking_notifiers(unsigned long val, return NOTIFY_DONE; } +static inline int +switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + return 0; +} + +static inline int +switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)); +{ + return 0; +} + static inline int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 070698dd19bc..82dd4e4e86f5 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -378,6 +378,196 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev, } EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers); +static int __switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct net_device *orig_dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + const struct switchdev_notifier_info *info = &fdb_info->info; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (check_cb(dev)) { + /* Handle FDB entries on foreign interfaces as FDB entries + * towards the software bridge. + */ + if (foreign_dev_check_cb && foreign_dev_check_cb(dev, orig_dev)) { + struct net_device *br = netdev_master_upper_dev_get_rcu(dev); + + if (!br || !netif_is_bridge_master(br)) + return 0; + + /* No point in handling FDB entries on a foreign bridge */ + if (foreign_dev_check_cb(dev, br)) + return 0; + + return __switchdev_handle_fdb_add_to_device(br, orig_dev, + fdb_info, check_cb, + foreign_dev_check_cb, + add_cb, lag_add_cb); + } + + return add_cb(dev, orig_dev, info->ctx, fdb_info); + } + + /* If we passed over the foreign check, it means that the LAG interface + * is offloaded. + */ + if (netif_is_lag_master(dev)) { + if (!lag_add_cb) + return -EOPNOTSUPP; + + return lag_add_cb(dev, orig_dev, info->ctx, fdb_info); + } + + /* Recurse through lower interfaces in case the FDB entry is pointing + * towards a bridge device. + */ + netdev_for_each_lower_dev(dev, lower_dev, iter) { + /* Do not propagate FDB entries across bridges */ + if (netif_is_bridge_master(lower_dev)) + continue; + + err = __switchdev_handle_fdb_add_to_device(lower_dev, orig_dev, + fdb_info, check_cb, + foreign_dev_check_cb, + add_cb, lag_add_cb); + if (err && err != -EOPNOTSUPP) + return err; + } + + return err; +} + +int switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + int err; + + err = __switchdev_handle_fdb_add_to_device(dev, dev, fdb_info, + check_cb, + foreign_dev_check_cb, + add_cb, lag_add_cb); + if (err == -EOPNOTSUPP) + err = 0; + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_handle_fdb_add_to_device); + +static int __switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct net_device *orig_dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + const struct switchdev_notifier_info *info = &fdb_info->info; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (check_cb(dev)) { + /* Handle FDB entries on foreign interfaces as FDB entries + * towards the software bridge. + */ + if (foreign_dev_check_cb && foreign_dev_check_cb(dev, orig_dev)) { + struct net_device *br = netdev_master_upper_dev_get_rcu(dev); + + if (!br || !netif_is_bridge_master(br)) + return 0; + + /* No point in handling FDB entries on a foreign bridge */ + if (foreign_dev_check_cb(dev, br)) + return 0; + + return __switchdev_handle_fdb_del_to_device(br, orig_dev, + fdb_info, check_cb, + foreign_dev_check_cb, + del_cb, lag_del_cb); + } + + return del_cb(dev, orig_dev, info->ctx, fdb_info); + } + + /* If we passed over the foreign check, it means that the LAG interface + * is offloaded. + */ + if (netif_is_lag_master(dev)) { + if (!lag_del_cb) + return -EOPNOTSUPP; + + return lag_del_cb(dev, orig_dev, info->ctx, fdb_info); + } + + /* Recurse through lower interfaces in case the FDB entry is pointing + * towards a bridge device. + */ + netdev_for_each_lower_dev(dev, lower_dev, iter) { + /* Do not propagate FDB entries across bridges */ + if (netif_is_bridge_master(lower_dev)) + continue; + + err = switchdev_handle_fdb_del_to_device(lower_dev, fdb_info, + check_cb, + foreign_dev_check_cb, + del_cb, lag_del_cb); + if (err && err != -EOPNOTSUPP) + return err; + } + + return err; +} + +int switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + int err; + + err = __switchdev_handle_fdb_del_to_device(dev, dev, fdb_info, + check_cb, + foreign_dev_check_cb, + del_cb, lag_del_cb); + if (err == -EOPNOTSUPP) + err = 0; + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_handle_fdb_del_to_device); + static int __switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), -- cgit v1.2.3 From 2d151d39073aff498358543801fca0f670fea981 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sun, 18 Jul 2021 09:11:06 +0200 Subject: xfrm: Add possibility to set the default to block if we have no policy As the default we assume the traffic to pass, if we have no matching IPsec policy. With this patch, we have a possibility to change this default from allow to block. It can be configured via netlink. Each direction (input/output/forward) can be configured separately. With the default to block configuered, we need allow policies for all packet flows we accept. We do not use default policy lookup for the loopback device. v1->v2 - fix compiling when XFRM is disabled - Reported-by: kernel test robot Co-developed-by: Christian Langrock Signed-off-by: Christian Langrock Co-developed-by: Antony Antony Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 7 +++++++ include/net/xfrm.h | 36 ++++++++++++++++++++++++++------ include/uapi/linux/xfrm.h | 10 +++++++++ net/xfrm/xfrm_policy.c | 16 +++++++++++++++ net/xfrm/xfrm_user.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index e946366e8ba5..88c647302977 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -65,6 +65,13 @@ struct netns_xfrm { u32 sysctl_aevent_rseqth; int sysctl_larval_drop; u32 sysctl_acq_expires; + + u8 policy_default; +#define XFRM_POL_DEFAULT_IN 1 +#define XFRM_POL_DEFAULT_OUT 2 +#define XFRM_POL_DEFAULT_FWD 4 +#define XFRM_POL_DEFAULT_MASK 7 + #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_hdr; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index cbff7c2a9724..2308210793a0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1075,6 +1075,22 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un } #ifdef CONFIG_XFRM +static inline bool +xfrm_default_allow(struct net *net, int dir) +{ + u8 def = net->xfrm.policy_default; + + switch (dir) { + case XFRM_POLICY_IN: + return def & XFRM_POL_DEFAULT_IN ? false : true; + case XFRM_POLICY_OUT: + return def & XFRM_POL_DEFAULT_OUT ? false : true; + case XFRM_POLICY_FWD: + return def & XFRM_POL_DEFAULT_FWD ? false : true; + } + return false; +} + int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, unsigned short family); @@ -1088,9 +1104,13 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) || - (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || - __xfrm_policy_check(sk, ndir, skb, family); + if (xfrm_default_allow(net, dir)) + return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) || + (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || + __xfrm_policy_check(sk, ndir, skb, family); + else + return (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || + __xfrm_policy_check(sk, ndir, skb, family); } static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) @@ -1142,9 +1162,13 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); - return !net->xfrm.policy_count[XFRM_POLICY_OUT] || - (skb_dst(skb)->flags & DST_NOXFRM) || - __xfrm_route_forward(skb, family); + if (xfrm_default_allow(net, XFRM_POLICY_FWD)) + return !net->xfrm.policy_count[XFRM_POLICY_OUT] || + (skb_dst(skb)->flags & DST_NOXFRM) || + __xfrm_route_forward(skb, family); + else + return (skb_dst(skb)->flags & DST_NOXFRM) || + __xfrm_route_forward(skb, family); } static inline int xfrm4_route_forward(struct sk_buff *skb) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index ffc6a5391bb7..6e8095106192 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -213,6 +213,11 @@ enum { XFRM_MSG_GETSPDINFO, #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO + XFRM_MSG_SETDEFAULT, +#define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT + XFRM_MSG_GETDEFAULT, +#define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT + XFRM_MSG_MAPPING, #define XFRM_MSG_MAPPING XFRM_MSG_MAPPING __XFRM_MSG_MAX @@ -508,6 +513,11 @@ struct xfrm_user_offload { #define XFRM_OFFLOAD_IPV6 1 #define XFRM_OFFLOAD_INBOUND 2 +struct xfrm_userpolicy_default { + __u8 dirmask; + __u8 action; +}; + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 827d84255021..d5cb082e11fc 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3165,6 +3165,11 @@ ok: return dst; nopol: + if (!(dst_orig->dev->flags & IFF_LOOPBACK) && + !xfrm_default_allow(net, dir)) { + err = -EPERM; + goto error; + } if (!(flags & XFRM_LOOKUP_ICMP)) { dst = dst_orig; goto ok; @@ -3553,6 +3558,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } if (!pol) { + if (!xfrm_default_allow(net, dir)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); + return 0; + } + if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); @@ -3607,6 +3617,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, tpp[ti++] = &pols[pi]->xfrm_vec[i]; } xfrm_nr = ti; + + if (!xfrm_default_allow(net, dir) && !xfrm_nr) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); + goto reject; + } + if (npols > 1) { xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); tpp = stp; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b47d613409b7..4eafd1130c3e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1961,6 +1961,54 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, return skb; } +static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct net *net = sock_net(skb->sk); + struct xfrm_userpolicy_default *up = nlmsg_data(nlh); + u8 dirmask = (1 << up->dirmask) & XFRM_POL_DEFAULT_MASK; + u8 old_default = net->xfrm.policy_default; + + net->xfrm.policy_default = (old_default & (0xff ^ dirmask)) + | (up->action << up->dirmask); + + rt_genid_bump_all(net); + + return 0; +} + +static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct sk_buff *r_skb; + struct nlmsghdr *r_nlh; + struct net *net = sock_net(skb->sk); + struct xfrm_userpolicy_default *r_up, *up; + int len = NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_default)); + u32 portid = NETLINK_CB(skb).portid; + u32 seq = nlh->nlmsg_seq; + + up = nlmsg_data(nlh); + + r_skb = nlmsg_new(len, GFP_ATOMIC); + if (!r_skb) + return -ENOMEM; + + r_nlh = nlmsg_put(r_skb, portid, seq, XFRM_MSG_GETDEFAULT, sizeof(*r_up), 0); + if (!r_nlh) { + kfree_skb(r_skb); + return -EMSGSIZE; + } + + r_up = nlmsg_data(r_nlh); + + r_up->action = ((net->xfrm.policy_default & (1 << up->dirmask)) >> up->dirmask); + r_up->dirmask = up->dirmask; + nlmsg_end(r_skb, r_nlh); + + return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid); +} + static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { @@ -2664,6 +2712,8 @@ const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), + [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), }; EXPORT_SYMBOL_GPL(xfrm_msg_min); @@ -2743,6 +2793,8 @@ static const struct xfrm_link { .nla_pol = xfrma_spd_policy, .nla_max = XFRMA_SPD_MAX }, [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo }, + [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_set_default }, + [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_get_default }, }; static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, -- cgit v1.2.3 From 94111dfc18b8b8cb3c72006e0e7b31c038709ab4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 20 Jul 2021 20:35:56 +0300 Subject: net: switchdev: remove stray semicolon in switchdev_handle_fdb_del_to_device shim With the semicolon at the end, the compiler sees the shim function as a declaration and not as a definition, and warns: 'switchdev_handle_fdb_del_to_device' declared 'static' but never defined Reported-by: kernel test robot Fixes: 8ca07176ab00 ("net: switchdev: introduce a fanout helper for SWITCHDEV_FDB_{ADD,DEL}_TO_DEVICE") Signed-off-by: Vladimir Oltean Tested-by: Matthieu Baerts Signed-off-by: David S. Miller --- include/net/switchdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 6f57eb2e89cc..66468ff8cc0a 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -406,7 +406,7 @@ switchdev_handle_fdb_del_to_device(struct net_device *dev, const struct switchdev_notifier_fdb_info *fdb_info), int (*lag_del_cb)(struct net_device *dev, const struct net_device *orig_dev, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info)); + const struct switchdev_notifier_fdb_info *fdb_info)) { return 0; } -- cgit v1.2.3 From 9ee11f0fff205b4b3df9750bff5e94f97c71b6a0 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 20 Jul 2021 21:42:57 +0200 Subject: ipv6: ioam: Data plane support for Pre-allocated Trace Implement support for processing the IOAM Pre-allocated Trace with IPv6, see [1] and [2]. Introduce a new IPv6 Hop-by-Hop TLV option, see IANA [3]. A new per-interface sysctl is introduced. The value is a boolean to accept (=1) or ignore (=0, by default) IPv6 IOAM options on ingress for an interface: - net.ipv6.conf.XXX.ioam6_enabled Two other sysctls are introduced to define IOAM IDs, represented by an integer. They are respectively per-namespace and per-interface: - net.ipv6.ioam6_id - net.ipv6.conf.XXX.ioam6_id The value of the first one represents the IOAM ID of the node itself (u32; max and default value = U32_MAX>>8, due to hop limit concatenation) while the other represents the IOAM ID of an interface (u16; max and default value = U16_MAX). Each "ioam6_id" sysctl has a "_wide" equivalent: - net.ipv6.ioam6_id_wide - net.ipv6.conf.XXX.ioam6_id_wide The value of the first one represents the wide IOAM ID of the node itself (u64; max and default value = U64_MAX>>8, due to hop limit concatenation) while the other represents the wide IOAM ID of an interface (u32; max and default value = U32_MAX). The use of short and wide equivalents is not exclusive, a deployment could choose to leverage both. For example, net.ipv6.conf.XXX.ioam6_id (short format) could be an identifier for a physical interface, whereas net.ipv6.conf.XXX.ioam6_id_wide (wide format) could be an identifier for a logical sub-interface. Documentation about new sysctls is provided at the end of this patchset. Two relativistic hash tables are used: one for IOAM namespaces, the other for IOAM schemas. A namespace can only have a single active schema and a schema can only be attached to a single namespace (1:1 relationship). [1] https://tools.ietf.org/html/draft-ietf-ippm-ioam-ipv6-options [2] https://tools.ietf.org/html/draft-ietf-ippm-ioam-data [3] https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml#ipv6-parameters-2 Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/linux/ioam6.h | 13 ++ include/linux/ipv6.h | 3 + include/net/ioam6.h | 64 +++++++++ include/net/netns/ipv6.h | 3 + include/uapi/linux/in6.h | 1 + include/uapi/linux/ioam6.h | 9 ++ include/uapi/linux/ipv6.h | 3 + net/ipv6/Makefile | 2 +- net/ipv6/addrconf.c | 37 +++++ net/ipv6/af_inet6.c | 10 ++ net/ipv6/exthdrs.c | 61 +++++++++ net/ipv6/ioam6.c | 333 +++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/sysctl_net_ipv6.c | 19 +++ 13 files changed, 557 insertions(+), 1 deletion(-) create mode 100644 include/linux/ioam6.h create mode 100644 include/net/ioam6.h create mode 100644 net/ipv6/ioam6.c (limited to 'include/net') diff --git a/include/linux/ioam6.h b/include/linux/ioam6.h new file mode 100644 index 000000000000..94a24b36998f --- /dev/null +++ b/include/linux/ioam6.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * IPv6 IOAM + * + * Author: + * Justin Iurman + */ +#ifndef _LINUX_IOAM6_H +#define _LINUX_IOAM6_H + +#include + +#endif /* _LINUX_IOAM6_H */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 70b2ad3b9884..ef4a69865737 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -76,6 +76,9 @@ struct ipv6_devconf { __s32 disable_policy; __s32 ndisc_tclass; __s32 rpl_seg_enabled; + __u32 ioam6_id; + __u32 ioam6_id_wide; + __u8 ioam6_enabled; struct ctl_table_header *sysctl_header; }; diff --git a/include/net/ioam6.h b/include/net/ioam6.h new file mode 100644 index 000000000000..772b91ee2e87 --- /dev/null +++ b/include/net/ioam6.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman + */ + +#ifndef _NET_IOAM6_H +#define _NET_IOAM6_H + +#include +#include +#include +#include + +struct ioam6_namespace { + struct rhash_head head; + struct rcu_head rcu; + + struct ioam6_schema __rcu *schema; + + __be16 id; + __be32 data; + __be64 data_wide; +}; + +struct ioam6_schema { + struct rhash_head head; + struct rcu_head rcu; + + struct ioam6_namespace __rcu *ns; + + u32 id; + int len; + __be32 hdr; + + u8 data[0]; +}; + +struct ioam6_pernet_data { + struct mutex lock; + struct rhashtable namespaces; + struct rhashtable schemas; +}; + +static inline struct ioam6_pernet_data *ioam6_pernet(struct net *net) +{ +#if IS_ENABLED(CONFIG_IPV6) + return net->ipv6.ioam6_data; +#else + return NULL; +#endif +} + +struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id); +void ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace); + +int ioam6_init(void); +void ioam6_exit(void); + +#endif /* _NET_IOAM6_H */ diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index bde0b7adb4a3..a4b550380316 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -51,6 +51,8 @@ struct netns_sysctl_ipv6 { int max_dst_opts_len; int max_hbh_opts_len; int seg6_flowlabel; + u32 ioam6_id; + u64 ioam6_id_wide; bool skip_notify_on_dev_down; u8 fib_notify_on_flag_change; }; @@ -110,6 +112,7 @@ struct netns_ipv6 { spinlock_t lock; u32 seq; } ip6addrlbl_table; + struct ioam6_pernet_data *ioam6_data; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 5ad396a57eb3..c4c53a9ab959 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -145,6 +145,7 @@ struct in6_flowlabel_req { #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ +#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ #define IPV6_TLV_JUMBO 194 #define IPV6_TLV_HAO 201 /* home address option */ diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h index 2177e4e49566..23ba6e85582f 100644 --- a/include/uapi/linux/ioam6.h +++ b/include/uapi/linux/ioam6.h @@ -12,6 +12,15 @@ #include #include +#define IOAM6_U16_UNAVAILABLE U16_MAX +#define IOAM6_U32_UNAVAILABLE U32_MAX +#define IOAM6_U64_UNAVAILABLE U64_MAX + +#define IOAM6_DEFAULT_ID (IOAM6_U32_UNAVAILABLE >> 8) +#define IOAM6_DEFAULT_ID_WIDE (IOAM6_U64_UNAVAILABLE >> 8) +#define IOAM6_DEFAULT_IF_ID IOAM6_U16_UNAVAILABLE +#define IOAM6_DEFAULT_IF_ID_WIDE IOAM6_U32_UNAVAILABLE + /* * IPv6 IOAM Option Header */ diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 70603775fe91..b243a53fa985 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -190,6 +190,9 @@ enum { DEVCONF_NDISC_TCLASS, DEVCONF_RPL_SEG_ENABLED, DEVCONF_RA_DEFRTR_METRIC, + DEVCONF_IOAM6_ENABLED, + DEVCONF_IOAM6_ID, + DEVCONF_IOAM6_ID_WIDE, DEVCONF_MAX }; diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index cf7b47bdb9b3..b7ef10d417d6 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o seg6.o fib6_notifier.o rpl.o + udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bc330fffb4a8..1802287977f1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -89,12 +89,15 @@ #include #include #include +#include #define INFINITY_LIFE_TIME 0xFFFFFFFF #define IPV6_MAX_STRLEN \ sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255") +static u32 ioam6_if_id_max = U16_MAX; + static inline u32 cstamp_delta(unsigned long cstamp) { return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; @@ -237,6 +240,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, .rpl_seg_enabled = 0, + .ioam6_enabled = 0, + .ioam6_id = IOAM6_DEFAULT_IF_ID, + .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -293,6 +299,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, .rpl_seg_enabled = 0, + .ioam6_enabled = 0, + .ioam6_id = IOAM6_DEFAULT_IF_ID, + .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -5524,6 +5533,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass; array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled; + array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled; + array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; + array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; } static inline size_t inet6_ifla6_size(void) @@ -6930,6 +6942,31 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "ioam6_enabled", + .data = &ipv6_devconf.ioam6_enabled, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)SYSCTL_ONE, + }, + { + .procname = "ioam6_id", + .data = &ipv6_devconf.ioam6_id, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)&ioam6_if_id_max, + }, + { + .procname = "ioam6_id_wide", + .data = &ipv6_devconf.ioam6_id_wide, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec, + }, { /* sentinel */ } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2389ff702f51..d92c90d97763 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -961,6 +962,9 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.fib_notify_on_flag_change = 0; atomic_set(&net->ipv6.fib6_sernum, 1); + net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID; + net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE; + err = ipv6_init_mibs(net); if (err) return err; @@ -1191,6 +1195,10 @@ static int __init inet6_init(void) if (err) goto rpl_fail; + err = ioam6_init(); + if (err) + goto ioam6_fail; + err = igmp6_late_init(); if (err) goto igmp6_late_err; @@ -1213,6 +1221,8 @@ sysctl_fail: igmp6_late_cleanup(); #endif igmp6_late_err: + ioam6_exit(); +ioam6_fail: rpl_exit(); rpl_fail: seg6_exit(); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 26882e165c9e..d897faa4e9e6 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -49,6 +49,9 @@ #include #endif #include +#include +#include +#include #include @@ -928,6 +931,60 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) return false; } +/* IOAM */ + +static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) +{ + struct ioam6_trace_hdr *trace; + struct ioam6_namespace *ns; + struct ioam6_hdr *hdr; + + /* Bad alignment (must be 4n-aligned) */ + if (optoff & 3) + goto drop; + + /* Ignore if IOAM is not enabled on ingress */ + if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled) + goto ignore; + + /* Truncated Option header */ + hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff); + if (hdr->opt_len < 2) + goto drop; + + switch (hdr->type) { + case IOAM6_TYPE_PREALLOC: + /* Truncated Pre-allocated Trace header */ + if (hdr->opt_len < 2 + sizeof(*trace)) + goto drop; + + /* Malformed Pre-allocated Trace header */ + trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr)); + if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4) + goto drop; + + /* Ignore if the IOAM namespace is unknown */ + ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id); + if (!ns) + goto ignore; + + if (!skb_valid_dst(skb)) + ip6_route_input(skb); + + ioam6_fill_trace_data(skb, ns, trace); + break; + default: + break; + } + +ignore: + return true; + +drop: + kfree_skb(skb); + return false; +} + /* Jumbo payload */ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) @@ -999,6 +1056,10 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = { .type = IPV6_TLV_ROUTERALERT, .func = ipv6_hop_ra, }, + { + .type = IPV6_TLV_IOAM, + .func = ipv6_hop_ioam, + }, { .type = IPV6_TLV_JUMBO, .func = ipv6_hop_jumbo, diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c new file mode 100644 index 000000000000..ba629e1b9408 --- /dev/null +++ b/net/ipv6/ioam6.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +static void ioam6_ns_release(struct ioam6_namespace *ns) +{ + kfree_rcu(ns, rcu); +} + +static void ioam6_sc_release(struct ioam6_schema *sc) +{ + kfree_rcu(sc, rcu); +} + +static void ioam6_free_ns(void *ptr, void *arg) +{ + struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr; + + if (ns) + ioam6_ns_release(ns); +} + +static void ioam6_free_sc(void *ptr, void *arg) +{ + struct ioam6_schema *sc = (struct ioam6_schema *)ptr; + + if (sc) + ioam6_sc_release(sc); +} + +static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct ioam6_namespace *ns = obj; + + return (ns->id != *(__be16 *)arg->key); +} + +static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct ioam6_schema *sc = obj; + + return (sc->id != *(u32 *)arg->key); +} + +static const struct rhashtable_params rht_ns_params = { + .key_len = sizeof(__be16), + .key_offset = offsetof(struct ioam6_namespace, id), + .head_offset = offsetof(struct ioam6_namespace, head), + .automatic_shrinking = true, + .obj_cmpfn = ioam6_ns_cmpfn, +}; + +static const struct rhashtable_params rht_sc_params = { + .key_len = sizeof(u32), + .key_offset = offsetof(struct ioam6_schema, id), + .head_offset = offsetof(struct ioam6_schema, head), + .automatic_shrinking = true, + .obj_cmpfn = ioam6_sc_cmpfn, +}; + +struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(net); + + return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); +} + +static void __ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace, + struct ioam6_schema *sc, + u8 sclen) +{ + struct __kernel_sock_timeval ts; + u64 raw64; + u32 raw32; + u16 raw16; + u8 *data; + u8 byte; + + data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4; + + /* hop_lim and node_id */ + if (trace->type.bit0) { + byte = ipv6_hdr(skb)->hop_limit; + if (skb->dev) + byte--; + + raw32 = dev_net(skb->dev)->ipv6.sysctl.ioam6_id; + + *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); + data += sizeof(__be32); + } + + /* ingress_if_id and egress_if_id */ + if (trace->type.bit1) { + if (!skb->dev) + raw16 = IOAM6_U16_UNAVAILABLE; + else + raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id; + + *(__be16 *)data = cpu_to_be16(raw16); + data += sizeof(__be16); + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + raw16 = IOAM6_U16_UNAVAILABLE; + else + raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id; + + *(__be16 *)data = cpu_to_be16(raw16); + data += sizeof(__be16); + } + + /* timestamp seconds */ + if (trace->type.bit2) { + if (!skb->tstamp) + __net_timestamp(skb); + + skb_get_new_timestamp(skb, &ts); + + *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); + data += sizeof(__be32); + } + + /* timestamp subseconds */ + if (trace->type.bit3) { + if (!skb->tstamp) + __net_timestamp(skb); + + if (!trace->type.bit2) + skb_get_new_timestamp(skb, &ts); + + *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec); + data += sizeof(__be32); + } + + /* transit delay */ + if (trace->type.bit4) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* namespace data */ + if (trace->type.bit5) { + *(__be32 *)data = ns->data; + data += sizeof(__be32); + } + + /* queue depth */ + if (trace->type.bit6) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* checksum complement */ + if (trace->type.bit7) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* hop_lim and node_id (wide) */ + if (trace->type.bit8) { + byte = ipv6_hdr(skb)->hop_limit; + if (skb->dev) + byte--; + + raw64 = dev_net(skb->dev)->ipv6.sysctl.ioam6_id_wide; + + *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); + data += sizeof(__be64); + } + + /* ingress_if_id and egress_if_id (wide) */ + if (trace->type.bit9) { + if (!skb->dev) + raw32 = IOAM6_U32_UNAVAILABLE; + else + raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide; + + *(__be32 *)data = cpu_to_be32(raw32); + data += sizeof(__be32); + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + raw32 = IOAM6_U32_UNAVAILABLE; + else + raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide; + + *(__be32 *)data = cpu_to_be32(raw32); + data += sizeof(__be32); + } + + /* namespace data (wide) */ + if (trace->type.bit10) { + *(__be64 *)data = ns->data_wide; + data += sizeof(__be64); + } + + /* buffer occupancy */ + if (trace->type.bit11) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* opaque state snapshot */ + if (trace->type.bit22) { + if (!sc) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8); + } else { + *(__be32 *)data = sc->hdr; + data += sizeof(__be32); + + memcpy(data, sc->data, sc->len); + } + } +} + +/* called with rcu_read_lock() */ +void ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace) +{ + struct ioam6_schema *sc; + u8 sclen = 0; + + /* Skip if Overflow flag is set OR + * if an unknown type (bit 12-21) is set + */ + if (trace->overflow || + trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | + trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | + trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | + trace->type.bit21) { + return; + } + + /* NodeLen does not include Opaque State Snapshot length. We need to + * take it into account if the corresponding bit is set (bit 22) and + * if the current IOAM namespace has an active schema attached to it + */ + sc = rcu_dereference(ns->schema); + if (trace->type.bit22) { + sclen = sizeof_field(struct ioam6_schema, hdr) / 4; + + if (sc) + sclen += sc->len / 4; + } + + /* If there is no space remaining, we set the Overflow flag and we + * skip without filling the trace + */ + if (!trace->remlen || trace->remlen < trace->nodelen + sclen) { + trace->overflow = 1; + return; + } + + __ioam6_fill_trace_data(skb, ns, trace, sc, sclen); + trace->remlen -= trace->nodelen + sclen; +} + +static int __net_init ioam6_net_init(struct net *net) +{ + struct ioam6_pernet_data *nsdata; + int err = -ENOMEM; + + nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL); + if (!nsdata) + goto out; + + mutex_init(&nsdata->lock); + net->ipv6.ioam6_data = nsdata; + + err = rhashtable_init(&nsdata->namespaces, &rht_ns_params); + if (err) + goto free_nsdata; + + err = rhashtable_init(&nsdata->schemas, &rht_sc_params); + if (err) + goto free_rht_ns; + +out: + return err; +free_rht_ns: + rhashtable_destroy(&nsdata->namespaces); +free_nsdata: + kfree(nsdata); + net->ipv6.ioam6_data = NULL; + goto out; +} + +static void __net_exit ioam6_net_exit(struct net *net) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(net); + + rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL); + rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL); + + kfree(nsdata); +} + +static struct pernet_operations ioam6_net_ops = { + .init = ioam6_net_init, + .exit = ioam6_net_exit, +}; + +int __init ioam6_init(void) +{ + int err = register_pernet_subsys(&ioam6_net_ops); + + if (err) + return err; + + pr_info("In-situ OAM (IOAM) with IPv6\n"); + return 0; +} + +void ioam6_exit(void) +{ + unregister_pernet_subsys(&ioam6_net_ops); +} diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index d7cf26f730d7..d53dd142bf87 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -21,6 +21,7 @@ #ifdef CONFIG_NETLABEL #include #endif +#include static int two = 2; static int three = 3; @@ -28,6 +29,8 @@ static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static u32 rt6_multipath_hash_fields_all_mask = FIB_MULTIPATH_HASH_FIELD_ALL_MASK; +static u32 ioam6_id_max = IOAM6_DEFAULT_ID; +static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE; static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -196,6 +199,22 @@ static struct ctl_table ipv6_table_template[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two, }, + { + .procname = "ioam6_id", + .data = &init_net.ipv6.sysctl.ioam6_id, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra2 = &ioam6_id_max, + }, + { + .procname = "ioam6_id_wide", + .data = &init_net.ipv6.sysctl.ioam6_id_wide, + .maxlen = sizeof(u64), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + .extra2 = &ioam6_id_wide_max, + }, { } }; -- cgit v1.2.3 From 3edede08ff37c6a9370510508d5eeb54890baf47 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 20 Jul 2021 21:42:59 +0200 Subject: ipv6: ioam: Support for IOAM injection with lwtunnels Add support for the IOAM inline insertion (only for the host-to-host use case) which is per-route configured with lightweight tunnels. The target is iproute2 and the patch is ready. It will be posted as soon as this patchset is merged. Here is an overview: $ ip -6 ro ad fc00::1/128 encap ioam6 trace type 0x800000 ns 1 size 12 dev eth0 This example configures an IOAM Pre-allocated Trace option attached to the fc00::1/128 prefix. The IOAM namespace (ns) is 1, the size of the pre-allocated trace data block is 12 octets (size) and only the first IOAM data (bit 0: hop_limit + node id) is included in the trace (type) represented as a bitfield. The reason why the in-transit (IPv6-in-IPv6 encapsulation) use case is not implemented is explained on the patchset cover. Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/linux/ioam6_iptunnel.h | 13 ++ include/net/ioam6.h | 3 + include/uapi/linux/ioam6.h | 1 + include/uapi/linux/ioam6_iptunnel.h | 20 +++ include/uapi/linux/lwtunnel.h | 1 + net/core/lwtunnel.c | 2 + net/ipv6/Kconfig | 11 ++ net/ipv6/Makefile | 1 + net/ipv6/ioam6.c | 44 ++++-- net/ipv6/ioam6_iptunnel.c | 274 ++++++++++++++++++++++++++++++++++++ 10 files changed, 358 insertions(+), 12 deletions(-) create mode 100644 include/linux/ioam6_iptunnel.h create mode 100644 include/uapi/linux/ioam6_iptunnel.h create mode 100644 net/ipv6/ioam6_iptunnel.c (limited to 'include/net') diff --git a/include/linux/ioam6_iptunnel.h b/include/linux/ioam6_iptunnel.h new file mode 100644 index 000000000000..07d9dfedd29d --- /dev/null +++ b/include/linux/ioam6_iptunnel.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * IPv6 IOAM Lightweight Tunnel API + * + * Author: + * Justin Iurman + */ +#ifndef _LINUX_IOAM6_IPTUNNEL_H +#define _LINUX_IOAM6_IPTUNNEL_H + +#include + +#endif /* _LINUX_IOAM6_IPTUNNEL_H */ diff --git a/include/net/ioam6.h b/include/net/ioam6.h index 772b91ee2e87..3c2993bc48c8 100644 --- a/include/net/ioam6.h +++ b/include/net/ioam6.h @@ -61,4 +61,7 @@ void ioam6_fill_trace_data(struct sk_buff *skb, int ioam6_init(void); void ioam6_exit(void); +int ioam6_iptunnel_init(void); +void ioam6_iptunnel_exit(void); + #endif /* _NET_IOAM6_H */ diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h index 23ba6e85582f..ac4de376f0ce 100644 --- a/include/uapi/linux/ioam6.h +++ b/include/uapi/linux/ioam6.h @@ -126,6 +126,7 @@ struct ioam6_trace_hdr { #error "Please fix " #endif +#define IOAM6_TRACE_DATA_SIZE_MAX 244 __u8 data[0]; } __attribute__((packed)); diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h new file mode 100644 index 000000000000..bae14636a8c8 --- /dev/null +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM Lightweight Tunnel API + * + * Author: + * Justin Iurman + */ + +#ifndef _UAPI_LINUX_IOAM6_IPTUNNEL_H +#define _UAPI_LINUX_IOAM6_IPTUNNEL_H + +enum { + IOAM6_IPTUNNEL_UNSPEC, + IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ + __IOAM6_IPTUNNEL_MAX, +}; + +#define IOAM6_IPTUNNEL_MAX (__IOAM6_IPTUNNEL_MAX - 1) + +#endif /* _UAPI_LINUX_IOAM6_IPTUNNEL_H */ diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 568a4303ccce..2e206919125c 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -14,6 +14,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_BPF, LWTUNNEL_ENCAP_SEG6_LOCAL, LWTUNNEL_ENCAP_RPL, + LWTUNNEL_ENCAP_IOAM6, __LWTUNNEL_ENCAP_MAX, }; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 8ec7d13d2860..d0ae987d2de9 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -43,6 +43,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "SEG6LOCAL"; case LWTUNNEL_ENCAP_RPL: return "RPL"; + case LWTUNNEL_ENCAP_IOAM6: + return "IOAM6"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: case LWTUNNEL_ENCAP_NONE: diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 747f56e0c636..e504204bca92 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -328,4 +328,15 @@ config IPV6_RPL_LWTUNNEL If unsure, say N. +config IPV6_IOAM6_LWTUNNEL + bool "IPv6: IOAM Pre-allocated Trace insertion support" + depends on IPV6 + select LWTUNNEL + help + Support for the inline insertion of IOAM Pre-allocated + Trace Header (only on locally generated packets), using + the lightweight tunnels mechanism. + + If unsure, say N. + endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index b7ef10d417d6..1bc7e143217b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -27,6 +27,7 @@ ipv6-$(CONFIG_NETLABEL) += calipso.o ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o +ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index ba59671f32b8..5e8961004832 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -648,7 +648,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (skb->dev) byte--; - raw32 = dev_net(skb->dev)->ipv6.sysctl.ioam6_id; + raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id; *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); data += sizeof(__be32); @@ -675,24 +675,31 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, /* timestamp seconds */ if (trace->type.bit2) { - if (!skb->tstamp) - __net_timestamp(skb); - - skb_get_new_timestamp(skb, &ts); + if (!skb->dev) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + } else { + if (!skb->tstamp) + __net_timestamp(skb); - *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); + skb_get_new_timestamp(skb, &ts); + *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); + } data += sizeof(__be32); } /* timestamp subseconds */ if (trace->type.bit3) { - if (!skb->tstamp) - __net_timestamp(skb); + if (!skb->dev) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + } else { + if (!skb->tstamp) + __net_timestamp(skb); - if (!trace->type.bit2) - skb_get_new_timestamp(skb, &ts); + if (!trace->type.bit2) + skb_get_new_timestamp(skb, &ts); - *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec); + *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec); + } data += sizeof(__be32); } @@ -726,7 +733,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (skb->dev) byte--; - raw64 = dev_net(skb->dev)->ipv6.sysctl.ioam6_id_wide; + raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide; *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); data += sizeof(__be64); @@ -874,10 +881,20 @@ int __init ioam6_init(void) if (err) goto out_unregister_pernet_subsys; +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL + err = ioam6_iptunnel_init(); + if (err) + goto out_unregister_genl; +#endif + pr_info("In-situ OAM (IOAM) with IPv6\n"); out: return err; +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL +out_unregister_genl: + genl_unregister_family(&ioam6_genl_family); +#endif out_unregister_pernet_subsys: unregister_pernet_subsys(&ioam6_net_ops); goto out; @@ -885,6 +902,9 @@ out_unregister_pernet_subsys: void ioam6_exit(void) { +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL + ioam6_iptunnel_exit(); +#endif genl_unregister_family(&ioam6_genl_family); unregister_pernet_subsys(&ioam6_net_ops); } diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c new file mode 100644 index 000000000000..f9ee04541c17 --- /dev/null +++ b/net/ipv6/ioam6_iptunnel.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * IPv6 IOAM Lightweight Tunnel implementation + * + * Author: + * Justin Iurman + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IOAM6_MASK_SHORT_FIELDS 0xff100000 +#define IOAM6_MASK_WIDE_FIELDS 0xe00000 + +struct ioam6_lwt_encap { + struct ipv6_hopopt_hdr eh; + u8 pad[2]; /* 2-octet padding for 4n-alignment */ + struct ioam6_hdr ioamh; + struct ioam6_trace_hdr traceh; +} __packed; + +struct ioam6_lwt { + struct ioam6_lwt_encap tuninfo; +}; + +static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt) +{ + return (struct ioam6_lwt *)lwt->data; +} + +static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt) +{ + return &ioam6_lwt_state(lwt)->tuninfo; +} + +static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt) +{ + return &(ioam6_lwt_state(lwt)->tuninfo.traceh); +} + +static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = { + [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)), +}; + +static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype, + struct ioam6_trace_hdr *trace) +{ + struct ioam6_trace_hdr *data; + struct nlattr *nla; + int len; + + len = sizeof(*trace); + + nla = nla_reserve(skb, attrtype, len); + if (!nla) + return -EMSGSIZE; + + data = nla_data(nla); + memcpy(data, trace, len); + + return 0; +} + +static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace) +{ + u32 fields; + + if (!trace->type_be32 || !trace->remlen || + trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4) + return false; + + trace->nodelen = 0; + fields = be32_to_cpu(trace->type_be32); + + trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS) + * (sizeof(__be32) / 4); + trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS) + * (sizeof(__be64) / 4); + + return true; +} + +static int ioam6_build_state(struct net *net, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1]; + struct ioam6_lwt_encap *tuninfo; + struct ioam6_trace_hdr *trace; + struct lwtunnel_state *s; + int len_aligned; + int len, err; + + if (family != AF_INET6) + return -EINVAL; + + err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla, + ioam6_iptunnel_policy, extack); + if (err < 0) + return err; + + if (!tb[IOAM6_IPTUNNEL_TRACE]) { + NL_SET_ERR_MSG(extack, "missing trace"); + return -EINVAL; + } + + trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]); + if (!ioam6_validate_trace_hdr(trace)) { + NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE], + "invalid trace validation"); + return -EINVAL; + } + + len = sizeof(*tuninfo) + trace->remlen * 4; + len_aligned = ALIGN(len, 8); + + s = lwtunnel_state_alloc(len_aligned); + if (!s) + return -ENOMEM; + + tuninfo = ioam6_lwt_info(s); + tuninfo->eh.hdrlen = (len_aligned >> 3) - 1; + tuninfo->pad[0] = IPV6_TLV_PADN; + tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC; + tuninfo->ioamh.opt_type = IPV6_TLV_IOAM; + tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace) + + trace->remlen * 4; + + memcpy(&tuninfo->traceh, trace, sizeof(*trace)); + + len = len_aligned - len; + if (len == 1) { + tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1; + } else if (len > 0) { + tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN; + tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2; + } + + s->type = LWTUNNEL_ENCAP_IOAM6; + s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + + *ts = s; + + return 0; +} + +static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo) +{ + struct ioam6_trace_hdr *trace; + struct ipv6hdr *oldhdr, *hdr; + struct ioam6_namespace *ns; + int hdrlen, err; + + hdrlen = (tuninfo->eh.hdrlen + 1) << 3; + + err = skb_cow_head(skb, hdrlen + skb->mac_len); + if (unlikely(err)) + return err; + + oldhdr = ipv6_hdr(skb); + skb_pull(skb, sizeof(*oldhdr)); + skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr)); + + skb_push(skb, sizeof(*oldhdr) + hdrlen); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + hdr = ipv6_hdr(skb); + memmove(hdr, oldhdr, sizeof(*oldhdr)); + tuninfo->eh.nexthdr = hdr->nexthdr; + + skb_set_transport_header(skb, sizeof(*hdr)); + skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen); + + memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen); + + hdr->nexthdr = NEXTHDR_HOP; + hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); + + trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb) + + sizeof(struct ipv6_hopopt_hdr) + 2 + + sizeof(struct ioam6_hdr)); + + ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id); + if (ns) + ioam6_fill_trace_data(skb, ns, trace); + + return 0; +} + +static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate; + int err = -EINVAL; + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + /* Only for packets we send and + * that do not contain a Hop-by-Hop yet + */ + if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) + goto out; + + err = ioam6_do_inline(skb, ioam6_lwt_info(lwt)); + if (unlikely(err)) + goto drop; + + err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev)); + if (unlikely(err)) + goto drop; + +out: + return lwt->orig_output(net, sk, skb); + +drop: + kfree_skb(skb); + return err; +} + +static int ioam6_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + + if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace)) + return -EMSGSIZE; + + return 0; +} + +static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + + return nla_total_size(sizeof(*trace)); +} + +static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct ioam6_trace_hdr *a_hdr = ioam6_trace(a); + struct ioam6_trace_hdr *b_hdr = ioam6_trace(b); + + return (a_hdr->namespace_id != b_hdr->namespace_id); +} + +static const struct lwtunnel_encap_ops ioam6_iptun_ops = { + .build_state = ioam6_build_state, + .output = ioam6_output, + .fill_encap = ioam6_fill_encap_info, + .get_encap_size = ioam6_encap_nlsize, + .cmp_encap = ioam6_encap_cmp, + .owner = THIS_MODULE, +}; + +int __init ioam6_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); +} + +void ioam6_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); +} -- cgit v1.2.3 From 427faee167bce4ccb0b22f6815a9ac509e31d4c3 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 20 Jul 2021 23:06:27 +0300 Subject: net: ipv6: introduce ip6_dst_mtu_maybe_forward Replace ip6_dst_mtu_forward with ip6_dst_mtu_maybe_forward and reuse this code in ip6_mtu. Actually these two functions were almost duplicates, this change will simplify the maintaince of mtu calculation code. Signed-off-by: Vadim Fedorenko Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_route.h | 5 +++-- net/ipv6/ip6_output.c | 2 +- net/ipv6/route.c | 20 +------------------- net/netfilter/nf_flow_table_core.c | 2 +- 4 files changed, 6 insertions(+), 23 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 625a38ccb5d9..820eae3ea95f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -316,12 +316,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info * !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); } -static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst, + bool forwarding) { struct inet6_dev *idev; unsigned int mtu; - if (dst_metric_locked(dst, RTAX_MTU)) { + if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) { mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 01bea76e3891..f6bc7828a480 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb) } } - mtu = ip6_dst_mtu_forward(dst); + mtu = ip6_dst_mtu_maybe_forward(dst, true); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5f7286acca33..21d5d0b4cde5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3201,25 +3201,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst) { - struct inet6_dev *idev; - unsigned int mtu; - - mtu = dst_metric_raw(dst, RTAX_MTU); - if (mtu) - goto out; - - mtu = IPV6_MIN_MTU; - - rcu_read_lock(); - idev = __in6_dev_get(dst->dev); - if (idev) - mtu = idev->cnf.mtu6; - rcu_read_unlock(); - -out: - mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); - - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + return ip6_dst_mtu_maybe_forward(dst, false); } EXPORT_INDIRECT_CALLABLE(ip6_mtu); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 1e50908b1b7e..8fe024a0ae46 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -99,7 +99,7 @@ static int flow_offload_fill_route(struct flow_offload *flow, flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true); break; case NFPROTO_IPV6: - flow_tuple->mtu = ip6_dst_mtu_forward(dst); + flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true); break; } -- cgit v1.2.3 From ac6627a28dbfb5d96736544a00c3938fa7ea6dfb Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 20 Jul 2021 23:06:28 +0300 Subject: net: ipv4: Consolidate ipv4_mtu and ip_dst_mtu_maybe_forward Consolidate IPv4 MTU code the same way it is done in IPv6 to have code aligned in both address families Signed-off-by: Vadim Fedorenko Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip.h | 22 ++++++++++++++++++---- net/ipv4/route.c | 21 +-------------------- 2 files changed, 19 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index d9683bef8684..9192444f2964 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -436,18 +436,32 @@ static inline bool ip_sk_ignore_df(const struct sock *sk) static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { + const struct rtable *rt = container_of(dst, struct rtable, dst); struct net *net = dev_net(dst->dev); unsigned int mtu; if (net->ipv4.sysctl_ip_fwd_use_pmtu || ip_mtu_locked(dst) || - !forwarding) - return dst_mtu(dst); + !forwarding) { + mtu = rt->rt_pmtu; + if (mtu && time_before(jiffies, rt->dst.expires)) + goto out; + } /* 'forwarding = true' case should always honour route mtu */ mtu = dst_metric_raw(dst, RTAX_MTU); - if (!mtu) - mtu = min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); + if (mtu) + goto out; + + mtu = READ_ONCE(dst->dev->mtu); + + if (unlikely(ip_mtu_locked(dst))) { + if (rt->rt_uses_gateway && mtu > 576) + mtu = 576; + } + +out: + mtu = min_t(unsigned int, mtu, IP_MAX_MTU); return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 99c06944501a..04754d55b3c1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1299,26 +1299,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) { - const struct rtable *rt = (const struct rtable *)dst; - unsigned int mtu = rt->rt_pmtu; - - if (!mtu || time_after_eq(jiffies, rt->dst.expires)) - mtu = dst_metric_raw(dst, RTAX_MTU); - - if (mtu) - goto out; - - mtu = READ_ONCE(dst->dev->mtu); - - if (unlikely(ip_mtu_locked(dst))) { - if (rt->rt_uses_gateway && mtu > 576) - mtu = 576; - } - -out: - mtu = min_t(unsigned int, mtu, IP_MAX_MTU); - - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + return ip_dst_mtu_maybe_forward(dst, false); } EXPORT_INDIRECT_CALLABLE(ipv4_mtu); -- cgit v1.2.3 From 5b22d3669f2fa6e762c5302fc4b6051a92b81617 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 22 Jul 2021 18:55:39 +0300 Subject: net: dsa: track the number of switches in a tree In preparation of supporting data plane forwarding on behalf of a software bridge, some drivers might need to view bridges as virtual switches behind the CPU port in a cross-chip topology. Give them some help and let them know how many physical switches there are in the tree, so that they can count the virtual switches starting from that number on. Note that the first dsa_switch_ops method where this information is reliably available is .setup(). This is because of how DSA works: in a tree with 3 switches, each calling dsa_register_switch(), the first 2 will advance until dsa_tree_setup() -> dsa_tree_setup_routing_table() and exit with error code 0 because the topology is not complete. Since probing is parallel at this point, one switch does not know about the existence of the other. Then the third switch comes, and for it, dsa_tree_setup_routing_table() returns complete = true. This switch goes ahead and calls dsa_tree_setup_switches() for everybody else, calling their .setup() methods too. This acts as the synchronization point. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 3 +++ net/dsa/dsa2.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 9e5593885357..929bcaec4d41 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -159,6 +159,9 @@ struct dsa_switch_tree { */ struct net_device **lags; unsigned int lags_len; + + /* Track the largest switch index within a tree */ + unsigned int last_switch; }; #define dsa_lags_foreach_id(_id, _dst) \ diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 185629f27f80..de5e93ba2a9d 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1265,6 +1265,9 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds, return -EEXIST; } + if (ds->dst->last_switch < ds->index) + ds->dst->last_switch = ds->index; + return 0; } -- cgit v1.2.3 From 123abc06e74f49d9b173a93cb2b797fb85f50ba3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 22 Jul 2021 18:55:40 +0300 Subject: net: dsa: add support for bridge TX forwarding offload For a DSA switch, to offload the forwarding process of a bridge device means to send the packets coming from the software bridge as data plane packets. This is contrary to everything that DSA has done so far, because the current taggers only know to send control packets (ones that target a specific destination port), whereas data plane packets are supposed to be forwarded according to the FDB lookup, much like packets ingressing on any regular ingress port. If the FDB lookup process returns multiple destination ports (flooding, multicast), then replication is also handled by the switch hardware - the bridge only sends a single packet and avoids the skb_clone(). DSA keeps for each bridge port a zero-based index (the number of the bridge). Multiple ports performing TX forwarding offload to the same bridge have the same dp->bridge_num value, and ports not offloading the TX data plane of a bridge have dp->bridge_num = -1. The tagger can check if the packet that is being transmitted on has skb->offload_fwd_mark = true or not. If it does, it can be sure that the packet belongs to the data plane of a bridge, further information about which can be obtained based on dp->bridge_dev and dp->bridge_num. It can then compose a DSA tag for injecting a data plane packet into that bridge number. For the switch driver side, we offer two new dsa_switch_ops methods, called .port_bridge_fwd_offload_{add,del}, which are modeled after .port_bridge_{join,leave}. These methods are provided in case the driver needs to configure the hardware to treat packets coming from that bridge software interface as data plane packets. The switchdev <-> bridge interaction happens during the netdev_master_upper_dev_link() call, so to switch drivers, the effect is that the .port_bridge_fwd_offload_add() method is called immediately after .port_bridge_join(). If the bridge number exceeds the number of bridges for which the switch driver can offload the TX data plane (and this includes the case where the driver can offload none), DSA falls back to simply returning tx_fwd_offload = false in the switchdev_bridge_port_offload() call. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 18 ++++++++++++ net/dsa/dsa2.c | 1 + net/dsa/dsa_priv.h | 2 ++ net/dsa/port.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 104 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 929bcaec4d41..f8eb2dc3fbef 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -162,6 +162,9 @@ struct dsa_switch_tree { /* Track the largest switch index within a tree */ unsigned int last_switch; + + /* Track the bridges with forwarding offload enabled */ + unsigned long fwd_offloading_bridges; }; #define dsa_lags_foreach_id(_id, _dst) \ @@ -262,6 +265,7 @@ struct dsa_port { bool vlan_filtering; u8 stp_state; struct net_device *bridge_dev; + int bridge_num; struct devlink_port devlink_port; bool devlink_port_setup; struct phylink *pl; @@ -413,6 +417,12 @@ struct dsa_switch { */ unsigned int num_lag_ids; + /* Drivers that support bridge forwarding offload should set this to + * the maximum number of bridges spanning the same switch tree that can + * be offloaded. + */ + unsigned int num_fwd_offloading_bridges; + size_t num_ports; }; @@ -696,6 +706,14 @@ struct dsa_switch_ops { struct net_device *bridge); void (*port_bridge_leave)(struct dsa_switch *ds, int port, struct net_device *bridge); + /* Called right after .port_bridge_join() */ + int (*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port, + struct net_device *bridge, + int bridge_num); + /* Called right before .port_bridge_leave() */ + void (*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port, + struct net_device *bridge, + int bridge_num); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index de5e93ba2a9d..c7fa85fb3086 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1044,6 +1044,7 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) dp->ds = ds; dp->index = index; + dp->bridge_num = -1; INIT_LIST_HEAD(&dp->list); list_add_tail(&dp->list, &dst->ports); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 78c70f5bdab5..b1d9aa4d313c 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -14,6 +14,8 @@ #include #include +#define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG + enum { DSA_NOTIFIER_AGEING_TIME, DSA_NOTIFIER_BRIDGE_JOIN, diff --git a/net/dsa/port.c b/net/dsa/port.c index f2704f101ccf..7b9bf45a76b6 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -230,6 +230,83 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp) */ } +static int dsa_tree_find_bridge_num(struct dsa_switch_tree *dst, + struct net_device *bridge_dev) +{ + struct dsa_port *dp; + + /* When preparing the offload for a port, it will have a valid + * dp->bridge_dev pointer but a not yet valid dp->bridge_num. + * However there might be other ports having the same dp->bridge_dev + * and a valid dp->bridge_num, so just ignore this port. + */ + list_for_each_entry(dp, &dst->ports, list) + if (dp->bridge_dev == bridge_dev && dp->bridge_num != -1) + return dp->bridge_num; + + return -1; +} + +static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp, + struct net_device *bridge_dev) +{ + struct dsa_switch_tree *dst = dp->ds->dst; + int bridge_num = dp->bridge_num; + struct dsa_switch *ds = dp->ds; + + /* No bridge TX forwarding offload => do nothing */ + if (!ds->ops->port_bridge_tx_fwd_unoffload || dp->bridge_num == -1) + return; + + dp->bridge_num = -1; + + /* Check if the bridge is still in use, otherwise it is time + * to clean it up so we can reuse this bridge_num later. + */ + if (!dsa_tree_find_bridge_num(dst, bridge_dev)) + clear_bit(bridge_num, &dst->fwd_offloading_bridges); + + /* Notify the chips only once the offload has been deactivated, so + * that they can update their configuration accordingly. + */ + ds->ops->port_bridge_tx_fwd_unoffload(ds, dp->index, bridge_dev, + bridge_num); +} + +static bool dsa_port_bridge_tx_fwd_offload(struct dsa_port *dp, + struct net_device *bridge_dev) +{ + struct dsa_switch_tree *dst = dp->ds->dst; + struct dsa_switch *ds = dp->ds; + int bridge_num, err; + + if (!ds->ops->port_bridge_tx_fwd_offload) + return false; + + bridge_num = dsa_tree_find_bridge_num(dst, bridge_dev); + if (bridge_num < 0) { + /* First port that offloads TX forwarding for this bridge */ + bridge_num = find_first_zero_bit(&dst->fwd_offloading_bridges, + DSA_MAX_NUM_OFFLOADING_BRIDGES); + if (bridge_num >= ds->num_fwd_offloading_bridges) + return false; + + set_bit(bridge_num, &dst->fwd_offloading_bridges); + } + + dp->bridge_num = bridge_num; + + /* Notify the driver */ + err = ds->ops->port_bridge_tx_fwd_offload(ds, dp->index, bridge_dev, + bridge_num); + if (err) { + dsa_port_bridge_tx_fwd_unoffload(dp, bridge_dev); + return false; + } + + return true; +} + int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, struct netlink_ext_ack *extack) { @@ -241,6 +318,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, }; struct net_device *dev = dp->slave; struct net_device *brport_dev; + bool tx_fwd_offload; int err; /* Here the interface is already bridged. Reflect the current @@ -254,10 +332,12 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, if (err) goto out_rollback; + tx_fwd_offload = dsa_port_bridge_tx_fwd_offload(dp, br); + err = switchdev_bridge_port_offload(brport_dev, dev, dp, &dsa_slave_switchdev_notifier, &dsa_slave_switchdev_blocking_notifier, - false, extack); + tx_fwd_offload, extack); if (err) goto out_rollback_unbridge; @@ -302,6 +382,8 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) */ dp->bridge_dev = NULL; + dsa_port_bridge_tx_fwd_unoffload(dp, br); + err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); if (err) pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); -- cgit v1.2.3 From 62001372c2b6cdf2346afb2cf94ed3d950eee64c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 1 Jul 2021 13:05:54 -0700 Subject: bpf: tcp: seq_file: Remove bpf_seq_afinfo from tcp_iter_state A following patch will create a separate struct to store extra bpf_iter state and it will embed the existing tcp_iter_state like this: struct bpf_tcp_iter_state { struct tcp_iter_state state; /* More bpf_iter specific states here ... */ } As a prep work, this patch removes the "struct tcp_seq_afinfo *bpf_seq_afinfo" where its purpose is to tell if it is iterating from bpf_iter instead of proc fs. Currently, if "*bpf_seq_afinfo" is not NULL, it is iterating from bpf_iter. The kernel should not filter by the addr family and leave this filtering decision to the bpf prog. Instead of adding a "*bpf_seq_afinfo" pointer, this patch uses the "seq->op == &bpf_iter_tcp_seq_ops" test to tell if it is iterating from the bpf iter. The bpf_iter_(init|fini)_tcp() is left here to prepare for the change of a following patch. Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Reviewed-by: Eric Dumazet Acked-by: Kuniyuki Iwashima Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210701200554.1034982-1-kafai@fb.com --- include/net/tcp.h | 1 - net/ipv4/tcp_ipv4.c | 25 +++++-------------------- 2 files changed, 5 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 17df9b047ee4..ba3034123e1d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1959,7 +1959,6 @@ struct tcp_iter_state { struct seq_net_private p; enum tcp_seq_states state; struct sock *syn_wait_sk; - struct tcp_seq_afinfo *bpf_seq_afinfo; int bucket, offset, sbucket, num; loff_t last_pos; }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f2583c4699fd..665f99d14436 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2735,12 +2735,13 @@ static const struct seq_operations bpf_iter_tcp_seq_ops = { #endif static unsigned short seq_file_family(const struct seq_file *seq) { - const struct tcp_iter_state *st = seq->private; - const struct tcp_seq_afinfo *afinfo = st->bpf_seq_afinfo; + const struct tcp_seq_afinfo *afinfo; +#ifdef CONFIG_BPF_SYSCALL /* Iterated from bpf_iter. Let the bpf prog to filter instead. */ - if (afinfo) + if (seq->op == &bpf_iter_tcp_seq_ops) return AF_UNSPEC; +#endif /* Iterated from proc fs */ afinfo = PDE_DATA(file_inode(seq->file)); @@ -2998,27 +2999,11 @@ DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta, static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux) { - struct tcp_iter_state *st = priv_data; - struct tcp_seq_afinfo *afinfo; - int ret; - - afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN); - if (!afinfo) - return -ENOMEM; - - afinfo->family = AF_UNSPEC; - st->bpf_seq_afinfo = afinfo; - ret = bpf_iter_init_seq_net(priv_data, aux); - if (ret) - kfree(afinfo); - return ret; + return bpf_iter_init_seq_net(priv_data, aux); } static void bpf_iter_fini_tcp(void *priv_data) { - struct tcp_iter_state *st = priv_data; - - kfree(st->bpf_seq_afinfo); bpf_iter_fini_seq_net(priv_data); } -- cgit v1.2.3 From 05c0b35709c58b83d4dc515d2ac52e9c0f197d03 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 1 Jul 2021 13:06:06 -0700 Subject: tcp: seq_file: Replace listening_hash with lhash2 This patch moves the tcp seq_file iteration on listeners from the port only listening_hash to the port+addr lhash2. When iterating from the bpf iter, the next patch will need to lock the socket such that the bpf iter can call setsockopt (e.g. to change the TCP_CONGESTION). To avoid locking the bucket and then locking the sock, the bpf iter will first batch some sockets from the same bucket and then unlock the bucket. If the bucket size is small (which usually is), it is easier to batch the whole bucket such that it is less likely to miss a setsockopt on a socket due to changes in the bucket. However, the port only listening_hash could have many listeners hashed to a bucket (e.g. many individual VIP(s):443 and also multiple by the number of SO_REUSEPORT). We have seen bucket size in tens of thousands range. Also, the chance of having changes in some popular port buckets (e.g. 443) is also high. The port+addr lhash2 was introduced to solve this large listener bucket issue. Also, the listening_hash usage has already been replaced with lhash2 in the fast path inet[6]_lookup_listener(). This patch follows the same direction on moving to lhash2 and iterates the lhash2 instead of listening_hash. Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Reviewed-by: Eric Dumazet Acked-by: Kuniyuki Iwashima Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210701200606.1035783-1-kafai@fb.com --- include/net/inet_hashtables.h | 6 ++++++ net/ipv4/tcp_ipv4.c | 35 ++++++++++++++++++----------------- 2 files changed, 24 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ca6a3ea9057e..f72ec113ae56 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -160,6 +160,12 @@ struct inet_hashinfo { ____cacheline_aligned_in_smp; }; +#define inet_lhash2_for_each_icsk_continue(__icsk) \ + hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node) + +#define inet_lhash2_for_each_icsk(__icsk, list) \ + hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node) + #define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 48a0a3873c7a..d38b4379dca4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2296,21 +2296,22 @@ static void *listening_get_first(struct seq_file *seq) struct tcp_iter_state *st = seq->private; st->offset = 0; - for (; st->bucket < INET_LHTABLE_SIZE; st->bucket++) { - struct inet_listen_hashbucket *ilb; - struct hlist_nulls_node *node; + for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) { + struct inet_listen_hashbucket *ilb2; + struct inet_connection_sock *icsk; struct sock *sk; - ilb = &tcp_hashinfo.listening_hash[st->bucket]; - if (hlist_nulls_empty(&ilb->nulls_head)) + ilb2 = &tcp_hashinfo.lhash2[st->bucket]; + if (hlist_empty(&ilb2->head)) continue; - spin_lock(&ilb->lock); - sk_nulls_for_each(sk, node, &ilb->nulls_head) { + spin_lock(&ilb2->lock); + inet_lhash2_for_each_icsk(icsk, &ilb2->head) { + sk = (struct sock *)icsk; if (seq_sk_match(seq, sk)) return sk; } - spin_unlock(&ilb->lock); + spin_unlock(&ilb2->lock); } return NULL; @@ -2324,22 +2325,22 @@ static void *listening_get_first(struct seq_file *seq) static void *listening_get_next(struct seq_file *seq, void *cur) { struct tcp_iter_state *st = seq->private; - struct inet_listen_hashbucket *ilb; - struct hlist_nulls_node *node; + struct inet_listen_hashbucket *ilb2; + struct inet_connection_sock *icsk; struct sock *sk = cur; ++st->num; ++st->offset; - sk = sk_nulls_next(sk); - - sk_nulls_for_each_from(sk, node) { + icsk = inet_csk(sk); + inet_lhash2_for_each_icsk_continue(icsk) { + sk = (struct sock *)icsk; if (seq_sk_match(seq, sk)) return sk; } - ilb = &tcp_hashinfo.listening_hash[st->bucket]; - spin_unlock(&ilb->lock); + ilb2 = &tcp_hashinfo.lhash2[st->bucket]; + spin_unlock(&ilb2->lock); ++st->bucket; return listening_get_first(seq); } @@ -2456,7 +2457,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq) switch (st->state) { case TCP_SEQ_STATE_LISTENING: - if (st->bucket >= INET_LHTABLE_SIZE) + if (st->bucket > tcp_hashinfo.lhash2_mask) break; st->state = TCP_SEQ_STATE_LISTENING; rc = listening_get_first(seq); @@ -2541,7 +2542,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v) switch (st->state) { case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock); + spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock); break; case TCP_SEQ_STATE_ESTABLISHED: if (v) -- cgit v1.2.3 From 48d5440393d328cfe18b26e3609c84c0f41d6515 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 24 Jul 2021 23:47:32 +0200 Subject: nfc: constify payload argument in nci_send_cmd() The nci_send_cmd() payload argument is passed directly to skb_put_data() which already accepts a pointer to const, so make it const as well for correctness and safety. Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/net/nfc/nci_core.h | 2 +- net/nfc/nci/core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 1df0f8074c9d..bf573eca07ca 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -360,7 +360,7 @@ int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode, int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb); void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb); -int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload); +int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload); int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb); int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id); void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index da7fe9db1b00..09967b836361 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1332,7 +1332,7 @@ int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) EXPORT_SYMBOL(nci_send_frame); /* Send NCI command */ -int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) +int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload) { struct nci_ctrl_hdr *hdr; struct sk_buff *skb; -- cgit v1.2.3 From b9c28286d8f1822a10f702fcefe0b8a1db4e6917 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 24 Jul 2021 23:47:33 +0200 Subject: nfc: constify nci_ops The struct nci_ops is modified by NFC core in only one case: nci_allocate_device() receives too many proprietary commands (prop_ops) to configure. This is a build time known constrain, so a graceful handling of such case is not necessary. Instead, fail the nci_allocate_device() and add BUILD_BUG_ON() to places which set these. This allows to constify the struct nci_ops (consisting of function pointers) for correctness and safety. Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/fdp/fdp.c | 3 ++- drivers/nfc/nfcmrvl/main.c | 2 +- drivers/nfc/nxp-nci/core.c | 2 +- drivers/nfc/st-nci/core.c | 3 ++- drivers/nfc/virtual_ncidev.c | 2 +- include/net/nfc/nci_core.h | 4 ++-- net/nfc/nci/core.c | 5 ++--- 7 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index 528745862738..73f51848a693 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -675,7 +675,7 @@ static struct nci_driver_ops fdp_prop_ops[] = { }, }; -static struct nci_ops nci_ops = { +static const struct nci_ops nci_ops = { .open = fdp_nci_open, .close = fdp_nci_close, .send = fdp_nci_send, @@ -718,6 +718,7 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops, NFC_PROTO_NFC_DEP_MASK | NFC_PROTO_ISO15693_MASK; + BUILD_BUG_ON(ARRAY_SIZE(fdp_prop_ops) > NCI_MAX_PROPRIETARY_CMD); ndev = nci_allocate_device(&nci_ops, protocols, tx_headroom, tx_tailroom); if (!ndev) { diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index a4620b480c4f..6e9e7ce8792c 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -81,7 +81,7 @@ static int nfcmrvl_nci_fw_download(struct nci_dev *ndev, return nfcmrvl_fw_dnld_start(ndev, firmware_name); } -static struct nci_ops nfcmrvl_nci_ops = { +static const struct nci_ops nfcmrvl_nci_ops = { .open = nfcmrvl_nci_open, .close = nfcmrvl_nci_close, .send = nfcmrvl_nci_send, diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c index 2b0c7232e91f..518e2afb43a8 100644 --- a/drivers/nfc/nxp-nci/core.c +++ b/drivers/nfc/nxp-nci/core.c @@ -83,7 +83,7 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) return r; } -static struct nci_ops nxp_nci_ops = { +static const struct nci_ops nxp_nci_ops = { .open = nxp_nci_open, .close = nxp_nci_close, .send = nxp_nci_send, diff --git a/drivers/nfc/st-nci/core.c b/drivers/nfc/st-nci/core.c index 110ff1281e5f..f6fce34a77da 100644 --- a/drivers/nfc/st-nci/core.c +++ b/drivers/nfc/st-nci/core.c @@ -94,7 +94,7 @@ static struct nci_driver_ops st_nci_prop_ops[] = { }, }; -static struct nci_ops st_nci_ops = { +static const struct nci_ops st_nci_ops = { .init = st_nci_init, .open = st_nci_open, .close = st_nci_close, @@ -131,6 +131,7 @@ int st_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, | NFC_PROTO_ISO15693_MASK | NFC_PROTO_NFC_DEP_MASK; + BUILD_BUG_ON(ARRAY_SIZE(st_nci_prop_ops) > NCI_MAX_PROPRIETARY_CMD); ndlc->ndev = nci_allocate_device(&st_nci_ops, protocols, phy_headroom, phy_tailroom); if (!ndlc->ndev) { diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c index f73ee0bf3593..b914ab2c2109 100644 --- a/drivers/nfc/virtual_ncidev.c +++ b/drivers/nfc/virtual_ncidev.c @@ -65,7 +65,7 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb) return 0; } -static struct nci_ops virtual_nci_ops = { +static const struct nci_ops virtual_nci_ops = { .open = virtual_nci_open, .close = virtual_nci_close, .send = virtual_nci_send diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index bf573eca07ca..5dae7e2cbc49 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -194,7 +194,7 @@ struct nci_hci_dev { /* NCI Core structures */ struct nci_dev { struct nfc_dev *nfc_dev; - struct nci_ops *ops; + const struct nci_ops *ops; struct nci_hci_dev *hci_dev; int tx_headroom; @@ -267,7 +267,7 @@ struct nci_dev { }; /* ----- NCI Devices ----- */ -struct nci_dev *nci_allocate_device(struct nci_ops *ops, +struct nci_dev *nci_allocate_device(const struct nci_ops *ops, __u32 supported_protocols, int tx_headroom, int tx_tailroom); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 09967b836361..a7d26f2791b0 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1129,7 +1129,7 @@ static struct nfc_ops nci_nfc_ops = { * @tx_headroom: Reserved space at beginning of skb * @tx_tailroom: Reserved space at end of skb */ -struct nci_dev *nci_allocate_device(struct nci_ops *ops, +struct nci_dev *nci_allocate_device(const struct nci_ops *ops, __u32 supported_protocols, int tx_headroom, int tx_tailroom) { @@ -1152,8 +1152,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, if (ops->n_prop_ops > NCI_MAX_PROPRIETARY_CMD) { pr_err("Too many proprietary commands: %zd\n", ops->n_prop_ops); - ops->prop_ops = NULL; - ops->n_prop_ops = 0; + goto free_nci; } ndev->tx_headroom = tx_headroom; -- cgit v1.2.3 From cb8caa3c6c04c18027258ab34e09bcf87e726f22 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 24 Jul 2021 23:47:35 +0200 Subject: nfc: constify nci_driver_ops (prop_ops and core_ops) Neither the core nor the drivers modify the passed pointer to struct nci_driver_ops (consisting of function pointers), so make it a pointer to const for correctness and safety. Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/fdp/fdp.c | 4 ++-- drivers/nfc/s3fwrn5/nci.c | 2 +- drivers/nfc/s3fwrn5/nci.h | 2 +- drivers/nfc/st-nci/core.c | 2 +- include/net/nfc/nci_core.h | 4 ++-- net/nfc/nci/core.c | 16 ++++++++-------- 6 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index 73f51848a693..4d88a617d0e8 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -651,7 +651,7 @@ static int fdp_nci_core_get_config_rsp_packet(struct nci_dev *ndev, return 0; } -static struct nci_driver_ops fdp_core_ops[] = { +static const struct nci_driver_ops fdp_core_ops[] = { { .opcode = NCI_OP_CORE_GET_CONFIG_RSP, .rsp = fdp_nci_core_get_config_rsp_packet, @@ -662,7 +662,7 @@ static struct nci_driver_ops fdp_core_ops[] = { }, }; -static struct nci_driver_ops fdp_prop_ops[] = { +static const struct nci_driver_ops fdp_prop_ops[] = { { .opcode = nci_opcode_pack(NCI_GID_PROP, NCI_OP_PROP_PATCH_OID), .rsp = fdp_nci_prop_patch_rsp_packet, diff --git a/drivers/nfc/s3fwrn5/nci.c b/drivers/nfc/s3fwrn5/nci.c index 819e3474a437..e374e670b36b 100644 --- a/drivers/nfc/s3fwrn5/nci.c +++ b/drivers/nfc/s3fwrn5/nci.c @@ -20,7 +20,7 @@ static int s3fwrn5_nci_prop_rsp(struct nci_dev *ndev, struct sk_buff *skb) return 0; } -struct nci_driver_ops s3fwrn5_nci_prop_ops[4] = { +const struct nci_driver_ops s3fwrn5_nci_prop_ops[4] = { { .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, NCI_PROP_SET_RFREG), diff --git a/drivers/nfc/s3fwrn5/nci.h b/drivers/nfc/s3fwrn5/nci.h index 5c22c5315f79..c2d906591e9e 100644 --- a/drivers/nfc/s3fwrn5/nci.h +++ b/drivers/nfc/s3fwrn5/nci.h @@ -50,7 +50,7 @@ struct nci_prop_fw_cfg_rsp { __u8 status; }; -extern struct nci_driver_ops s3fwrn5_nci_prop_ops[4]; +extern const struct nci_driver_ops s3fwrn5_nci_prop_ops[4]; int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name); #endif /* __LOCAL_S3FWRN5_NCI_H_ */ diff --git a/drivers/nfc/st-nci/core.c b/drivers/nfc/st-nci/core.c index f6fce34a77da..72bb51efdf9c 100644 --- a/drivers/nfc/st-nci/core.c +++ b/drivers/nfc/st-nci/core.c @@ -86,7 +86,7 @@ static int st_nci_prop_rsp_packet(struct nci_dev *ndev, return 0; } -static struct nci_driver_ops st_nci_prop_ops[] = { +static const struct nci_driver_ops st_nci_prop_ops[] = { { .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, ST_NCI_CORE_PROP), diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 5dae7e2cbc49..e7118e0cc3b1 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -82,10 +82,10 @@ struct nci_ops { void (*hci_cmd_received)(struct nci_dev *ndev, u8 pipe, u8 cmd, struct sk_buff *skb); - struct nci_driver_ops *prop_ops; + const struct nci_driver_ops *prop_ops; size_t n_prop_ops; - struct nci_driver_ops *core_ops; + const struct nci_driver_ops *core_ops; size_t n_core_ops; }; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index a7d26f2791b0..50c625940fa3 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1363,12 +1363,12 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payl EXPORT_SYMBOL(nci_send_cmd); /* Proprietary commands API */ -static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops, - size_t n_ops, - __u16 opcode) +static const struct nci_driver_ops *ops_cmd_lookup(const struct nci_driver_ops *ops, + size_t n_ops, + __u16 opcode) { size_t i; - struct nci_driver_ops *op; + const struct nci_driver_ops *op; if (!ops || !n_ops) return NULL; @@ -1383,10 +1383,10 @@ static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops, } static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, - struct sk_buff *skb, struct nci_driver_ops *ops, + struct sk_buff *skb, const struct nci_driver_ops *ops, size_t n_ops) { - struct nci_driver_ops *op; + const struct nci_driver_ops *op; op = ops_cmd_lookup(ops, n_ops, rsp_opcode); if (!op || !op->rsp) @@ -1396,10 +1396,10 @@ static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, } static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode, - struct sk_buff *skb, struct nci_driver_ops *ops, + struct sk_buff *skb, const struct nci_driver_ops *ops, size_t n_ops) { - struct nci_driver_ops *op; + const struct nci_driver_ops *op; op = ops_cmd_lookup(ops, n_ops, ntf_opcode); if (!op || !op->ntf) -- cgit v1.2.3 From 15944ad2e5a1cc2ef2c6fa60b04e464dbb4ddb61 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 24 Jul 2021 23:49:23 +0200 Subject: nfc: constify pointer to nfc_vendor_cmd Neither the core nor the drivers modify the passed pointer to struct nfc_vendor_cmd, so make it a pointer to const for correctness and safety. Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/st-nci/vendor_cmds.c | 2 +- drivers/nfc/st21nfca/vendor_cmds.c | 2 +- include/net/nfc/hci.h | 2 +- include/net/nfc/nci_core.h | 2 +- include/net/nfc/nfc.h | 4 ++-- net/nfc/netlink.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/st-nci/vendor_cmds.c b/drivers/nfc/st-nci/vendor_cmds.c index 94b600029a2a..30d2912d1a05 100644 --- a/drivers/nfc/st-nci/vendor_cmds.c +++ b/drivers/nfc/st-nci/vendor_cmds.c @@ -371,7 +371,7 @@ static int st_nci_manufacturer_specific(struct nfc_dev *dev, void *data, return nfc_vendor_cmd_reply(msg); } -static struct nfc_vendor_cmd st_nci_vendor_cmds[] = { +static const struct nfc_vendor_cmd st_nci_vendor_cmds[] = { { .vendor_id = ST_NCI_VENDOR_OUI, .subcmd = FACTORY_MODE, diff --git a/drivers/nfc/st21nfca/vendor_cmds.c b/drivers/nfc/st21nfca/vendor_cmds.c index 62332ca91554..74882866dbaf 100644 --- a/drivers/nfc/st21nfca/vendor_cmds.c +++ b/drivers/nfc/st21nfca/vendor_cmds.c @@ -295,7 +295,7 @@ exit: return r; } -static struct nfc_vendor_cmd st21nfca_vendor_cmds[] = { +static const struct nfc_vendor_cmd st21nfca_vendor_cmds[] = { { .vendor_id = ST21NFCA_VENDOR_OUI, .subcmd = FACTORY_MODE, diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index b35f37a57686..2daec8036be9 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -168,7 +168,7 @@ void nfc_hci_set_clientdata(struct nfc_hci_dev *hdev, void *clientdata); void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev); static inline int nfc_hci_set_vendor_cmds(struct nfc_hci_dev *hdev, - struct nfc_vendor_cmd *cmds, + const struct nfc_vendor_cmd *cmds, int n_cmds) { return nfc_set_vendor_cmds(hdev->ndev, cmds, n_cmds); diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index e7118e0cc3b1..00f2c60971d7 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -343,7 +343,7 @@ static inline void *nci_get_drvdata(struct nci_dev *ndev) } static inline int nci_set_vendor_cmds(struct nci_dev *ndev, - struct nfc_vendor_cmd *cmds, + const struct nfc_vendor_cmd *cmds, int n_cmds) { return nfc_set_vendor_cmds(ndev->nfc_dev, cmds, n_cmds); diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 2cd3a261bcbc..31672021d071 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -188,7 +188,7 @@ struct nfc_dev { struct rfkill *rfkill; - struct nfc_vendor_cmd *vendor_cmds; + const struct nfc_vendor_cmd *vendor_cmds; int n_vendor_cmds; struct nfc_ops *ops; @@ -297,7 +297,7 @@ void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, u8 payload_type, u8 direction); static inline int nfc_set_vendor_cmds(struct nfc_dev *dev, - struct nfc_vendor_cmd *cmds, + const struct nfc_vendor_cmd *cmds, int n_cmds) { if (dev->vendor_cmds || dev->n_vendor_cmds) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 722f7ef891e1..70467a82be8f 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1531,7 +1531,7 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; - struct nfc_vendor_cmd *cmd; + const struct nfc_vendor_cmd *cmd; u32 dev_idx, vid, subcmd; u8 *data; size_t data_len; -- cgit v1.2.3 From f6c802a726aea43a2937763f13560c1fd170fae6 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 24 Jul 2021 23:49:25 +0200 Subject: nfc: constify nfc_ops Neither the core nor the drivers modify the passed pointer to struct nfc_ops, so make it a pointer to const for correctness and safety. Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/pn533/pn533.c | 2 +- include/net/nfc/nfc.h | 4 ++-- net/nfc/core.c | 2 +- net/nfc/digital_core.c | 2 +- net/nfc/hci/core.c | 2 +- net/nfc/nci/core.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index cd64bfe20402..2f3f3fe9a0ba 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -2623,7 +2623,7 @@ static int pn533_dev_down(struct nfc_dev *nfc_dev) return ret; } -static struct nfc_ops pn533_nfc_ops = { +static const struct nfc_ops pn533_nfc_ops = { .dev_up = pn533_dev_up, .dev_down = pn533_dev_down, .dep_link_up = pn533_dep_link_up, diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 31672021d071..85b698794b14 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -191,14 +191,14 @@ struct nfc_dev { const struct nfc_vendor_cmd *vendor_cmds; int n_vendor_cmds; - struct nfc_ops *ops; + const struct nfc_ops *ops; struct genl_info *cur_cmd_info; }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) extern struct class nfc_class; -struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, +struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, u32 supported_protocols, int tx_headroom, int tx_tailroom); diff --git a/net/nfc/core.c b/net/nfc/core.c index 573c80c6ff7a..6ade54149b73 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1048,7 +1048,7 @@ struct nfc_dev *nfc_get_device(unsigned int idx) * @tx_headroom: reserved space at beginning of skb * @tx_tailroom: reserved space at end of skb */ -struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, +struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, u32 supported_protocols, int tx_headroom, int tx_tailroom) { diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index 5044c7db577e..8f2572decccd 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -732,7 +732,7 @@ exit: return rc; } -static struct nfc_ops digital_nfc_ops = { +static const struct nfc_ops digital_nfc_ops = { .dev_up = digital_dev_up, .dev_down = digital_dev_down, .start_poll = digital_start_poll, diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index e37d30302b06..b33fe4ee1581 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -928,7 +928,7 @@ static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) return hdev->ops->fw_download(hdev, firmware_name); } -static struct nfc_ops hci_nfc_ops = { +static const struct nfc_ops hci_nfc_ops = { .dev_up = hci_dev_up, .dev_down = hci_dev_down, .start_poll = hci_start_poll, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 50c625940fa3..400d66c4e210 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1102,7 +1102,7 @@ static int nci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) return ndev->ops->fw_download(ndev, firmware_name); } -static struct nfc_ops nci_nfc_ops = { +static const struct nfc_ops nci_nfc_ops = { .dev_up = nci_dev_up, .dev_down = nci_dev_down, .start_poll = nci_start_poll, -- cgit v1.2.3 From 094c45c84d799fb0eb3617632097370843ba3678 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 24 Jul 2021 23:49:26 +0200 Subject: nfc: constify nfc_hci_ops Neither the core nor the drivers modify the passed pointer to struct nfc_hci_ops, so make it a pointer to const for correctness and safety. Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/microread/microread.c | 2 +- drivers/nfc/pn544/pn544.c | 2 +- drivers/nfc/st21nfca/core.c | 2 +- include/net/nfc/hci.h | 4 ++-- net/nfc/hci/core.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c index 151a0631ec72..8e847524937c 100644 --- a/drivers/nfc/microread/microread.c +++ b/drivers/nfc/microread/microread.c @@ -625,7 +625,7 @@ static int microread_event_received(struct nfc_hci_dev *hdev, u8 pipe, return r; } -static struct nfc_hci_ops microread_hci_ops = { +static const struct nfc_hci_ops microread_hci_ops = { .open = microread_open, .close = microread_close, .hci_ready = microread_hci_ready, diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index f4d09ebba5c8..c2b4555ab4b7 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -881,7 +881,7 @@ static int pn544_hci_disable_se(struct nfc_hci_dev *hdev, u32 se_idx) } } -static struct nfc_hci_ops pn544_hci_ops = { +static const struct nfc_hci_ops pn544_hci_ops = { .open = pn544_hci_open, .close = pn544_hci_close, .hci_ready = pn544_hci_ready, diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c index 675f8a342869..5e6c99fcfd27 100644 --- a/drivers/nfc/st21nfca/core.c +++ b/drivers/nfc/st21nfca/core.c @@ -912,7 +912,7 @@ static int st21nfca_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, } } -static struct nfc_hci_ops st21nfca_hci_ops = { +static const struct nfc_hci_ops st21nfca_hci_ops = { .open = st21nfca_hci_open, .close = st21nfca_hci_close, .load_session = st21nfca_hci_load_session, diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 2daec8036be9..756c11084f65 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -118,7 +118,7 @@ struct nfc_hci_dev { struct sk_buff_head msg_rx_queue; - struct nfc_hci_ops *ops; + const struct nfc_hci_ops *ops; struct nfc_llc *llc; @@ -151,7 +151,7 @@ struct nfc_hci_dev { }; /* hci device allocation */ -struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, +struct nfc_hci_dev *nfc_hci_allocate_device(const struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index b33fe4ee1581..ff94ac774937 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -947,7 +947,7 @@ static const struct nfc_ops hci_nfc_ops = { .se_io = hci_se_io, }; -struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, +struct nfc_hci_dev *nfc_hci_allocate_device(const struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, -- cgit v1.2.3 From 7186aac9c22de855220ebc54df7af399647bd1fc Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 24 Jul 2021 23:49:28 +0200 Subject: nfc: constify nfc_digital_ops Neither the core nor the drivers modify the passed pointer to struct nfc_digital_ops, so make it a pointer to const for correctness and safety. Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/nfcsim.c | 2 +- drivers/nfc/port100.c | 2 +- drivers/nfc/st95hf/core.c | 2 +- drivers/nfc/trf7970a.c | 2 +- include/net/nfc/digital.h | 4 ++-- net/nfc/digital_core.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c index a9864fcdfba6..143dc49b815b 100644 --- a/drivers/nfc/nfcsim.c +++ b/drivers/nfc/nfcsim.c @@ -320,7 +320,7 @@ static int nfcsim_tg_listen(struct nfc_digital_dev *ddev, u16 timeout, return nfcsim_send(ddev, NULL, timeout, cb, arg); } -static struct nfc_digital_ops nfcsim_digital_ops = { +static const struct nfc_digital_ops nfcsim_digital_ops = { .in_configure_hw = nfcsim_in_configure_hw, .in_send_cmd = nfcsim_in_send_cmd, diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 1d614f9d864a..ccb5c5fab905 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -1463,7 +1463,7 @@ static int port100_listen(struct nfc_digital_dev *ddev, u16 timeout, return port100_tg_send_cmd(ddev, skb, timeout, cb, arg); } -static struct nfc_digital_ops port100_digital_ops = { +static const struct nfc_digital_ops port100_digital_ops = { .in_configure_hw = port100_in_configure_hw, .in_send_cmd = port100_in_send_cmd, diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index 2dc788c363fd..993818742570 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -1037,7 +1037,7 @@ static void st95hf_abort_cmd(struct nfc_digital_dev *ddev) { } -static struct nfc_digital_ops st95hf_nfc_digital_ops = { +static const struct nfc_digital_ops st95hf_nfc_digital_ops = { .in_configure_hw = st95hf_in_configure_hw, .in_send_cmd = st95hf_in_send_cmd, diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 33978022ae47..1aed44629aaa 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -1861,7 +1861,7 @@ static void trf7970a_abort_cmd(struct nfc_digital_dev *ddev) mutex_unlock(&trf->lock); } -static struct nfc_digital_ops trf7970a_nfc_ops = { +static const struct nfc_digital_ops trf7970a_nfc_ops = { .in_configure_hw = trf7970a_in_configure_hw, .in_send_cmd = trf7970a_send_cmd, .tg_configure_hw = trf7970a_tg_configure_hw, diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h index 963db96bcbbb..bb3e8fdc0692 100644 --- a/include/net/nfc/digital.h +++ b/include/net/nfc/digital.h @@ -191,7 +191,7 @@ struct digital_poll_tech { struct nfc_digital_dev { struct nfc_dev *nfc_dev; - struct nfc_digital_ops *ops; + const struct nfc_digital_ops *ops; u32 protocols; @@ -236,7 +236,7 @@ struct nfc_digital_dev { void (*skb_add_crc)(struct sk_buff *skb); }; -struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops, +struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops, __u32 supported_protocols, __u32 driver_capabilities, int tx_headroom, diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index 8f2572decccd..fefc03674f4f 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -745,7 +745,7 @@ static const struct nfc_ops digital_nfc_ops = { .im_transceive = digital_in_send, }; -struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops, +struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops, __u32 supported_protocols, __u32 driver_capabilities, int tx_headroom, int tx_tailroom) -- cgit v1.2.3 From 058e6e0ed0eace43401c945082dec1d669b5b231 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 25 Jul 2021 13:42:50 -0400 Subject: sctp: improve the code for pmtu probe send and recv update This patch does 3 things: - make sctp_transport_pl_send() and sctp_transport_pl_recv() return bool type to decide if more probe is needed to send. - pr_debug() only when probe is really needed to send. - count pl.raise_count in sctp_transport_pl_send() instead of sctp_transport_pl_recv(), and it's only incremented for the 1st probe for the same size. These are preparations for the next patch to make probes happen only when there's packet loss in Search Complete state. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- net/sctp/sm_statefuns.c | 15 +++++++-------- net/sctp/transport.c | 41 +++++++++++++++++++++++------------------ 3 files changed, 32 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 32fc4a309df5..f3d414ed208e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1024,8 +1024,8 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); -void sctp_transport_pl_send(struct sctp_transport *t); -void sctp_transport_pl_recv(struct sctp_transport *t); +bool sctp_transport_pl_send(struct sctp_transport *t); +bool sctp_transport_pl_recv(struct sctp_transport *t); /* This is the structure we use to queue packets as they come into diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 09a8f23ec709..32df65f68c12 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1109,12 +1109,12 @@ enum sctp_disposition sctp_sf_send_probe(struct net *net, if (!sctp_transport_pl_enabled(transport)) return SCTP_DISPOSITION_CONSUME; - sctp_transport_pl_send(transport); - - reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size); - if (!reply) - return SCTP_DISPOSITION_NOMEM; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); + if (sctp_transport_pl_send(transport)) { + reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size); + if (!reply) + return SCTP_DISPOSITION_NOMEM; + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); + } sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE, SCTP_TRANSPORT(transport)); @@ -1274,8 +1274,7 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net, !sctp_transport_pl_enabled(link)) return SCTP_DISPOSITION_DISCARD; - sctp_transport_pl_recv(link); - if (link->pl.state == SCTP_PL_COMPLETE) + if (sctp_transport_pl_recv(link)) return SCTP_DISPOSITION_CONSUME; return sctp_sf_send_probe(net, ep, asoc, type, link, commands); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 397a6244dd97..23e7bd3e3bd4 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -258,16 +258,12 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) sctp_transport_pl_update(transport); } -void sctp_transport_pl_send(struct sctp_transport *t) +bool sctp_transport_pl_send(struct sctp_transport *t) { - pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", - __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); - - if (t->pl.probe_count < SCTP_MAX_PROBES) { - t->pl.probe_count++; - return; - } + if (t->pl.probe_count < SCTP_MAX_PROBES) + goto out; + t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */ @@ -299,10 +295,20 @@ void sctp_transport_pl_send(struct sctp_transport *t) sctp_assoc_sync_pmtu(t->asoc); } } - t->pl.probe_count = 1; + +out: + if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 && + !t->pl.probe_count) + t->pl.raise_count++; + + pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", + __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); + + t->pl.probe_count++; + return true; } -void sctp_transport_pl_recv(struct sctp_transport *t) +bool sctp_transport_pl_recv(struct sctp_transport *t) { pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); @@ -323,7 +329,7 @@ void sctp_transport_pl_recv(struct sctp_transport *t) if (!t->pl.probe_high) { t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP, SCTP_MAX_PLPMTU); - return; + return false; } t->pl.probe_size += SCTP_PL_MIN_STEP; if (t->pl.probe_size >= t->pl.probe_high) { @@ -335,14 +341,13 @@ void sctp_transport_pl_recv(struct sctp_transport *t) t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); } - } else if (t->pl.state == SCTP_PL_COMPLETE) { - t->pl.raise_count++; - if (t->pl.raise_count == 30) { - /* Raise probe_size again after 30 * interval in Search Complete */ - t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ - t->pl.probe_size += SCTP_PL_MIN_STEP; - } + } else if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count == 30) { + /* Raise probe_size again after 30 * interval in Search Complete */ + t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ + t->pl.probe_size += SCTP_PL_MIN_STEP; } + + return t->pl.state == SCTP_PL_COMPLETE; } static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu) -- cgit v1.2.3 From eacf078cf4c7aa23e9591738511f142cc39b5186 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 25 Jul 2021 13:42:51 -0400 Subject: sctp: send pmtu probe only if packet loss in Search Complete state This patch is to introduce last_rtx_chunks into sctp_transport to detect if there's any packet retransmission/loss happened by checking against asoc's rtx_data_chunks in sctp_transport_pl_send(). If there is, namely, transport->last_rtx_chunks != asoc->rtx_data_chunks, the pmtu probe will be sent out. Otherwise, increment the pl.raise_count and return when it's in Search Complete state. With this patch, if in Search Complete state, which is a long period, it doesn't need to keep probing the current pmtu unless there's data packet loss. This will save quite some traffic. v1->v2: - add the missing Fixes tag. Fixes: 0dac127c0557 ("sctp: do black hole detection in search complete state") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/transport.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f3d414ed208e..651bba654d77 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -984,6 +984,7 @@ struct sctp_transport { } cacc; struct { + __u32 last_rtx_chunks; __u16 pmtu; __u16 probe_size; __u16 probe_high; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 23e7bd3e3bd4..a3d3ca6dd63d 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -263,6 +263,7 @@ bool sctp_transport_pl_send(struct sctp_transport *t) if (t->pl.probe_count < SCTP_MAX_PROBES) goto out; + t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ @@ -298,8 +299,10 @@ bool sctp_transport_pl_send(struct sctp_transport *t) out: if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 && - !t->pl.probe_count) + !t->pl.probe_count && t->pl.last_rtx_chunks == t->asoc->rtx_data_chunks) { t->pl.raise_count++; + return false; + } pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); @@ -313,6 +316,7 @@ bool sctp_transport_pl_recv(struct sctp_transport *t) pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); + t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks; t->pl.pmtu = t->pl.probe_size; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { -- cgit v1.2.3 From edac6f6332d96aab59af5f27a195f55cd080f034 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 26 Jul 2021 19:55:36 +0300 Subject: Revert "net: dsa: Allow drivers to filter packets they can decode source port from" This reverts commit cc1939e4b3aaf534fb2f3706820012036825731c. Currently 2 classes of DSA drivers are able to send/receive packets directly through the DSA master: - drivers with DSA_TAG_PROTO_NONE - sja1105 Now that sja1105 has gained the ability to perform traffic termination even under the tricky case (VLAN-aware bridge), and that is much more functional (we can perform VLAN-aware bridging with foreign interfaces), there is no reason to keep this code in the receive path of the network core. So delete it. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 15 --------------- net/dsa/port.c | 1 - net/ethernet/eth.c | 6 +----- 3 files changed, 1 insertion(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index f8eb2dc3fbef..55fcac854058 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -88,11 +88,6 @@ struct dsa_device_ops { struct packet_type *pt); void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); - /* Used to determine which traffic should match the DSA filter in - * eth_type_trans, and which, if any, should bypass it and be processed - * as regular on the master net device. - */ - bool (*filter)(const struct sk_buff *skb, struct net_device *dev); unsigned int needed_headroom; unsigned int needed_tailroom; const char *name; @@ -246,7 +241,6 @@ struct dsa_port { struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); - bool (*filter)(const struct sk_buff *skb, struct net_device *dev); enum { DSA_PORT_TYPE_UNUSED = 0, @@ -985,15 +979,6 @@ static inline bool netdev_uses_dsa(const struct net_device *dev) return false; } -static inline bool dsa_can_decode(const struct sk_buff *skb, - struct net_device *dev) -{ -#if IS_ENABLED(CONFIG_NET_DSA) - return !dev->dsa_ptr->filter || dev->dsa_ptr->filter(skb, dev); -#endif - return false; -} - /* All DSA tags that push the EtherType to the right (basically all except tail * tags, which don't break dissection) can be treated the same from the * perspective of the flow dissector. diff --git a/net/dsa/port.c b/net/dsa/port.c index 7b9bf45a76b6..b927d94b6934 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -888,7 +888,6 @@ int dsa_port_mrp_del_ring_role(const struct dsa_port *dp, void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops) { - cpu_dp->filter = tag_ops->filter; cpu_dp->rcv = tag_ops->rcv; cpu_dp->tag_ops = tag_ops; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9cce612e8976..171ba75b74c9 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -182,12 +182,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * at all, so we check here whether one of those tagging * variants has been configured on the receiving interface, * and if so, set skb->protocol without looking at the packet. - * The DSA tagging protocol may be able to decode some but not all - * traffic (for example only for management). In that case give it the - * option to filter the packets from which it can decode source port - * information. */ - if (unlikely(netdev_uses_dsa(dev)) && dsa_can_decode(skb, dev)) + if (unlikely(netdev_uses_dsa(dev))) return htons(ETH_P_XDSA); if (likely(eth_proto_is_802_3(eth->h_proto))) -- cgit v1.2.3 From f9b282b36dfa9b6c6d6b3e8816cdf0e4defff482 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Tue, 27 Jul 2021 11:41:41 +0800 Subject: net: netlink: add the case when nlh is NULL Add the case when nlh is NULL in nlmsg_report(), so that the caller doesn't need to deal with this case. Signed-off-by: Yajun Deng Signed-off-by: David S. Miller --- include/net/netlink.h | 2 +- net/core/rtnetlink.c | 6 +----- net/netlink/genetlink.c | 9 ++++----- 3 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 1ceec518ab49..7a2a9d3144ba 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -885,7 +885,7 @@ static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh, */ static inline int nlmsg_report(const struct nlmsghdr *nlh) { - return !!(nlh->nlmsg_flags & NLM_F_ECHO); + return nlh ? !!(nlh->nlmsg_flags & NLM_F_ECHO) : 0; } /** diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 670d74ab91ae..e79aaf1f7139 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -726,12 +726,8 @@ void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { struct sock *rtnl = net->rtnl; - int report = 0; - if (nlh) - report = nlmsg_report(nlh); - - nlmsg_notify(rtnl, skb, pid, group, report, flags); + nlmsg_notify(rtnl, skb, pid, group, nlmsg_report(nlh), flags); } EXPORT_SYMBOL(rtnl_notify); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2d6fdf40df66..ae58da608a31 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1485,6 +1485,7 @@ int genlmsg_multicast_allns(const struct genl_family *family, { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; + group = family->mcgrp_offset + group; return genlmsg_mcast(skb, portid, group, flags); } @@ -1495,14 +1496,12 @@ void genl_notify(const struct genl_family *family, struct sk_buff *skb, { struct net *net = genl_info_net(info); struct sock *sk = net->genl_sock; - int report = 0; - - if (info->nlhdr) - report = nlmsg_report(info->nlhdr); if (WARN_ON_ONCE(group >= family->n_mcgrps)) return; + group = family->mcgrp_offset + group; - nlmsg_notify(sk, skb, info->snd_portid, group, report, flags); + nlmsg_notify(sk, skb, info->snd_portid, group, + nlmsg_report(info->nlhdr), flags); } EXPORT_SYMBOL(genl_notify); -- cgit v1.2.3 From c7c9d2102c9c098916ab9e0ab248006107d00d6c Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sun, 25 Jul 2021 00:11:59 +0300 Subject: net: llc: fix skb_over_panic Syzbot reported skb_over_panic() in llc_pdu_init_as_xid_cmd(). The problem was in wrong LCC header manipulations. Syzbot's reproducer tries to send XID packet. llc_ui_sendmsg() is doing following steps: 1. skb allocation with size = len + header size len is passed from userpace and header size is 3 since addr->sllc_xid is set. 2. skb_reserve() for header_len = 3 3. filling all other space with memcpy_from_msg() Ok, at this moment we have fully loaded skb, only headers needs to be filled. Then code comes to llc_sap_action_send_xid_c(). This function pushes 3 bytes for LLC PDU header and initializes it. Then comes llc_pdu_init_as_xid_cmd(). It initalizes next 3 bytes *AFTER* LLC PDU header and call skb_push(skb, 3). This looks wrong for 2 reasons: 1. Bytes rigth after LLC header are user data, so this function was overwriting payload. 2. skb_push(skb, 3) call can cause skb_over_panic() since all free space was filled in llc_ui_sendmsg(). (This can happen is user passed 686 len: 686 + 14 (eth header) + 3 (LLC header) = 703. SKB_DATA_ALIGN(703) = 704) So, in this patch I added 2 new private constansts: LLC_PDU_TYPE_U_XID and LLC_PDU_LEN_U_XID. LLC_PDU_LEN_U_XID is used to correctly reserve header size to handle LLC + XID case. LLC_PDU_TYPE_U_XID is used by llc_pdu_header_init() function to push 6 bytes instead of 3. And finally I removed skb_push() call from llc_pdu_init_as_xid_cmd(). This changes should not affect other parts of LLC, since after all steps we just transmit buffer. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-and-tested-by: syzbot+5e5a981ad7cc54c4b2b4@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- include/net/llc_pdu.h | 31 +++++++++++++++++++++++-------- net/llc/af_llc.c | 10 +++++++++- net/llc/llc_s_ac.c | 2 +- 3 files changed, 33 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index c0f0a13ed818..49aa79c7b278 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -15,9 +15,11 @@ #include /* Lengths of frame formats */ -#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ -#define LLC_PDU_LEN_S 4 -#define LLC_PDU_LEN_U 3 /* header and 1 control byte */ +#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ +#define LLC_PDU_LEN_S 4 +#define LLC_PDU_LEN_U 3 /* header and 1 control byte */ +/* header and 1 control byte and XID info */ +#define LLC_PDU_LEN_U_XID (LLC_PDU_LEN_U + sizeof(struct llc_xid_info)) /* Known SAP addresses */ #define LLC_GLOBAL_SAP 0xFF #define LLC_NULL_SAP 0x00 /* not network-layer visible */ @@ -50,9 +52,10 @@ #define LLC_PDU_TYPE_U_MASK 0x03 /* 8-bit control field */ #define LLC_PDU_TYPE_MASK 0x03 -#define LLC_PDU_TYPE_I 0 /* first bit */ -#define LLC_PDU_TYPE_S 1 /* first two bits */ -#define LLC_PDU_TYPE_U 3 /* first two bits */ +#define LLC_PDU_TYPE_I 0 /* first bit */ +#define LLC_PDU_TYPE_S 1 /* first two bits */ +#define LLC_PDU_TYPE_U 3 /* first two bits */ +#define LLC_PDU_TYPE_U_XID 4 /* private type for detecting XID commands */ #define LLC_PDU_TYPE_IS_I(pdu) \ ((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0) @@ -230,9 +233,18 @@ static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, u8 ssap, u8 dsap, u8 cr) { - const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4; + int hlen = 4; /* default value for I and S types */ struct llc_pdu_un *pdu; + switch (type) { + case LLC_PDU_TYPE_U: + hlen = 3; + break; + case LLC_PDU_TYPE_U_XID: + hlen = 6; + break; + } + skb_push(skb, hlen); skb_reset_network_header(skb); pdu = llc_pdu_un_hdr(skb); @@ -374,7 +386,10 @@ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb, xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */ xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; /* size of receive window */ - skb_put(skb, sizeof(struct llc_xid_info)); + + /* no need to push/put since llc_pdu_header_init() has already + * pushed 3 + 3 bytes + */ } /** diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7180979114e4..ac5cadd02cfa 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -98,8 +98,16 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr) { u8 rc = LLC_PDU_LEN_U; - if (addr->sllc_test || addr->sllc_xid) + if (addr->sllc_test) rc = LLC_PDU_LEN_U; + else if (addr->sllc_xid) + /* We need to expand header to sizeof(struct llc_xid_info) + * since llc_pdu_init_as_xid_cmd() sets 4,5,6 bytes of LLC header + * as XID PDU. In llc_ui_sendmsg() we reserved header size and then + * filled all other space with user data. If we won't reserve this + * bytes, llc_pdu_init_as_xid_cmd() will overwrite user data + */ + rc = LLC_PDU_LEN_U_XID; else if (sk->sk_type == SOCK_STREAM) rc = LLC_PDU_LEN_I; return rc; diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index b554f26c68ee..79d1cef8f15a 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -79,7 +79,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) struct llc_sap_state_ev *ev = llc_sap_ev(skb); int rc; - llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, + llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); -- cgit v1.2.3 From 3e7a1c7c561ed8508fbdb98ed5708175bbcf7938 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 27 Jul 2021 15:45:06 +0200 Subject: ip_tunnel: use ndo_siocdevprivate The various ipv4 and ipv6 tunnel drivers each implement a set of 12 SIOCDEVPRIVATE commands for managing tunnels. These all work correctly in compat mode. Move them over to the new .ndo_siocdevprivate operation. Cc: Hideaki YOSHIFUJI Cc: David Ahern Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 3 ++- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_tunnel.c | 9 +++++---- net/ipv4/ip_vti.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv6/ip6_gre.c | 17 +++++++++-------- net/ipv6/ip6_tunnel.c | 21 +++++++++++---------- net/ipv6/ip6_vti.c | 21 +++++++++++---------- net/ipv6/sit.c | 35 ++++++++++++++++++----------------- 9 files changed, 59 insertions(+), 53 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 548b65bd3973..bc3b13ec93c9 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -270,7 +270,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const u8 proto, int tunnel_hlen); int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); -int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 12dca0c85f3c..6ebf05859acb 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -923,7 +923,7 @@ static const struct net_device_ops ipgre_netdev_ops = { .ndo_stop = ipgre_close, #endif .ndo_start_xmit = ipgre_xmit, - .ndo_do_ioctl = ip_tunnel_ioctl, + .ndo_siocdevprivate = ip_tunnel_siocdevprivate, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 0dca00745ac3..7f0e810c06f4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -958,19 +958,20 @@ done: } EXPORT_SYMBOL_GPL(ip_tunnel_ctl); -int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { struct ip_tunnel_parm p; int err; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) return -EFAULT; err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd); - if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (!err && copy_to_user(data, &p, sizeof(p))) return -EFAULT; return err; } -EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); +EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index eb560eecee08..efe25a0172e6 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -405,7 +405,7 @@ static const struct net_device_ops vti_netdev_ops = { .ndo_init = vti_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = vti_tunnel_xmit, - .ndo_do_ioctl = ip_tunnel_ioctl, + .ndo_siocdevprivate = ip_tunnel_siocdevprivate, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 266c65577ba6..3aa78ccbec3e 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -347,7 +347,7 @@ static const struct net_device_ops ipip_netdev_ops = { .ndo_init = ipip_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = ipip_tunnel_xmit, - .ndo_do_ioctl = ip_tunnel_ioctl, + .ndo_siocdevprivate = ip_tunnel_siocdevprivate, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bc224f917bbd..3ad201d372d8 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1244,8 +1244,9 @@ static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u, memcpy(u->name, p->name, sizeof(u->name)); } -static int ip6gre_tunnel_ioctl(struct net_device *dev, - struct ifreq *ifr, int cmd) +static int ip6gre_tunnel_siocdevprivate(struct net_device *dev, + struct ifreq *ifr, void __user *data, + int cmd) { int err = 0; struct ip6_tnl_parm2 p; @@ -1259,7 +1260,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, switch (cmd) { case SIOCGETTUNNEL: if (dev == ign->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + if (copy_from_user(&p, data, sizeof(p))) { err = -EFAULT; break; } @@ -1270,7 +1271,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, } memset(&p, 0, sizeof(p)); ip6gre_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; break; @@ -1281,7 +1282,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, goto done; err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) goto done; err = -EINVAL; @@ -1318,7 +1319,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, memset(&p, 0, sizeof(p)); ip6gre_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; } else err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); @@ -1331,7 +1332,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, if (dev == ign->fb_tunnel_dev) { err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) goto done; err = -ENOENT; ip6gre_tnl_parm_from_user(&p1, &p); @@ -1398,7 +1399,7 @@ static const struct net_device_ops ip6gre_netdev_ops = { .ndo_init = ip6gre_tunnel_init, .ndo_uninit = ip6gre_tunnel_uninit, .ndo_start_xmit = ip6gre_tunnel_xmit, - .ndo_do_ioctl = ip6gre_tunnel_ioctl, + .ndo_siocdevprivate = ip6gre_tunnel_siocdevprivate, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 322698d9fcf4..20a67efda47f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1581,9 +1581,10 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) } /** - * ip6_tnl_ioctl - configure ipv6 tunnels from userspace + * ip6_tnl_siocdevprivate - configure ipv6 tunnels from userspace * @dev: virtual device associated with tunnel - * @ifr: parameters passed from userspace + * @ifr: unused + * @data: parameters passed from userspace * @cmd: command to be performed * * Description: @@ -1609,7 +1610,8 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) **/ static int -ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { int err = 0; struct ip6_tnl_parm p; @@ -1623,7 +1625,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + if (copy_from_user(&p, data, sizeof(p))) { err = -EFAULT; break; } @@ -1635,9 +1637,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) memset(&p, 0, sizeof(p)); } ip6_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) { + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; - } break; case SIOCADDTUNNEL: case SIOCCHGTUNNEL: @@ -1645,7 +1646,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) break; err = -EINVAL; if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && @@ -1669,7 +1670,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!IS_ERR(t)) { err = 0; ip6_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; } else { @@ -1683,7 +1684,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (dev == ip6n->fb_tnl_dev) { err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) break; err = -ENOENT; ip6_tnl_parm_from_user(&p1, &p); @@ -1802,7 +1803,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, .ndo_start_xmit = ip6_tnl_start_xmit, - .ndo_do_ioctl = ip6_tnl_ioctl, + .ndo_siocdevprivate = ip6_tnl_siocdevprivate, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 2d048e21abbb..1d8e3ffa225d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -771,13 +771,14 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) } /** - * vti6_ioctl - configure vti6 tunnels from userspace + * vti6_siocdevprivate - configure vti6 tunnels from userspace * @dev: virtual device associated with tunnel - * @ifr: parameters passed from userspace + * @ifr: unused + * @data: parameters passed from userspace * @cmd: command to be performed * * Description: - * vti6_ioctl() is used for managing vti6 tunnels + * vti6_siocdevprivate() is used for managing vti6 tunnels * from userspace. * * The possible commands are the following: @@ -798,7 +799,7 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) * %-ENODEV if attempting to change or delete a nonexisting device **/ static int -vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { int err = 0; struct ip6_tnl_parm2 p; @@ -810,7 +811,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + if (copy_from_user(&p, data, sizeof(p))) { err = -EFAULT; break; } @@ -822,7 +823,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!t) t = netdev_priv(dev); vti6_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; break; case SIOCADDTUNNEL: @@ -831,7 +832,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) break; err = -EINVAL; if (p.proto != IPPROTO_IPV6 && p.proto != 0) @@ -852,7 +853,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (t) { err = 0; vti6_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; } else @@ -865,7 +866,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (dev == ip6n->fb_tnl_dev) { err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) break; err = -ENOENT; vti6_parm_from_user(&p1, &p); @@ -890,7 +891,7 @@ static const struct net_device_ops vti6_netdev_ops = { .ndo_init = vti6_dev_init, .ndo_uninit = vti6_dev_uninit, .ndo_start_xmit = vti6_tnl_xmit, - .ndo_do_ioctl = vti6_ioctl, + .ndo_siocdevprivate = vti6_siocdevprivate, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 33adc12b697d..ef0c7a7c18e2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -299,9 +299,8 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) } -static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) +static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __user *a) { - struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data; struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; @@ -454,8 +453,8 @@ out: return err; } -static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr, - int cmd) +static int ipip6_tunnel_prl_ctl(struct net_device *dev, + struct ip_tunnel_prl __user *data, int cmd) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl prl; @@ -466,7 +465,7 @@ static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr, if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) return -EINVAL; - if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) + if (copy_from_user(&prl, data, sizeof(prl))) return -EFAULT; switch (cmd) { @@ -1198,14 +1197,14 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, } static int -ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr) +ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_6rd ip6rd; struct ip_tunnel_parm p; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) return -EFAULT; t = ipip6_tunnel_locate(t->net, &p, 0); } @@ -1216,13 +1215,14 @@ ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr) ip6rd.relay_prefix = t->ip6rd.relay_prefix; ip6rd.prefixlen = t->ip6rd.prefixlen; ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd))) + if (copy_to_user(data, &ip6rd, sizeof(ip6rd))) return -EFAULT; return 0; } static int -ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ipip6_tunnel_6rdctl(struct net_device *dev, struct ip_tunnel_6rd __user *data, + int cmd) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_6rd ip6rd; @@ -1230,7 +1230,7 @@ ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd))) + if (copy_from_user(&ip6rd, data, sizeof(ip6rd))) return -EFAULT; if (cmd != SIOCDEL6RD) { @@ -1369,27 +1369,28 @@ ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) } static int -ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ipip6_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { switch (cmd) { case SIOCGETTUNNEL: case SIOCADDTUNNEL: case SIOCCHGTUNNEL: case SIOCDELTUNNEL: - return ip_tunnel_ioctl(dev, ifr, cmd); + return ip_tunnel_siocdevprivate(dev, ifr, data, cmd); case SIOCGETPRL: - return ipip6_tunnel_get_prl(dev, ifr); + return ipip6_tunnel_get_prl(dev, data); case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: - return ipip6_tunnel_prl_ctl(dev, ifr, cmd); + return ipip6_tunnel_prl_ctl(dev, data, cmd); #ifdef CONFIG_IPV6_SIT_6RD case SIOCGET6RD: - return ipip6_tunnel_get6rd(dev, ifr); + return ipip6_tunnel_get6rd(dev, data); case SIOCADD6RD: case SIOCCHG6RD: case SIOCDEL6RD: - return ipip6_tunnel_6rdctl(dev, ifr, cmd); + return ipip6_tunnel_6rdctl(dev, data, cmd); #endif default: return -EINVAL; @@ -1400,7 +1401,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_init = ipip6_tunnel_init, .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, - .ndo_do_ioctl = ipip6_tunnel_ioctl, + .ndo_siocdevprivate = ipip6_tunnel_siocdevprivate, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = ipip6_tunnel_ctl, -- cgit v1.2.3 From a76053707dbf0dc020a73b4d90cd952409ef3691 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 27 Jul 2021 15:45:13 +0200 Subject: dev_ioctl: split out ndo_eth_ioctl Most users of ndo_do_ioctl are ethernet drivers that implement the MII commands SIOCGMIIPHY/SIOCGMIIREG/SIOCSMIIREG, or hardware timestamping with SIOCSHWTSTAMP/SIOCGHWTSTAMP. Separate these from the few drivers that use ndo_do_ioctl to implement SIOCBOND, SIOCBR and SIOCWANDEV commands. This is a purely cosmetic change intended to help readers find their way through the implementation. Cc: Doug Ledford Cc: Jason Gunthorpe Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Cc: Andrew Lunn Cc: Vivien Didelot Cc: Florian Fainelli Cc: Vladimir Oltean Cc: Leon Romanovsky Cc: linux-rdma@vger.kernel.org Signed-off-by: Arnd Bergmann Acked-by: Jason Gunthorpe Signed-off-by: David S. Miller --- Documentation/networking/netdevices.rst | 4 +++ Documentation/networking/timestamping.rst | 6 ++-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 ++--- drivers/net/bonding/bond_main.c | 42 +++++++++++++++------- drivers/net/ethernet/3com/3c574_cs.c | 2 +- drivers/net/ethernet/3com/3c59x.c | 4 +-- drivers/net/ethernet/8390/ax88796.c | 2 +- drivers/net/ethernet/8390/axnet_cs.c | 2 +- drivers/net/ethernet/8390/pcnet_cs.c | 2 +- drivers/net/ethernet/actions/owl-emac.c | 6 ++-- drivers/net/ethernet/adaptec/starfire.c | 2 +- drivers/net/ethernet/agere/et131x.c | 2 +- drivers/net/ethernet/allwinner/sun4i-emac.c | 2 +- drivers/net/ethernet/amd/amd8111e.c | 2 +- drivers/net/ethernet/amd/au1000_eth.c | 2 +- drivers/net/ethernet/amd/pcnet32.c | 2 +- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 +- drivers/net/ethernet/aquantia/atlantic/aq_main.c | 2 +- drivers/net/ethernet/arc/emac_main.c | 2 +- drivers/net/ethernet/atheros/ag71xx.c | 2 +- drivers/net/ethernet/atheros/alx/main.c | 2 +- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 2 +- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 2 +- drivers/net/ethernet/atheros/atlx/atl1.c | 2 +- drivers/net/ethernet/atheros/atlx/atl2.c | 2 +- drivers/net/ethernet/broadcom/b44.c | 2 +- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 4 +-- drivers/net/ethernet/broadcom/bgmac.c | 2 +- drivers/net/ethernet/broadcom/bnx2.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +- drivers/net/ethernet/broadcom/sb1250-mac.c | 2 +- drivers/net/ethernet/broadcom/tg3.c | 2 +- drivers/net/ethernet/cadence/macb_main.c | 4 +-- drivers/net/ethernet/cavium/liquidio/lio_main.c | 2 +- drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 2 +- drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 2 +- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 2 +- drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 2 +- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- .../net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 2 +- drivers/net/ethernet/cirrus/ep93xx_eth.c | 2 +- drivers/net/ethernet/davicom/dm9000.c | 2 +- drivers/net/ethernet/dec/tulip/tulip_core.c | 2 +- drivers/net/ethernet/dec/tulip/winbond-840.c | 2 +- drivers/net/ethernet/dlink/dl2k.c | 2 +- drivers/net/ethernet/dlink/sundance.c | 2 +- drivers/net/ethernet/dnet.c | 2 +- drivers/net/ethernet/ethoc.c | 2 +- drivers/net/ethernet/faraday/ftgmac100.c | 2 +- drivers/net/ethernet/faraday/ftmac100.c | 2 +- drivers/net/ethernet/fealnx.c | 2 +- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 2 +- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 2 +- drivers/net/ethernet/freescale/enetc/enetc_vf.c | 2 +- drivers/net/ethernet/freescale/fec_main.c | 2 +- drivers/net/ethernet/freescale/fec_mpc52xx.c | 2 +- .../net/ethernet/freescale/fs_enet/fs_enet-main.c | 2 +- drivers/net/ethernet/freescale/gianfar.c | 2 +- drivers/net/ethernet/freescale/ucc_geth.c | 2 +- drivers/net/ethernet/hisilicon/hisi_femac.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +- drivers/net/ethernet/ibm/emac/core.c | 4 +-- drivers/net/ethernet/ibm/ibmveth.c | 2 +- drivers/net/ethernet/intel/e100.c | 2 +- drivers/net/ethernet/intel/e1000/e1000_main.c | 2 +- drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 6 ++-- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- drivers/net/ethernet/intel/igbvf/netdev.c | 2 +- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/jme.c | 2 +- drivers/net/ethernet/korina.c | 2 +- drivers/net/ethernet/lantiq_etop.c | 2 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- drivers/net/ethernet/marvell/mvneta.c | 2 +- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +- .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 +- drivers/net/ethernet/marvell/pxa168_eth.c | 2 +- drivers/net/ethernet/marvell/skge.c | 2 +- drivers/net/ethernet/marvell/sky2.c | 4 +-- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- drivers/net/ethernet/mediatek/mtk_star_emac.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 2 +- .../ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c | 2 +- .../ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 4 +-- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- drivers/net/ethernet/micrel/ks8851_common.c | 2 +- drivers/net/ethernet/micrel/ksz884x.c | 2 +- drivers/net/ethernet/microchip/lan743x_main.c | 2 +- drivers/net/ethernet/mscc/ocelot_net.c | 2 +- drivers/net/ethernet/natsemi/natsemi.c | 2 +- drivers/net/ethernet/neterion/s2io.c | 2 +- drivers/net/ethernet/neterion/vxge/vxge-main.c | 2 +- drivers/net/ethernet/nxp/lpc_eth.c | 2 +- .../net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 2 +- drivers/net/ethernet/packetengines/hamachi.c | 2 +- drivers/net/ethernet/packetengines/yellowfin.c | 2 +- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 +-- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 +- drivers/net/ethernet/qualcomm/emac/emac.c | 2 +- drivers/net/ethernet/rdc/r6040.c | 2 +- drivers/net/ethernet/realtek/8139cp.c | 2 +- drivers/net/ethernet/realtek/8139too.c | 2 +- drivers/net/ethernet/realtek/r8169_main.c | 2 +- drivers/net/ethernet/renesas/ravb_main.c | 2 +- drivers/net/ethernet/renesas/sh_eth.c | 4 +-- drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 2 +- drivers/net/ethernet/sfc/efx.c | 2 +- drivers/net/ethernet/sfc/falcon/efx.c | 2 +- drivers/net/ethernet/sgi/ioc3-eth.c | 2 +- drivers/net/ethernet/sgi/meth.c | 2 +- drivers/net/ethernet/sis/sis190.c | 2 +- drivers/net/ethernet/sis/sis900.c | 2 +- drivers/net/ethernet/smsc/epic100.c | 2 +- drivers/net/ethernet/smsc/smc91c92_cs.c | 2 +- drivers/net/ethernet/smsc/smsc911x.c | 2 +- drivers/net/ethernet/smsc/smsc9420.c | 2 +- drivers/net/ethernet/socionext/netsec.c | 2 +- drivers/net/ethernet/socionext/sni_ave.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/ethernet/sun/cassini.c | 2 +- drivers/net/ethernet/sun/niu.c | 2 +- drivers/net/ethernet/sun/sungem.c | 2 +- drivers/net/ethernet/synopsys/dwc-xlgmac-net.c | 2 +- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- drivers/net/ethernet/ti/cpmac.c | 2 +- drivers/net/ethernet/ti/cpsw.c | 2 +- drivers/net/ethernet/ti/cpsw_new.c | 2 +- drivers/net/ethernet/ti/davinci_emac.c | 2 +- drivers/net/ethernet/ti/netcp_core.c | 2 +- drivers/net/ethernet/ti/tlan.c | 2 +- drivers/net/ethernet/toshiba/spider_net.c | 2 +- drivers/net/ethernet/toshiba/tc35815.c | 2 +- drivers/net/ethernet/tundra/tsi108_eth.c | 2 +- drivers/net/ethernet/via/via-rhine.c | 2 +- drivers/net/ethernet/via/via-velocity.c | 2 +- drivers/net/ethernet/xilinx/ll_temac_main.c | 2 +- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 2 +- drivers/net/ethernet/xircom/xirc2ps_cs.c | 2 +- drivers/net/ethernet/xscale/ixp4xx_eth.c | 2 +- drivers/net/macvlan.c | 8 ++--- drivers/net/phy/phy.c | 4 +-- drivers/net/usb/asix_devices.c | 6 ++-- drivers/net/usb/ax88172a.c | 2 +- drivers/net/usb/ax88179_178a.c | 2 +- drivers/net/usb/dm9601.c | 2 +- drivers/net/usb/lan78xx.c | 2 +- drivers/net/usb/mcs7830.c | 2 +- drivers/net/usb/r8152.c | 2 +- drivers/net/usb/smsc75xx.c | 2 +- drivers/net/usb/smsc95xx.c | 2 +- drivers/net/usb/sr9700.c | 2 +- drivers/net/usb/sr9800.c | 2 +- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 4 +-- drivers/staging/octeon/ethernet.c | 12 +++---- include/linux/netdevice.h | 6 ++++ include/net/dsa.h | 14 ++++---- net/8021q/vlan_dev.c | 6 ++-- net/core/dev_ioctl.c | 38 ++++++++++++++------ net/dsa/master.c | 6 ++-- net/dsa/slave.c | 2 +- 172 files changed, 273 insertions(+), 231 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 02f1faac839a..f57f255f2397 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -229,6 +229,10 @@ ndo_siocdevprivate: This is used to implement SIOCDEVPRIVATE ioctl helpers. These should not be added to new drivers, so don't use. +ndo_eth_ioctl: + Synchronization: rtnl_lock() semaphore. + Context: process + ndo_get_stats: Synchronization: rtnl_lock() semaphore, dev_base_lock rwlock, or RCU. Context: atomic (can't sleep under rwlock or RCU) diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index 7db3985359bc..a722eb30e014 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -625,7 +625,7 @@ interfaces of a DSA switch to share the same PHC. By design, PTP timestamping with a DSA switch does not need any special handling in the driver for the host port it is attached to. However, when the host port also supports PTP timestamping, DSA will take care of intercepting -the ``.ndo_do_ioctl`` calls towards the host port, and block attempts to enable +the ``.ndo_eth_ioctl`` calls towards the host port, and block attempts to enable hardware timestamping on it. This is because the SO_TIMESTAMPING API does not allow the delivery of multiple hardware timestamps for the same packet, so anybody else except for the DSA switch port must be prevented from doing so. @@ -688,7 +688,7 @@ ethtool ioctl operations for them need to be mediated by their respective MAC driver. Therefore, as opposed to DSA switches, modifications need to be done to each individual MAC driver for PHY timestamping support. This entails: -- Checking, in ``.ndo_do_ioctl``, whether ``phy_has_hwtstamp(netdev->phydev)`` +- Checking, in ``.ndo_eth_ioctl``, whether ``phy_has_hwtstamp(netdev->phydev)`` is true or not. If it is, then the MAC driver should not process this request but instead pass it on to the PHY using ``phy_mii_ioctl()``. @@ -747,7 +747,7 @@ For example, a typical driver design for TX timestamping might be to split the transmission part into 2 portions: 1. "TX": checks whether PTP timestamping has been previously enabled through - the ``.ndo_do_ioctl`` ("``priv->hwtstamp_tx_enabled == true``") and the + the ``.ndo_eth_ioctl`` ("``priv->hwtstamp_tx_enabled == true``") and the current skb requires a TX timestamp ("``skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP``"). If this is true, it sets the "``skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS``" flag. Note: as diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index abf60f4d9203..0aa8629fdf62 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1745,10 +1745,10 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr, { struct ipoib_dev_priv *priv = ipoib_priv(dev); - if (!priv->rn_ops->ndo_do_ioctl) + if (!priv->rn_ops->ndo_eth_ioctl) return -EOPNOTSUPP; - return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd); + return priv->rn_ops->ndo_eth_ioctl(dev, ifr, cmd); } static int ipoib_dev_init(struct net_device *dev) @@ -2078,7 +2078,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_set_vf_guid = ipoib_set_vf_guid, .ndo_set_mac_address = ipoib_set_mac, .ndo_get_stats64 = ipoib_get_stats, - .ndo_do_ioctl = ipoib_ioctl, + .ndo_eth_ioctl = ipoib_ioctl, }; static const struct net_device_ops ipoib_netdev_ops_vf = { @@ -2093,7 +2093,7 @@ static const struct net_device_ops ipoib_netdev_ops_vf = { .ndo_set_rx_mode = ipoib_set_mcast_list, .ndo_get_iflink = ipoib_get_iflink, .ndo_get_stats64 = ipoib_get_stats, - .ndo_do_ioctl = ipoib_ioctl, + .ndo_eth_ioctl = ipoib_ioctl, }; static const struct net_device_ops ipoib_netdev_default_pf = { diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 96864183f92e..23769e937c28 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -732,7 +732,7 @@ static int bond_check_dev_link(struct bonding *bond, BMSR_LSTATUS : 0; /* Ethtool can't be used, fallback to MII ioctls. */ - ioctl = slave_ops->ndo_do_ioctl; + ioctl = slave_ops->ndo_eth_ioctl; if (ioctl) { /* TODO: set pointer to correct ioctl on a per team member * bases to make this more efficient. that is, once @@ -756,7 +756,7 @@ static int bond_check_dev_link(struct bonding *bond, } } - /* If reporting, report that either there's no dev->do_ioctl, + /* If reporting, report that either there's no ndo_eth_ioctl, * or both SIOCGMIIREG and get_link failed (meaning that we * cannot report link status). If not reporting, pretend * we're ok. @@ -1733,7 +1733,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, if (!bond->params.use_carrier && slave_dev->ethtool_ops->get_link == NULL && - slave_ops->ndo_do_ioctl == NULL) { + slave_ops->ndo_eth_ioctl == NULL) { slave_warn(bond_dev, slave_dev, "no link monitoring support\n"); } @@ -3962,20 +3962,13 @@ static void bond_get_stats(struct net_device *bond_dev, rcu_read_unlock(); } -static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) +static int bond_eth_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) { struct bonding *bond = netdev_priv(bond_dev); - struct net_device *slave_dev = NULL; - struct ifbond k_binfo; - struct ifbond __user *u_binfo = NULL; - struct ifslave k_sinfo; - struct ifslave __user *u_sinfo = NULL; struct mii_ioctl_data *mii = NULL; - struct bond_opt_value newval; - struct net *net; - int res = 0; + int res; - netdev_dbg(bond_dev, "bond_ioctl: cmd=%d\n", cmd); + netdev_dbg(bond_dev, "bond_eth_ioctl: cmd=%d\n", cmd); switch (cmd) { case SIOCGMIIPHY: @@ -4000,6 +3993,28 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd } return 0; + default: + res = -EOPNOTSUPP; + } + + return res; +} + +static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct net_device *slave_dev = NULL; + struct ifbond k_binfo; + struct ifbond __user *u_binfo = NULL; + struct ifslave k_sinfo; + struct ifslave __user *u_sinfo = NULL; + struct bond_opt_value newval; + struct net *net; + int res = 0; + + netdev_dbg(bond_dev, "bond_ioctl: cmd=%d\n", cmd); + + switch (cmd) { case SIOCBONDINFOQUERY: u_binfo = (struct ifbond __user *)ifr->ifr_data; @@ -4972,6 +4987,7 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_start_xmit = bond_start_xmit, .ndo_select_queue = bond_select_queue, .ndo_get_stats64 = bond_get_stats, + .ndo_eth_ioctl = bond_eth_ioctl, .ndo_do_ioctl = bond_do_ioctl, .ndo_siocdevprivate = bond_siocdevprivate, .ndo_change_rx_flags = bond_change_rx_flags, diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index f66e7fb9a2bb..dd4d3c48b98d 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -252,7 +252,7 @@ static const struct net_device_ops el3_netdev_ops = { .ndo_start_xmit = el3_start_xmit, .ndo_tx_timeout = el3_tx_timeout, .ndo_get_stats = el3_get_stats, - .ndo_do_ioctl = el3_ioctl, + .ndo_eth_ioctl = el3_ioctl, .ndo_set_rx_mode = set_multicast_list, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 7d7d3ffe25c3..17c16333a412 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -1052,7 +1052,7 @@ static const struct net_device_ops boomrang_netdev_ops = { .ndo_tx_timeout = vortex_tx_timeout, .ndo_get_stats = vortex_get_stats, #ifdef CONFIG_PCI - .ndo_do_ioctl = vortex_ioctl, + .ndo_eth_ioctl = vortex_ioctl, #endif .ndo_set_rx_mode = set_rx_mode, .ndo_set_mac_address = eth_mac_addr, @@ -1069,7 +1069,7 @@ static const struct net_device_ops vortex_netdev_ops = { .ndo_tx_timeout = vortex_tx_timeout, .ndo_get_stats = vortex_get_stats, #ifdef CONFIG_PCI - .ndo_do_ioctl = vortex_ioctl, + .ndo_eth_ioctl = vortex_ioctl, #endif .ndo_set_rx_mode = set_rx_mode, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index 172947fc051a..9595dd1f32ca 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -635,7 +635,7 @@ static void ax_eeprom_register_write(struct eeprom_93cx6 *eeprom) static const struct net_device_ops ax_netdev_ops = { .ndo_open = ax_open, .ndo_stop = ax_close, - .ndo_do_ioctl = ax_ioctl, + .ndo_eth_ioctl = ax_ioctl, .ndo_start_xmit = ax_ei_start_xmit, .ndo_tx_timeout = ax_ei_tx_timeout, diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 8c321dfc7b3b..3c370e686ec3 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -128,7 +128,7 @@ static inline struct axnet_dev *PRIV(struct net_device *dev) static const struct net_device_ops axnet_netdev_ops = { .ndo_open = axnet_open, .ndo_stop = axnet_close, - .ndo_do_ioctl = axnet_ioctl, + .ndo_eth_ioctl = axnet_ioctl, .ndo_start_xmit = axnet_start_xmit, .ndo_tx_timeout = axnet_tx_timeout, .ndo_get_stats = get_stats, diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index cac036706382..96ad72abd373 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -223,7 +223,7 @@ static const struct net_device_ops pcnet_netdev_ops = { .ndo_set_config = set_config, .ndo_start_xmit = ei_start_xmit, .ndo_get_stats = ei_get_stats, - .ndo_do_ioctl = ei_ioctl, + .ndo_eth_ioctl = ei_ioctl, .ndo_set_rx_mode = ei_set_multicast_list, .ndo_tx_timeout = ei_tx_timeout, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c index b8e771c2bc40..c4ecf4fcadf8 100644 --- a/drivers/net/ethernet/actions/owl-emac.c +++ b/drivers/net/ethernet/actions/owl-emac.c @@ -1179,8 +1179,8 @@ static int owl_emac_ndo_set_mac_addr(struct net_device *netdev, void *addr) return owl_emac_setup_frame_xmit(netdev_priv(netdev)); } -static int owl_emac_ndo_do_ioctl(struct net_device *netdev, - struct ifreq *req, int cmd) +static int owl_emac_ndo_eth_ioctl(struct net_device *netdev, + struct ifreq *req, int cmd) { if (!netif_running(netdev)) return -EINVAL; @@ -1224,7 +1224,7 @@ static const struct net_device_ops owl_emac_netdev_ops = { .ndo_set_rx_mode = owl_emac_ndo_set_rx_mode, .ndo_set_mac_address = owl_emac_ndo_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = owl_emac_ndo_do_ioctl, + .ndo_eth_ioctl = owl_emac_ndo_eth_ioctl, .ndo_tx_timeout = owl_emac_ndo_tx_timeout, .ndo_get_stats = owl_emac_ndo_get_stats, }; diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 7965e5e3c985..e0f6cc910bd2 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -625,7 +625,7 @@ static const struct net_device_ops netdev_ops = { .ndo_tx_timeout = tx_timeout, .ndo_get_stats = get_stats, .ndo_set_rx_mode = set_rx_mode, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef VLAN_SUPPORT diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 41f8821f792d..920633161174 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3882,7 +3882,7 @@ static const struct net_device_ops et131x_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_get_stats = et131x_stats, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, }; static int et131x_pci_setup(struct pci_dev *pdev, diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index f99ae317c188..037baea1c738 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -774,7 +774,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_start_xmit = emac_start_xmit, .ndo_tx_timeout = emac_timeout, .ndo_set_rx_mode = emac_set_rx_mode, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = emac_set_mac_address, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 9cac5aa75a73..92e4246dc359 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1729,7 +1729,7 @@ static const struct net_device_ops amd8111e_netdev_ops = { .ndo_set_rx_mode = amd8111e_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = amd8111e_set_mac_address, - .ndo_do_ioctl = amd8111e_ioctl, + .ndo_eth_ioctl = amd8111e_ioctl, .ndo_change_mtu = amd8111e_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = amd8111e_poll, diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 19e195420e24..9c1636222b99 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1051,7 +1051,7 @@ static const struct net_device_ops au1000_netdev_ops = { .ndo_stop = au1000_close, .ndo_start_xmit = au1000_tx, .ndo_set_rx_mode = au1000_multicast_list, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_tx_timeout = au1000_tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 4100ab07e6b7..70d76fdb9f56 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1572,7 +1572,7 @@ static const struct net_device_ops pcnet32_netdev_ops = { .ndo_tx_timeout = pcnet32_tx_timeout, .ndo_get_stats = pcnet32_get_stats, .ndo_set_rx_mode = pcnet32_set_multicast_list, - .ndo_do_ioctl = pcnet32_ioctl, + .ndo_eth_ioctl = pcnet32_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 4f714f874c4f..17a585adfb49 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2284,7 +2284,7 @@ static const struct net_device_ops xgbe_netdev_ops = { .ndo_set_rx_mode = xgbe_set_rx_mode, .ndo_set_mac_address = xgbe_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = xgbe_ioctl, + .ndo_eth_ioctl = xgbe_ioctl, .ndo_change_mtu = xgbe_change_mtu, .ndo_tx_timeout = xgbe_tx_timeout, .ndo_get_stats64 = xgbe_get_stats64, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 4af0cd9530de..e22935ce9573 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -421,7 +421,7 @@ static const struct net_device_ops aq_ndev_ops = { .ndo_change_mtu = aq_ndev_change_mtu, .ndo_set_mac_address = aq_ndev_set_mac_address, .ndo_set_features = aq_ndev_set_features, - .ndo_do_ioctl = aq_ndev_ioctl, + .ndo_eth_ioctl = aq_ndev_ioctl, .ndo_vlan_rx_add_vid = aq_ndo_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = aq_ndo_vlan_rx_kill_vid, .ndo_setup_tc = aq_ndo_setup_tc, diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 67b8113a2b53..38c288ec9059 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -844,7 +844,7 @@ static const struct net_device_ops arc_emac_netdev_ops = { .ndo_set_mac_address = arc_emac_set_address, .ndo_get_stats = arc_emac_stats, .ndo_set_rx_mode = arc_emac_set_rx_mode, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = arc_emac_poll_controller, #endif diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 1ba81b1eb6fd..02ae98aabf91 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1851,7 +1851,7 @@ static const struct net_device_ops ag71xx_netdev_ops = { .ndo_open = ag71xx_open, .ndo_stop = ag71xx_stop, .ndo_start_xmit = ag71xx_hard_start_xmit, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_tx_timeout = ag71xx_tx_timeout, .ndo_change_mtu = ag71xx_change_mtu, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 11ef1fbe7aee..4ea157efca86 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1701,7 +1701,7 @@ static const struct net_device_ops alx_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = alx_set_mac_address, .ndo_change_mtu = alx_change_mtu, - .ndo_do_ioctl = alx_ioctl, + .ndo_eth_ioctl = alx_ioctl, .ndo_tx_timeout = alx_tx_timeout, .ndo_fix_features = alx_fix_features, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 1c6246a5dc22..3b51b172b317 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2609,7 +2609,7 @@ static const struct net_device_ops atl1c_netdev_ops = { .ndo_change_mtu = atl1c_change_mtu, .ndo_fix_features = atl1c_fix_features, .ndo_set_features = atl1c_set_features, - .ndo_do_ioctl = atl1c_ioctl, + .ndo_eth_ioctl = atl1c_ioctl, .ndo_tx_timeout = atl1c_tx_timeout, .ndo_get_stats = atl1c_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 2eb0a2ab69f6..753973ac922e 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -2247,7 +2247,7 @@ static const struct net_device_ops atl1e_netdev_ops = { .ndo_fix_features = atl1e_fix_features, .ndo_set_features = atl1e_set_features, .ndo_change_mtu = atl1e_change_mtu, - .ndo_do_ioctl = atl1e_ioctl, + .ndo_eth_ioctl = atl1e_ioctl, .ndo_tx_timeout = atl1e_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = atl1e_netpoll, diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index c67201a13cf5..68f6c0bbd945 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2885,7 +2885,7 @@ static const struct net_device_ops atl1_netdev_ops = { .ndo_change_mtu = atl1_change_mtu, .ndo_fix_features = atlx_fix_features, .ndo_set_features = atlx_set_features, - .ndo_do_ioctl = atlx_ioctl, + .ndo_eth_ioctl = atlx_ioctl, .ndo_tx_timeout = atlx_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = atl1_poll_controller, diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 0cc0db04c27d..b69298ddb647 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1293,7 +1293,7 @@ static const struct net_device_ops atl2_netdev_ops = { .ndo_change_mtu = atl2_change_mtu, .ndo_fix_features = atl2_fix_features, .ndo_set_features = atl2_set_features, - .ndo_do_ioctl = atl2_ioctl, + .ndo_eth_ioctl = atl2_ioctl, .ndo_tx_timeout = atl2_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = atl2_poll_controller, diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index ad2655efe423..fa784953c601 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2198,7 +2198,7 @@ static const struct net_device_ops b44_netdev_ops = { .ndo_set_rx_mode = b44_set_rx_mode, .ndo_set_mac_address = b44_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = b44_ioctl, + .ndo_eth_ioctl = b44_ioctl, .ndo_tx_timeout = b44_tx_timeout, .ndo_change_mtu = b44_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 977f097fc7bf..5ec056a26cf8 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1699,7 +1699,7 @@ static const struct net_device_ops bcm_enet_ops = { .ndo_start_xmit = bcm_enet_start_xmit, .ndo_set_mac_address = bcm_enet_set_mac_address, .ndo_set_rx_mode = bcm_enet_set_multicast_list, - .ndo_do_ioctl = bcm_enet_ioctl, + .ndo_eth_ioctl = bcm_enet_ioctl, .ndo_change_mtu = bcm_enet_change_mtu, }; @@ -2446,7 +2446,7 @@ static const struct net_device_ops bcm_enetsw_ops = { .ndo_stop = bcm_enetsw_stop, .ndo_start_xmit = bcm_enet_start_xmit, .ndo_change_mtu = bcm_enet_change_mtu, - .ndo_do_ioctl = bcm_enetsw_ioctl, + .ndo_eth_ioctl = bcm_enetsw_ioctl, }; diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 075f6e146b29..fe4d99abd548 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1263,7 +1263,7 @@ static const struct net_device_ops bgmac_netdev_ops = { .ndo_set_rx_mode = bgmac_set_rx_mode, .ndo_set_mac_address = bgmac_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_change_mtu = bgmac_change_mtu, }; diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index bee6cfad9fc6..89ee1c0e9c79 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8546,7 +8546,7 @@ static const struct net_device_ops bnx2_netdev_ops = { .ndo_stop = bnx2_close, .ndo_get_stats64 = bnx2_get_stats64, .ndo_set_rx_mode = bnx2_set_rx_mode, - .ndo_do_ioctl = bnx2_ioctl, + .ndo_eth_ioctl = bnx2_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = bnx2_change_mac_addr, .ndo_change_mtu = bnx2_change_mtu, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 2acbc73dcd18..6d98134913cd 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13048,7 +13048,7 @@ static const struct net_device_ops bnx2x_netdev_ops = { .ndo_set_rx_mode = bnx2x_set_rx_mode, .ndo_set_mac_address = bnx2x_change_mac_addr, .ndo_validate_addr = bnx2x_validate_addr, - .ndo_do_ioctl = bnx2x_ioctl, + .ndo_eth_ioctl = bnx2x_ioctl, .ndo_change_mtu = bnx2x_change_mtu, .ndo_fix_features = bnx2x_fix_features, .ndo_set_features = bnx2x_set_features, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4db162cee911..e34c362a3c58 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12667,7 +12667,7 @@ static const struct net_device_ops bnxt_netdev_ops = { .ndo_stop = bnxt_close, .ndo_get_stats64 = bnxt_get_stats64, .ndo_set_rx_mode = bnxt_set_rx_mode, - .ndo_do_ioctl = bnxt_ioctl, + .ndo_eth_ioctl = bnxt_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = bnxt_change_mac_addr, .ndo_change_mtu = bnxt_change_mtu, diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index db74241935ab..63e2237e0cb4 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3659,7 +3659,7 @@ static const struct net_device_ops bcmgenet_netdev_ops = { .ndo_tx_timeout = bcmgenet_timeout, .ndo_set_rx_mode = bcmgenet_set_rx_mode, .ndo_set_mac_address = bcmgenet_set_mac_addr, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_features = bcmgenet_set_features, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = bcmgenet_poll_controller, diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 5b4568c2ad1c..f38f40eb966e 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2136,7 +2136,7 @@ static const struct net_device_ops sbmac_netdev_ops = { .ndo_start_xmit = sbmac_start_tx, .ndo_set_rx_mode = sbmac_set_rx_mode, .ndo_tx_timeout = sbmac_tx_timeout, - .ndo_do_ioctl = sbmac_mii_ioctl, + .ndo_eth_ioctl = sbmac_mii_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index b0e49643f483..6f82eeaa4b9f 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14290,7 +14290,7 @@ static const struct net_device_ops tg3_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = tg3_set_rx_mode, .ndo_set_mac_address = tg3_set_mac_addr, - .ndo_do_ioctl = tg3_ioctl, + .ndo_eth_ioctl = tg3_ioctl, .ndo_tx_timeout = tg3_tx_timeout, .ndo_change_mtu = tg3_change_mtu, .ndo_fix_features = tg3_fix_features, diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 7d2fe13a52f8..181ebc235925 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3664,7 +3664,7 @@ static const struct net_device_ops macb_netdev_ops = { .ndo_start_xmit = macb_start_xmit, .ndo_set_rx_mode = macb_set_rx_mode, .ndo_get_stats = macb_get_stats, - .ndo_do_ioctl = macb_ioctl, + .ndo_eth_ioctl = macb_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = macb_change_mtu, .ndo_set_mac_address = eth_mac_addr, @@ -4323,7 +4323,7 @@ static const struct net_device_ops at91ether_netdev_ops = { .ndo_get_stats = macb_get_stats, .ndo_set_rx_mode = macb_set_rx_mode, .ndo_set_mac_address = eth_mac_addr, - .ndo_do_ioctl = macb_ioctl, + .ndo_eth_ioctl = macb_ioctl, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = at91ether_poll_controller, diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 591229b96257..a4a5209a9386 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -3223,7 +3223,7 @@ static const struct net_device_ops lionetdevops = { .ndo_vlan_rx_add_vid = liquidio_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = liquidio_vlan_rx_kill_vid, .ndo_change_mtu = liquidio_change_mtu, - .ndo_do_ioctl = liquidio_ioctl, + .ndo_eth_ioctl = liquidio_ioctl, .ndo_fix_features = liquidio_fix_features, .ndo_set_features = liquidio_set_features, .ndo_set_vf_mac = liquidio_set_vf_mac, diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index ffddb3126a32..3085dd455a17 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1889,7 +1889,7 @@ static const struct net_device_ops lionetdevops = { .ndo_vlan_rx_add_vid = liquidio_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = liquidio_vlan_rx_kill_vid, .ndo_change_mtu = liquidio_change_mtu, - .ndo_do_ioctl = liquidio_ioctl, + .ndo_eth_ioctl = liquidio_ioctl, .ndo_fix_features = liquidio_fix_features, .ndo_set_features = liquidio_set_features, }; diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 48ff6fb0eed9..30463a6d1f8c 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1373,7 +1373,7 @@ static const struct net_device_ops octeon_mgmt_ops = { .ndo_start_xmit = octeon_mgmt_xmit, .ndo_set_rx_mode = octeon_mgmt_set_rx_filtering, .ndo_set_mac_address = octeon_mgmt_set_mac_address, - .ndo_do_ioctl = octeon_mgmt_ioctl, + .ndo_eth_ioctl = octeon_mgmt_ioctl, .ndo_change_mtu = octeon_mgmt_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = octeon_mgmt_poll_controller, diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index e2b290135fd9..efaaa57d4ed5 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -2096,7 +2096,7 @@ static const struct net_device_ops nicvf_netdev_ops = { .ndo_fix_features = nicvf_fix_features, .ndo_set_features = nicvf_set_features, .ndo_bpf = nicvf_xdp, - .ndo_do_ioctl = nicvf_ioctl, + .ndo_eth_ioctl = nicvf_ioctl, .ndo_set_rx_mode = nicvf_set_rx_mode, }; diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 512da98019c6..e7575d41f4f5 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -924,7 +924,7 @@ static const struct net_device_ops cxgb_netdev_ops = { .ndo_get_stats = t1_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = t1_set_rxmode, - .ndo_do_ioctl = t1_ioctl, + .ndo_eth_ioctl = t1_ioctl, .ndo_change_mtu = t1_change_mtu, .ndo_set_mac_address = t1_set_mac_addr, .ndo_fix_features = t1_fix_features, diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index eae893d7d840..72af9d2a00ae 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -3184,7 +3184,7 @@ static const struct net_device_ops cxgb_netdev_ops = { .ndo_get_stats = cxgb_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = cxgb_set_rxmode, - .ndo_do_ioctl = cxgb_ioctl, + .ndo_eth_ioctl = cxgb_ioctl, .ndo_siocdevprivate = cxgb_siocdevprivate, .ndo_change_mtu = cxgb_change_mtu, .ndo_set_mac_address = cxgb_set_mac_addr, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index dbf9a0e6601d..aa8573202c37 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3872,7 +3872,7 @@ static const struct net_device_ops cxgb4_netdev_ops = { .ndo_set_mac_address = cxgb_set_mac_addr, .ndo_set_features = cxgb_set_features, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = cxgb_ioctl, + .ndo_eth_ioctl = cxgb_ioctl, .ndo_change_mtu = cxgb_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = cxgb_netpoll, diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 2820a0bb971b..2842628ad2c5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2837,7 +2837,7 @@ static const struct net_device_ops cxgb4vf_netdev_ops = { .ndo_set_rx_mode = cxgb4vf_set_rxmode, .ndo_set_mac_address = cxgb4vf_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = cxgb4vf_do_ioctl, + .ndo_eth_ioctl = cxgb4vf_do_ioctl, .ndo_change_mtu = cxgb4vf_change_mtu, .ndo_fix_features = cxgb4vf_fix_features, .ndo_set_features = cxgb4vf_set_features, diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index 9f5e5ec69991..072fac5f5d24 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -733,7 +733,7 @@ static const struct net_device_ops ep93xx_netdev_ops = { .ndo_open = ep93xx_open, .ndo_stop = ep93xx_close, .ndo_start_xmit = ep93xx_xmit, - .ndo_do_ioctl = ep93xx_ioctl, + .ndo_eth_ioctl = ep93xx_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 2a8bf53c2f75..e842de6f6635 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1372,7 +1372,7 @@ static const struct net_device_ops dm9000_netdev_ops = { .ndo_start_xmit = dm9000_start_xmit, .ndo_tx_timeout = dm9000_timeout, .ndo_set_rx_mode = dm9000_hash_table, - .ndo_do_ioctl = dm9000_ioctl, + .ndo_eth_ioctl = dm9000_ioctl, .ndo_set_features = dm9000_set_features, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index c1dcd6ca1457..fcedd733bacb 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1271,7 +1271,7 @@ static const struct net_device_ops tulip_netdev_ops = { .ndo_tx_timeout = tulip_tx_timeout, .ndo_stop = tulip_close, .ndo_get_stats = tulip_get_stats, - .ndo_do_ioctl = private_ioctl, + .ndo_eth_ioctl = private_ioctl, .ndo_set_rx_mode = set_rx_mode, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index f6ff1f76eacb..07a48f6bf0fa 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -341,7 +341,7 @@ static const struct net_device_ops netdev_ops = { .ndo_start_xmit = start_tx, .ndo_get_stats = get_stats, .ndo_set_rx_mode = set_rx_mode, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_tx_timeout = tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 734acb834c98..202ecb132053 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -95,7 +95,7 @@ static const struct net_device_ops netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_set_rx_mode = set_multicast, - .ndo_do_ioctl = rio_ioctl, + .ndo_eth_ioctl = rio_ioctl, .ndo_tx_timeout = rio_tx_timeout, }; diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index ee0ca712dd1c..c36d186dffed 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -479,7 +479,7 @@ static const struct net_device_ops netdev_ops = { .ndo_start_xmit = start_tx, .ndo_get_stats = get_stats, .ndo_set_rx_mode = set_rx_mode, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_tx_timeout = tx_timeout, .ndo_change_mtu = change_mtu, .ndo_set_mac_address = sundance_set_mac_addr, diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index 48c6eb142dcc..6c51cf991dad 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -742,7 +742,7 @@ static const struct net_device_ops dnet_netdev_ops = { .ndo_stop = dnet_close, .ndo_get_stats = dnet_get_stats, .ndo_start_xmit = dnet_start_xmit, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index e1b43b07755b..ed1ed48e7483 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1009,7 +1009,7 @@ static const struct ethtool_ops ethoc_ethtool_ops = { static const struct net_device_ops ethoc_netdev_ops = { .ndo_open = ethoc_open, .ndo_stop = ethoc_stop, - .ndo_do_ioctl = ethoc_ioctl, + .ndo_eth_ioctl = ethoc_ioctl, .ndo_set_mac_address = ethoc_set_mac_address, .ndo_set_rx_mode = ethoc_set_multicast_list, .ndo_change_mtu = ethoc_change_mtu, diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 11dbbfd38770..ff76e401a014 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1616,7 +1616,7 @@ static const struct net_device_ops ftgmac100_netdev_ops = { .ndo_start_xmit = ftgmac100_hard_start_xmit, .ndo_set_mac_address = ftgmac100_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_tx_timeout = ftgmac100_tx_timeout, .ndo_set_rx_mode = ftgmac100_set_rx_mode, .ndo_set_features = ftgmac100_set_features, diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 5a1a8f2ea63c..8a341e2d5833 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -1043,7 +1043,7 @@ static const struct net_device_ops ftmac100_netdev_ops = { .ndo_start_xmit = ftmac100_hard_start_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = ftmac100_do_ioctl, + .ndo_eth_ioctl = ftmac100_do_ioctl, }; /****************************************************************************** diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index 0f141c14d72d..25c91b3c5fd3 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -463,7 +463,7 @@ static const struct net_device_ops netdev_ops = { .ndo_start_xmit = start_tx, .ndo_get_stats = get_stats, .ndo_set_rx_mode = set_rx_mode, - .ndo_do_ioctl = mii_ioctl, + .ndo_eth_ioctl = mii_ioctl, .ndo_tx_timeout = fealnx_tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index e6826561cf11..685d2d8a3b36 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -3157,7 +3157,7 @@ static const struct net_device_ops dpaa_ops = { .ndo_set_mac_address = dpaa_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = dpaa_set_rx_mode, - .ndo_do_ioctl = dpaa_ioctl, + .ndo_eth_ioctl = dpaa_ioctl, .ndo_setup_tc = dpaa_setup_tc, .ndo_change_mtu = dpaa_change_mtu, .ndo_bpf = dpaa_xdp, diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 973352393bd4..f664021c3ad1 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -2594,7 +2594,7 @@ static const struct net_device_ops dpaa2_eth_ops = { .ndo_get_stats64 = dpaa2_eth_get_stats, .ndo_set_rx_mode = dpaa2_eth_set_rx_mode, .ndo_set_features = dpaa2_eth_set_features, - .ndo_do_ioctl = dpaa2_eth_ioctl, + .ndo_eth_ioctl = dpaa2_eth_ioctl, .ndo_change_mtu = dpaa2_eth_change_mtu, .ndo_bpf = dpaa2_eth_xdp, .ndo_xdp_xmit = dpaa2_eth_xdp_xmit, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index c84f6c226743..60d94e0a07d6 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -735,7 +735,7 @@ static const struct net_device_ops enetc_ndev_ops = { .ndo_set_vf_vlan = enetc_pf_set_vf_vlan, .ndo_set_vf_spoofchk = enetc_pf_set_vf_spoofchk, .ndo_set_features = enetc_pf_set_features, - .ndo_do_ioctl = enetc_ioctl, + .ndo_eth_ioctl = enetc_ioctl, .ndo_setup_tc = enetc_setup_tc, .ndo_bpf = enetc_setup_bpf, .ndo_xdp_xmit = enetc_xdp_xmit, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 03090ba7e226..1a9d1e8b772c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -99,7 +99,7 @@ static const struct net_device_ops enetc_ndev_ops = { .ndo_get_stats = enetc_get_stats, .ndo_set_mac_address = enetc_vf_set_mac_addr, .ndo_set_features = enetc_vf_set_features, - .ndo_do_ioctl = enetc_ioctl, + .ndo_eth_ioctl = enetc_ioctl, .ndo_setup_tc = enetc_setup_tc, }; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 8aea707a65a7..e361be85f26f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3280,7 +3280,7 @@ static const struct net_device_ops fec_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = fec_timeout, .ndo_set_mac_address = fec_set_mac_address, - .ndo_do_ioctl = fec_enet_ioctl, + .ndo_eth_ioctl = fec_enet_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = fec_poll_controller, #endif diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 02c47658a215..73ff359a15f1 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -792,7 +792,7 @@ static const struct net_device_ops mpc52xx_fec_netdev_ops = { .ndo_set_rx_mode = mpc52xx_fec_set_multicast_list, .ndo_set_mac_address = mpc52xx_fec_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_tx_timeout = mpc52xx_fec_tx_timeout, .ndo_get_stats = mpc52xx_fec_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 6ee325ad35c5..2db6e38a772e 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -900,7 +900,7 @@ static const struct net_device_ops fs_enet_netdev_ops = { .ndo_start_xmit = fs_enet_start_xmit, .ndo_tx_timeout = fs_timeout, .ndo_set_rx_mode = fs_set_multicast_list, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 9646483137c4..af6ad94bf24a 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -3184,7 +3184,7 @@ static const struct net_device_ops gfar_netdev_ops = { .ndo_set_features = gfar_set_features, .ndo_set_rx_mode = gfar_set_multi, .ndo_tx_timeout = gfar_timeout, - .ndo_do_ioctl = gfar_ioctl, + .ndo_eth_ioctl = gfar_ioctl, .ndo_get_stats64 = gfar_get_stats64, .ndo_change_carrier = fixed_phy_change_carrier, .ndo_set_mac_address = gfar_set_mac_addr, diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 0acfafb73db1..3eb288d10b0c 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3516,7 +3516,7 @@ static const struct net_device_ops ucc_geth_netdev_ops = { .ndo_set_mac_address = ucc_geth_set_mac_addr, .ndo_set_rx_mode = ucc_geth_set_multi, .ndo_tx_timeout = ucc_geth_timeout, - .ndo_do_ioctl = ucc_geth_ioctl, + .ndo_eth_ioctl = ucc_geth_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ucc_netpoll, #endif diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index 3c4db4a6b431..22bf914f2dbd 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -685,7 +685,7 @@ static const struct net_device_ops hisi_femac_netdev_ops = { .ndo_open = hisi_femac_net_open, .ndo_stop = hisi_femac_net_close, .ndo_start_xmit = hisi_femac_net_xmit, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_mac_address = hisi_femac_set_mac_address, .ndo_set_rx_mode = hisi_femac_net_set_rx_mode, }; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index ad534f9e41ab..343c605c4be8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1945,7 +1945,7 @@ static const struct net_device_ops hns_nic_netdev_ops = { .ndo_tx_timeout = hns_nic_net_timeout, .ndo_set_mac_address = hns_nic_net_set_mac_address, .ndo_change_mtu = hns_nic_change_mtu, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_features = hns_nic_set_features, .ndo_fix_features = hns_nic_fix_features, .ndo_get_stats64 = hns_nic_get_stats64, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index cdb5f14fb6bc..cb8d5da3654f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2852,7 +2852,7 @@ static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_start_xmit = hns3_nic_net_xmit, .ndo_tx_timeout = hns3_nic_net_timeout, .ndo_set_mac_address = hns3_nic_net_set_mac_address, - .ndo_do_ioctl = hns3_nic_do_ioctl, + .ndo_eth_ioctl = hns3_nic_do_ioctl, .ndo_change_mtu = hns3_nic_change_mtu, .ndo_set_features = hns3_nic_set_features, .ndo_features_check = hns3_features_check, diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 471be6ec7e8a..664a91af662d 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -3011,7 +3011,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_stop = emac_close, .ndo_get_stats = emac_stats, .ndo_set_rx_mode = emac_set_multicast_list, - .ndo_do_ioctl = emac_ioctl, + .ndo_eth_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = emac_set_mac_address, @@ -3023,7 +3023,7 @@ static const struct net_device_ops emac_gige_netdev_ops = { .ndo_stop = emac_close, .ndo_get_stats = emac_stats, .ndo_set_rx_mode = emac_set_multicast_list, - .ndo_do_ioctl = emac_ioctl, + .ndo_eth_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = emac_set_mac_address, diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 737ba85e409f..3d9b4f99d357 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1630,7 +1630,7 @@ static const struct net_device_ops ibmveth_netdev_ops = { .ndo_stop = ibmveth_close, .ndo_start_xmit = ibmveth_start_xmit, .ndo_set_rx_mode = ibmveth_set_multicast_list, - .ndo_do_ioctl = ibmveth_ioctl, + .ndo_eth_ioctl = ibmveth_ioctl, .ndo_change_mtu = ibmveth_change_mtu, .ndo_fix_features = ibmveth_fix_features, .ndo_set_features = ibmveth_set_features, diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 1ec924c556c5..373eb027b925 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2809,7 +2809,7 @@ static const struct net_device_ops e100_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = e100_set_multicast_list, .ndo_set_mac_address = e100_set_mac_address, - .ndo_do_ioctl = e100_do_ioctl, + .ndo_eth_ioctl = e100_do_ioctl, .ndo_tx_timeout = e100_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = e100_netpoll, diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index c2a109126c27..bed4f040face 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -832,7 +832,7 @@ static const struct net_device_ops e1000_netdev_ops = { .ndo_set_mac_address = e1000_set_mac, .ndo_tx_timeout = e1000_tx_timeout, .ndo_change_mtu = e1000_change_mtu, - .ndo_do_ioctl = e1000_ioctl, + .ndo_eth_ioctl = e1000_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_add_vid = e1000_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = e1000_vlan_rx_kill_vid, diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 3c22b509fa79..900b3ab998bd 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -7354,7 +7354,7 @@ static const struct net_device_ops e1000e_netdev_ops = { .ndo_set_rx_mode = e1000e_set_rx_mode, .ndo_set_mac_address = e1000_set_mac, .ndo_change_mtu = e1000_change_mtu, - .ndo_do_ioctl = e1000_ioctl, + .ndo_eth_ioctl = e1000_ioctl, .ndo_tx_timeout = e1000_tx_timeout, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 53c1fbeee62a..5b4012a09acb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13256,7 +13256,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = i40e_set_mac, .ndo_change_mtu = i40e_change_mtu, - .ndo_do_ioctl = i40e_ioctl, + .ndo_eth_ioctl = i40e_ioctl, .ndo_tx_timeout = i40e_tx_timeout, .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid, diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ef8d1815af56..33916ed9e874 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6558,12 +6558,12 @@ event_after: } /** - * ice_do_ioctl - Access the hwtstamp interface + * ice_eth_ioctl - Access the hwtstamp interface * @netdev: network interface device structure * @ifr: interface request data * @cmd: ioctl command */ -static int ice_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +static int ice_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_pf *pf = np->vsi->back; @@ -7229,7 +7229,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, .ndo_set_tx_maxrate = ice_set_tx_maxrate, - .ndo_do_ioctl = ice_do_ioctl, + .ndo_eth_ioctl = ice_eth_ioctl, .ndo_set_vf_spoofchk = ice_set_vf_spoofchk, .ndo_set_vf_mac = ice_set_vf_mac, .ndo_get_vf_config = ice_get_vf_cfg, diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 171a7a629b20..751de06019a0 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2991,7 +2991,7 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_set_rx_mode = igb_set_rx_mode, .ndo_set_mac_address = igb_set_mac, .ndo_change_mtu = igb_change_mtu, - .ndo_do_ioctl = igb_ioctl, + .ndo_eth_ioctl = igb_ioctl, .ndo_tx_timeout = igb_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid, diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 1bbe9862a758..d32e72d953c8 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2657,7 +2657,7 @@ static const struct net_device_ops igbvf_netdev_ops = { .ndo_set_rx_mode = igbvf_set_rx_mode, .ndo_set_mac_address = igbvf_set_mac, .ndo_change_mtu = igbvf_change_mtu, - .ndo_do_ioctl = igbvf_ioctl, + .ndo_eth_ioctl = igbvf_ioctl, .ndo_tx_timeout = igbvf_tx_timeout, .ndo_vlan_rx_add_vid = igbvf_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = igbvf_vlan_rx_kill_vid, diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 5c95bf82eaf7..b7aab35c1132 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6013,7 +6013,7 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_fix_features = igc_fix_features, .ndo_set_features = igc_set_features, .ndo_features_check = igc_features_check, - .ndo_do_ioctl = igc_ioctl, + .ndo_eth_ioctl = igc_ioctl, .ndo_setup_tc = igc_setup_tc, .ndo_bpf = igc_bpf, .ndo_xdp_xmit = igc_xdp_xmit, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 14aea40da50f..24e06ba6f5e9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10247,7 +10247,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_set_tx_maxrate = ixgbe_tx_maxrate, .ndo_vlan_rx_add_vid = ixgbe_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ixgbe_vlan_rx_kill_vid, - .ndo_do_ioctl = ixgbe_ioctl, + .ndo_eth_ioctl = ixgbe_ioctl, .ndo_set_vf_mac = ixgbe_ndo_set_vf_mac, .ndo_set_vf_vlan = ixgbe_ndo_set_vf_vlan, .ndo_set_vf_rate = ixgbe_ndo_set_vf_bw, diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index f1b9284e0bea..1251b74fe0e2 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2901,7 +2901,7 @@ static const struct net_device_ops jme_netdev_ops = { .ndo_open = jme_open, .ndo_stop = jme_close, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = jme_ioctl, + .ndo_eth_ioctl = jme_ioctl, .ndo_start_xmit = jme_start_xmit, .ndo_set_mac_address = jme_set_macaddr, .ndo_set_rx_mode = jme_set_multi, diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index b30a45725374..3e9f324f1061 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -1272,7 +1272,7 @@ static const struct net_device_ops korina_netdev_ops = { .ndo_start_xmit = korina_send_packet, .ndo_set_rx_mode = korina_multicast_list, .ndo_tx_timeout = korina_tx_timeout, - .ndo_do_ioctl = korina_ioctl, + .ndo_eth_ioctl = korina_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 2d0c52f7106b..62f8c5212182 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -609,7 +609,7 @@ static const struct net_device_ops ltq_eth_netdev_ops = { .ndo_stop = ltq_etop_stop, .ndo_start_xmit = ltq_etop_tx, .ndo_change_mtu = ltq_etop_change_mtu, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_set_mac_address = ltq_etop_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = ltq_etop_set_multicast_list, diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index d207bfcaf31d..6502c5c2ebca 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -3060,7 +3060,7 @@ static const struct net_device_ops mv643xx_eth_netdev_ops = { .ndo_set_rx_mode = mv643xx_eth_set_rx_mode, .ndo_set_mac_address = mv643xx_eth_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = mv643xx_eth_ioctl, + .ndo_eth_ioctl = mv643xx_eth_ioctl, .ndo_change_mtu = mv643xx_eth_change_mtu, .ndo_set_features = mv643xx_eth_set_features, .ndo_tx_timeout = mv643xx_eth_tx_timeout, diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 975a1a77d445..ff8db311963c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4994,7 +4994,7 @@ static const struct net_device_ops mvneta_netdev_ops = { .ndo_change_mtu = mvneta_change_mtu, .ndo_fix_features = mvneta_fix_features, .ndo_get_stats64 = mvneta_get_stats64, - .ndo_do_ioctl = mvneta_ioctl, + .ndo_eth_ioctl = mvneta_ioctl, .ndo_bpf = mvneta_xdp, .ndo_xdp_xmit = mvneta_xdp_xmit, .ndo_setup_tc = mvneta_setup_tc, diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 878fb17dea41..99bd8b8aa0e2 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5702,7 +5702,7 @@ static const struct net_device_ops mvpp2_netdev_ops = { .ndo_set_mac_address = mvpp2_set_mac_address, .ndo_change_mtu = mvpp2_change_mtu, .ndo_get_stats64 = mvpp2_get_stats64, - .ndo_do_ioctl = mvpp2_ioctl, + .ndo_eth_ioctl = mvpp2_ioctl, .ndo_vlan_rx_add_vid = mvpp2_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mvpp2_vlan_rx_kill_vid, .ndo_set_features = mvpp2_set_features, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index f300b807a85b..3f03bbdd8d04 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -2326,7 +2326,7 @@ static const struct net_device_ops otx2_netdev_ops = { .ndo_set_features = otx2_set_features, .ndo_tx_timeout = otx2_tx_timeout, .ndo_get_stats64 = otx2_get_stats64, - .ndo_do_ioctl = otx2_ioctl, + .ndo_eth_ioctl = otx2_ioctl, .ndo_set_vf_mac = otx2_set_vf_mac, .ndo_set_vf_vlan = otx2_set_vf_vlan, .ndo_get_vf_config = otx2_get_vf_config, diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 9b48ae4bac39..fab53c9b8380 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1377,7 +1377,7 @@ static const struct net_device_ops pxa168_eth_netdev_ops = { .ndo_set_rx_mode = pxa168_eth_set_rx_mode, .ndo_set_mac_address = pxa168_eth_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_change_mtu = pxa168_eth_change_mtu, .ndo_tx_timeout = pxa168_eth_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index d4bb27ba1419..150c06ee3627 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -3787,7 +3787,7 @@ static const struct net_device_ops skge_netdev_ops = { .ndo_open = skge_up, .ndo_stop = skge_down, .ndo_start_xmit = skge_xmit_frame, - .ndo_do_ioctl = skge_ioctl, + .ndo_eth_ioctl = skge_ioctl, .ndo_get_stats = skge_get_stats, .ndo_tx_timeout = skge_tx_timeout, .ndo_change_mtu = skge_change_mtu, diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 8b8bff59c8fe..743ca96527fa 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4693,7 +4693,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = { .ndo_open = sky2_open, .ndo_stop = sky2_close, .ndo_start_xmit = sky2_xmit_frame, - .ndo_do_ioctl = sky2_ioctl, + .ndo_eth_ioctl = sky2_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = sky2_set_mac_address, .ndo_set_rx_mode = sky2_set_multicast, @@ -4710,7 +4710,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = { .ndo_open = sky2_open, .ndo_stop = sky2_close, .ndo_start_xmit = sky2_xmit_frame, - .ndo_do_ioctl = sky2_ioctl, + .ndo_eth_ioctl = sky2_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = sky2_set_mac_address, .ndo_set_rx_mode = sky2_set_multicast, diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 64adfd24e134..398c23cec815 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2933,7 +2933,7 @@ static const struct net_device_ops mtk_netdev_ops = { .ndo_start_xmit = mtk_start_xmit, .ndo_set_mac_address = mtk_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = mtk_do_ioctl, + .ndo_eth_ioctl = mtk_do_ioctl, .ndo_change_mtu = mtk_change_mtu, .ndo_tx_timeout = mtk_tx_timeout, .ndo_get_stats64 = mtk_get_stats64, diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 96d2891f1675..1d5dd2015453 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1162,7 +1162,7 @@ static const struct net_device_ops mtk_star_netdev_ops = { .ndo_start_xmit = mtk_star_netdev_start_xmit, .ndo_get_stats64 = mtk_star_netdev_get_stats64, .ndo_set_rx_mode = mtk_star_set_rx_mode, - .ndo_do_ioctl = mtk_star_netdev_ioctl, + .ndo_eth_ioctl = mtk_star_netdev_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 5d0c9c62382d..a2f61a87cef8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2828,7 +2828,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_set_mac_address = mlx4_en_set_mac, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = mlx4_en_change_mtu, - .ndo_do_ioctl = mlx4_en_ioctl, + .ndo_eth_ioctl = mlx4_en_ioctl, .ndo_tx_timeout = mlx4_en_tx_timeout, .ndo_vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b9a0459b58f1..b6c1e3124f96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4417,7 +4417,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_set_features = mlx5e_set_features, .ndo_fix_features = mlx5e_fix_features, .ndo_change_mtu = mlx5e_change_nic_mtu, - .ndo_do_ioctl = mlx5e_ioctl, + .ndo_eth_ioctl = mlx5e_ioctl, .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, .ndo_features_check = mlx5e_features_check, .ndo_tx_timeout = mlx5e_tx_timeout, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 6535c636ae22..a126cbc6f0d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -50,7 +50,7 @@ static const struct net_device_ops mlx5i_netdev_ops = { .ndo_init = mlx5i_dev_init, .ndo_uninit = mlx5i_dev_cleanup, .ndo_change_mtu = mlx5i_change_mtu, - .ndo_do_ioctl = mlx5i_ioctl, + .ndo_eth_ioctl = mlx5i_ioctl, }; /* IPoIB mlx5 netdev profile */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 18ee21b06a00..5308f23702bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -149,7 +149,7 @@ static const struct net_device_ops mlx5i_pkey_netdev_ops = { .ndo_get_stats64 = mlx5i_get_stats, .ndo_uninit = mlx5i_pkey_dev_cleanup, .ndo_change_mtu = mlx5i_pkey_change_mtu, - .ndo_do_ioctl = mlx5i_pkey_ioctl, + .ndo_eth_ioctl = mlx5i_pkey_ioctl, }; /* Child NDOs */ diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index a0a059e0154f..d22219613719 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -199,7 +199,7 @@ static int mlxbf_gige_stop(struct net_device *netdev) return 0; } -static int mlxbf_gige_do_ioctl(struct net_device *netdev, +static int mlxbf_gige_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { if (!(netif_running(netdev))) @@ -253,7 +253,7 @@ static const struct net_device_ops mlxbf_gige_netdev_ops = { .ndo_start_xmit = mlxbf_gige_start_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = mlxbf_gige_do_ioctl, + .ndo_eth_ioctl = mlxbf_gige_eth_ioctl, .ndo_set_rx_mode = mlxbf_gige_set_rx_mode, .ndo_get_stats64 = mlxbf_gige_get_stats64, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 88699e678544..081408e892d5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1207,7 +1207,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_set_features = mlxsw_sp_set_features, .ndo_get_devlink_port = mlxsw_sp_port_get_devlink_port, - .ndo_do_ioctl = mlxsw_sp_port_ioctl, + .ndo_eth_ioctl = mlxsw_sp_port_ioctl, }; static int diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 831518466de2..3f69bb59ba49 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -689,7 +689,7 @@ static int ks8851_net_ioctl(struct net_device *dev, struct ifreq *req, int cmd) static const struct net_device_ops ks8851_netdev_ops = { .ndo_open = ks8851_net_open, .ndo_stop = ks8851_net_stop, - .ndo_do_ioctl = ks8851_net_ioctl, + .ndo_eth_ioctl = ks8851_net_ioctl, .ndo_start_xmit = ks8851_start_xmit, .ndo_set_mac_address = ks8851_set_mac_address, .ndo_set_rx_mode = ks8851_set_rx_mode, diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 7945eb5e2fe8..a0ee155f9f51 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -6738,7 +6738,7 @@ static const struct net_device_ops netdev_ops = { .ndo_set_features = netdev_set_features, .ndo_set_mac_address = netdev_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_set_rx_mode = netdev_set_rx_mode, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = netdev_netpoll, diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index dae10328c6cf..9e8561cdc32a 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -2655,7 +2655,7 @@ static const struct net_device_ops lan743x_netdev_ops = { .ndo_open = lan743x_netdev_open, .ndo_stop = lan743x_netdev_close, .ndo_start_xmit = lan743x_netdev_xmit_frame, - .ndo_do_ioctl = lan743x_netdev_ioctl, + .ndo_eth_ioctl = lan743x_netdev_ioctl, .ndo_set_rx_mode = lan743x_netdev_set_multicast, .ndo_change_mtu = lan743x_netdev_change_mtu, .ndo_get_stats64 = lan743x_netdev_get_stats64, diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index c52f175df389..de900ea70fd4 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -823,7 +823,7 @@ static const struct net_device_ops ocelot_port_netdev_ops = { .ndo_vlan_rx_kill_vid = ocelot_vlan_rx_kill_vid, .ndo_set_features = ocelot_set_features, .ndo_setup_tc = ocelot_setup_tc, - .ndo_do_ioctl = ocelot_ioctl, + .ndo_eth_ioctl = ocelot_ioctl, .ndo_get_devlink_port = ocelot_get_devlink_port, }; diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 51b4b25d15ad..bd9d026e609d 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -790,7 +790,7 @@ static const struct net_device_ops natsemi_netdev_ops = { .ndo_get_stats = get_stats, .ndo_set_rx_mode = set_rx_mode, .ndo_change_mtu = natsemi_change_mtu, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_tx_timeout = ns_tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 0b017d4f5c08..09c0e839cca5 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7625,7 +7625,7 @@ static const struct net_device_ops s2io_netdev_ops = { .ndo_start_xmit = s2io_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = s2io_ndo_set_multicast, - .ndo_do_ioctl = s2io_ioctl, + .ndo_eth_ioctl = s2io_ioctl, .ndo_set_mac_address = s2io_set_mac_addr, .ndo_change_mtu = s2io_change_mtu, .ndo_set_features = s2io_set_features, diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 82eef4c72f01..20fb4ad29865 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3339,7 +3339,7 @@ static const struct net_device_ops vxge_netdev_ops = { .ndo_start_xmit = vxge_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = vxge_set_multicast, - .ndo_do_ioctl = vxge_ioctl, + .ndo_eth_ioctl = vxge_ioctl, .ndo_set_mac_address = vxge_set_mac_addr, .ndo_change_mtu = vxge_change_mtu, .ndo_fix_features = vxge_fix_features, diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 64c6842bd452..d29fe562b3de 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1219,7 +1219,7 @@ static const struct net_device_ops lpc_netdev_ops = { .ndo_stop = lpc_eth_close, .ndo_start_xmit = lpc_eth_hard_start_xmit, .ndo_set_rx_mode = lpc_eth_set_multicast_list, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_mac_address = lpc_set_mac_address, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index e351f3d1608f..bc35d5703bd2 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2333,7 +2333,7 @@ static const struct net_device_ops pch_gbe_netdev_ops = { .ndo_tx_timeout = pch_gbe_tx_timeout, .ndo_change_mtu = pch_gbe_change_mtu, .ndo_set_features = pch_gbe_set_features, - .ndo_do_ioctl = pch_gbe_ioctl, + .ndo_eth_ioctl = pch_gbe_ioctl, .ndo_set_rx_mode = pch_gbe_set_multi, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = pch_gbe_netpoll, diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index 94823c5f7dff..1a6336a56d3d 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -573,7 +573,7 @@ static const struct net_device_ops hamachi_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_tx_timeout = hamachi_tx_timeout, - .ndo_do_ioctl = hamachi_ioctl, + .ndo_eth_ioctl = hamachi_ioctl, .ndo_siocdevprivate = hamachi_siocdevprivate, }; diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index d1dd9bc1bc7f..f5cd8f51be7c 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -362,7 +362,7 @@ static const struct net_device_ops netdev_ops = { .ndo_set_rx_mode = set_rx_mode, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_tx_timeout = yellowfin_tx_timeout, }; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index af3a5368529c..537c2907b91e 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -2264,7 +2264,7 @@ static int ionic_stop(struct net_device *netdev) return 0; } -static int ionic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +static int ionic_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { struct ionic_lif *lif = netdev_priv(netdev); @@ -2526,7 +2526,7 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set) static const struct net_device_ops ionic_netdev_ops = { .ndo_open = ionic_open, .ndo_stop = ionic_stop, - .ndo_do_ioctl = ionic_do_ioctl, + .ndo_eth_ioctl = ionic_eth_ioctl, .ndo_start_xmit = ionic_start_xmit, .ndo_get_stats64 = ionic_get_stats64, .ndo_set_rx_mode = ionic_ndo_set_rx_mode, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 01ac1e93d27a..173878696143 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -644,7 +644,7 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = qede_change_mtu, - .ndo_do_ioctl = qede_ioctl, + .ndo_eth_ioctl = qede_ioctl, .ndo_tx_timeout = qede_tx_timeout, #ifdef CONFIG_QED_SRIOV .ndo_set_vf_mac = qede_set_vf_mac, diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index ad655f0a4965..9015a38eaced 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -377,7 +377,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_start_xmit = emac_start_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = emac_change_mtu, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_tx_timeout = emac_tx_timeout, .ndo_get_stats64 = emac_get_stats64, .ndo_set_features = emac_set_features, diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 47e9998b62f0..4b2eca5e08e2 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -954,7 +954,7 @@ static const struct net_device_ops r6040_netdev_ops = { .ndo_set_rx_mode = r6040_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_tx_timeout = r6040_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = r6040_poll_controller, diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 9677e257e9a1..edc61906694f 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1869,7 +1869,7 @@ static const struct net_device_ops cp_netdev_ops = { .ndo_set_mac_address = cp_set_mac_address, .ndo_set_rx_mode = cp_set_rx_mode, .ndo_get_stats = cp_get_stats, - .ndo_do_ioctl = cp_ioctl, + .ndo_eth_ioctl = cp_ioctl, .ndo_start_xmit = cp_start_xmit, .ndo_tx_timeout = cp_tx_timeout, .ndo_set_features = cp_set_features, diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index f0608f050050..2e6923cc653e 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -932,7 +932,7 @@ static const struct net_device_ops rtl8139_netdev_ops = { .ndo_set_mac_address = rtl8139_set_mac_address, .ndo_start_xmit = rtl8139_start_xmit, .ndo_set_rx_mode = rtl8139_set_rx_mode, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_tx_timeout = rtl8139_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = rtl8139_poll_controller, diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index c7af5bc3b8af..fa2dab6980bb 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4979,7 +4979,7 @@ static const struct net_device_ops rtl_netdev_ops = { .ndo_fix_features = rtl8169_fix_features, .ndo_set_features = rtl8169_set_features, .ndo_set_mac_address = rtl_set_mac_address, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_rx_mode = rtl_set_rx_mode, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = rtl8169_netpoll, diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 805397088850..f4dfe9f71d06 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1872,7 +1872,7 @@ static const struct net_device_ops ravb_netdev_ops = { .ndo_get_stats = ravb_get_stats, .ndo_set_rx_mode = ravb_set_rx_mode, .ndo_tx_timeout = ravb_tx_timeout, - .ndo_do_ioctl = ravb_do_ioctl, + .ndo_eth_ioctl = ravb_do_ioctl, .ndo_change_mtu = ravb_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 840478692a37..6c8ba916d1a6 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3141,7 +3141,7 @@ static const struct net_device_ops sh_eth_netdev_ops = { .ndo_get_stats = sh_eth_get_stats, .ndo_set_rx_mode = sh_eth_set_rx_mode, .ndo_tx_timeout = sh_eth_tx_timeout, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_change_mtu = sh_eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, @@ -3157,7 +3157,7 @@ static const struct net_device_ops sh_eth_netdev_ops_tsu = { .ndo_vlan_rx_add_vid = sh_eth_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = sh_eth_vlan_rx_kill_vid, .ndo_tx_timeout = sh_eth_tx_timeout, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_change_mtu = sh_eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 090bcd2fb758..6781aa636d58 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -1964,7 +1964,7 @@ static const struct net_device_ops sxgbe_netdev_ops = { .ndo_set_features = sxgbe_set_features, .ndo_set_rx_mode = sxgbe_set_rx_mode, .ndo_tx_timeout = sxgbe_tx_timeout, - .ndo_do_ioctl = sxgbe_ioctl, + .ndo_eth_ioctl = sxgbe_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = sxgbe_poll_controller, #endif diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 37fcf2eb0741..a295e2621cf3 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -591,7 +591,7 @@ static const struct net_device_ops efx_netdev_ops = { .ndo_tx_timeout = efx_watchdog, .ndo_start_xmit = efx_hard_start_xmit, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = efx_ioctl, + .ndo_eth_ioctl = efx_ioctl, .ndo_change_mtu = efx_change_mtu, .ndo_set_mac_address = efx_set_mac_address, .ndo_set_rx_mode = efx_set_rx_mode, diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index 9ec752a43c75..c177ea0f301e 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2219,7 +2219,7 @@ static const struct net_device_ops ef4_netdev_ops = { .ndo_tx_timeout = ef4_watchdog, .ndo_start_xmit = ef4_hard_start_xmit, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = ef4_ioctl, + .ndo_eth_ioctl = ef4_ioctl, .ndo_change_mtu = ef4_change_mtu, .ndo_set_mac_address = ef4_set_mac_address, .ndo_set_rx_mode = ef4_set_rx_mode, diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index 2b29fd4cbdf4..062f7844c496 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -820,7 +820,7 @@ static const struct net_device_ops ioc3_netdev_ops = { .ndo_tx_timeout = ioc3_timeout, .ndo_get_stats = ioc3_get_stats, .ndo_set_rx_mode = ioc3_set_multicast_list, - .ndo_do_ioctl = ioc3_ioctl, + .ndo_eth_ioctl = ioc3_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = ioc3_set_mac_address, }; diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 0c396ecd3389..efce834d8ee6 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -812,7 +812,7 @@ static const struct net_device_ops meth_netdev_ops = { .ndo_open = meth_open, .ndo_stop = meth_release, .ndo_start_xmit = meth_tx, - .ndo_do_ioctl = meth_ioctl, + .ndo_eth_ioctl = meth_ioctl, .ndo_tx_timeout = meth_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index 676b193833c0..3d1a18a01ce5 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1841,7 +1841,7 @@ static int sis190_mac_addr(struct net_device *dev, void *p) static const struct net_device_ops sis190_netdev_ops = { .ndo_open = sis190_open, .ndo_stop = sis190_close, - .ndo_do_ioctl = sis190_ioctl, + .ndo_eth_ioctl = sis190_ioctl, .ndo_start_xmit = sis190_start_xmit, .ndo_tx_timeout = sis190_tx_timeout, .ndo_set_rx_mode = sis190_set_rx_mode, diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index ca9c00b7f588..ec6f7f993eb7 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -404,7 +404,7 @@ static const struct net_device_ops sis900_netdev_ops = { .ndo_set_rx_mode = set_rx_mode, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_do_ioctl = mii_ioctl, + .ndo_eth_ioctl = mii_ioctl, .ndo_tx_timeout = sis900_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = sis900_poll, diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 51cd7dca91cd..44daf79a8f97 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -312,7 +312,7 @@ static const struct net_device_ops epic_netdev_ops = { .ndo_tx_timeout = epic_tx_timeout, .ndo_get_stats = epic_get_stats, .ndo_set_rx_mode = set_rx_mode, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index f2a50eb3c1e0..42fc37c7887a 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -294,7 +294,7 @@ static const struct net_device_ops smc_netdev_ops = { .ndo_tx_timeout = smc_tx_timeout, .ndo_set_config = s9k_config, .ndo_set_rx_mode = set_rx_mode, - .ndo_do_ioctl = smc_ioctl, + .ndo_eth_ioctl = smc_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 556a9790cdcf..199a97339280 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2148,7 +2148,7 @@ static const struct net_device_ops smsc911x_netdev_ops = { .ndo_start_xmit = smsc911x_hard_start_xmit, .ndo_get_stats = smsc911x_get_stats, .ndo_set_rx_mode = smsc911x_set_multicast_list, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = smsc911x_set_mac_address, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index c1dab009415d..fdbd2a43e267 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -1482,7 +1482,7 @@ static const struct net_device_ops smsc9420_netdev_ops = { .ndo_start_xmit = smsc9420_hard_start_xmit, .ndo_get_stats = smsc9420_get_stats, .ndo_set_rx_mode = smsc9420_set_multicast_list, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 20d148c019d8..d15f7b3a3f10 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1831,7 +1831,7 @@ static const struct net_device_ops netsec_netdev_ops = { .ndo_set_features = netsec_netdev_set_features, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl, + .ndo_eth_ioctl = phy_do_ioctl, .ndo_xdp_xmit = netsec_xdp_xmit, .ndo_bpf = netsec_xdp, }; diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 5eb6bb4f7b6c..ae31ed93aaf0 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1543,7 +1543,7 @@ static const struct net_device_ops ave_netdev_ops = { .ndo_open = ave_open, .ndo_stop = ave_stop, .ndo_start_xmit = ave_start_xmit, - .ndo_do_ioctl = ave_ioctl, + .ndo_eth_ioctl = ave_ioctl, .ndo_set_rx_mode = ave_set_rx_mode, .ndo_get_stats64 = ave_get_stats64, .ndo_set_mac_address = ave_set_mac_address, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 7b8404a21544..a2aa75cb184e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6451,7 +6451,7 @@ static const struct net_device_ops stmmac_netdev_ops = { .ndo_set_features = stmmac_set_features, .ndo_set_rx_mode = stmmac_set_rx_mode, .ndo_tx_timeout = stmmac_tx_timeout, - .ndo_do_ioctl = stmmac_ioctl, + .ndo_eth_ioctl = stmmac_ioctl, .ndo_setup_tc = stmmac_setup_tc, .ndo_select_queue = stmmac_select_queue, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 981685c88308..287ae4c538aa 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -4876,7 +4876,7 @@ static const struct net_device_ops cas_netdev_ops = { .ndo_start_xmit = cas_start_xmit, .ndo_get_stats = cas_get_stats, .ndo_set_rx_mode = cas_set_multicast, - .ndo_do_ioctl = cas_ioctl, + .ndo_eth_ioctl = cas_ioctl, .ndo_tx_timeout = cas_tx_timeout, .ndo_change_mtu = cas_change_mtu, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 74e748662ec0..006fd4237725 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -9667,7 +9667,7 @@ static const struct net_device_ops niu_netdev_ops = { .ndo_set_rx_mode = niu_set_rx_mode, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = niu_set_mac_addr, - .ndo_do_ioctl = niu_ioctl, + .ndo_eth_ioctl = niu_ioctl, .ndo_tx_timeout = niu_tx_timeout, .ndo_change_mtu = niu_change_mtu, }; diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index cfb9e21b18b7..d72018a60c0f 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -2831,7 +2831,7 @@ static const struct net_device_ops gem_netdev_ops = { .ndo_start_xmit = gem_start_xmit, .ndo_get_stats = gem_get_stats, .ndo_set_rx_mode = gem_set_multicast, - .ndo_do_ioctl = gem_ioctl, + .ndo_eth_ioctl = gem_ioctl, .ndo_tx_timeout = gem_tx_timeout, .ndo_change_mtu = gem_change_mtu, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index 26d178f8616b..1db7104fef3a 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -933,7 +933,7 @@ static const struct net_device_ops xlgmac_netdev_ops = { .ndo_change_mtu = xlgmac_change_mtu, .ndo_set_mac_address = xlgmac_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = xlgmac_ioctl, + .ndo_eth_ioctl = xlgmac_ioctl, .ndo_vlan_rx_add_vid = xlgmac_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = xlgmac_vlan_rx_kill_vid, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 229e2f09d605..dffb6839f0fa 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1480,7 +1480,7 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = { .ndo_tx_timeout = am65_cpsw_nuss_ndo_host_tx_timeout, .ndo_vlan_rx_add_vid = am65_cpsw_nuss_ndo_slave_add_vid, .ndo_vlan_rx_kill_vid = am65_cpsw_nuss_ndo_slave_kill_vid, - .ndo_do_ioctl = am65_cpsw_nuss_ndo_slave_ioctl, + .ndo_eth_ioctl = am65_cpsw_nuss_ndo_slave_ioctl, .ndo_setup_tc = am65_cpsw_qos_ndo_setup_tc, .ndo_get_devlink_port = am65_cpsw_ndo_get_devlink_port, }; diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index c20715107075..02d4e51f7306 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -1044,7 +1044,7 @@ static const struct net_device_ops cpmac_netdev_ops = { .ndo_start_xmit = cpmac_start_xmit, .ndo_tx_timeout = cpmac_tx_timeout, .ndo_set_rx_mode = cpmac_set_multicast_list, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index cbbd0f665796..abf9a2a6f7eb 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1159,7 +1159,7 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_stop = cpsw_ndo_stop, .ndo_start_xmit = cpsw_ndo_start_xmit, .ndo_set_mac_address = cpsw_ndo_set_mac_address, - .ndo_do_ioctl = cpsw_ndo_ioctl, + .ndo_eth_ioctl = cpsw_ndo_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = cpsw_ndo_tx_timeout, .ndo_set_rx_mode = cpsw_ndo_set_rx_mode, diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 4448a91cce54..b4f55ff4e84f 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1128,7 +1128,7 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_stop = cpsw_ndo_stop, .ndo_start_xmit = cpsw_ndo_start_xmit, .ndo_set_mac_address = cpsw_ndo_set_mac_address, - .ndo_do_ioctl = cpsw_ndo_ioctl, + .ndo_eth_ioctl = cpsw_ndo_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = cpsw_ndo_tx_timeout, .ndo_set_rx_mode = cpsw_ndo_set_rx_mode, diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index c674e34b6839..637796670746 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1670,7 +1670,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_start_xmit = emac_dev_xmit, .ndo_set_rx_mode = emac_dev_mcast_set, .ndo_set_mac_address = emac_dev_setmac_addr, - .ndo_do_ioctl = emac_devioctl, + .ndo_eth_ioctl = emac_devioctl, .ndo_tx_timeout = emac_dev_tx_timeout, .ndo_get_stats = emac_dev_getnetstats, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 97942b0e3897..eda2961c0fe2 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1944,7 +1944,7 @@ static const struct net_device_ops netcp_netdev_ops = { .ndo_stop = netcp_ndo_stop, .ndo_start_xmit = netcp_ndo_start_xmit, .ndo_set_rx_mode = netcp_set_rx_mode, - .ndo_do_ioctl = netcp_ndo_ioctl, + .ndo_eth_ioctl = netcp_ndo_ioctl, .ndo_get_stats64 = netcp_get_stats, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index e0cb713193ea..77c448ad67ce 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -749,7 +749,7 @@ static const struct net_device_ops tlan_netdev_ops = { .ndo_tx_timeout = tlan_tx_timeout, .ndo_get_stats = tlan_get_stats, .ndo_set_rx_mode = tlan_set_multicast_list, - .ndo_do_ioctl = tlan_ioctl, + .ndo_eth_ioctl = tlan_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index 226a76633e65..087f0af56c50 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2214,7 +2214,7 @@ static const struct net_device_ops spider_net_ops = { .ndo_start_xmit = spider_net_xmit, .ndo_set_rx_mode = spider_net_set_multi, .ndo_set_mac_address = spider_net_set_mac, - .ndo_do_ioctl = spider_net_do_ioctl, + .ndo_eth_ioctl = spider_net_do_ioctl, .ndo_tx_timeout = spider_net_tx_timeout, .ndo_validate_addr = eth_validate_addr, /* HW VLAN */ diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index fedb2bf69261..52245ac60fc7 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -750,7 +750,7 @@ static const struct net_device_ops tc35815_netdev_ops = { .ndo_get_stats = tc35815_get_stats, .ndo_set_rx_mode = tc35815_set_multicast_list, .ndo_tx_timeout = tc35815_tx_timeout, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index c62f474b6d08..cf0917b29e30 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1538,7 +1538,7 @@ static const struct net_device_ops tsi108_netdev_ops = { .ndo_start_xmit = tsi108_send_packet, .ndo_set_rx_mode = tsi108_set_rx_mode, .ndo_get_stats = tsi108_get_stats, - .ndo_do_ioctl = tsi108_do_ioctl, + .ndo_eth_ioctl = tsi108_do_ioctl, .ndo_set_mac_address = tsi108_set_mac, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 73ca597ebd1b..961b623b7880 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -884,7 +884,7 @@ static const struct net_device_ops rhine_netdev_ops = { .ndo_set_rx_mode = rhine_set_rx_mode, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_do_ioctl = netdev_ioctl, + .ndo_eth_ioctl = netdev_ioctl, .ndo_tx_timeout = rhine_tx_timeout, .ndo_vlan_rx_add_vid = rhine_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = rhine_vlan_rx_kill_vid, diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 88426b5e410b..278f49518d3f 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2637,7 +2637,7 @@ static const struct net_device_ops velocity_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_set_rx_mode = velocity_set_multi, .ndo_change_mtu = velocity_change_mtu, - .ndo_do_ioctl = velocity_ioctl, + .ndo_eth_ioctl = velocity_ioctl, .ndo_vlan_rx_add_vid = velocity_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = velocity_vlan_rx_kill_vid, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 60a4f79b8fa1..db1994fb51c5 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1237,7 +1237,7 @@ static const struct net_device_ops temac_netdev_ops = { .ndo_set_rx_mode = temac_set_multicast_list, .ndo_set_mac_address = temac_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = temac_poll_controller, #endif diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 13cd799541aa..348c0ba5edcf 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1227,7 +1227,7 @@ static const struct net_device_ops axienet_netdev_ops = { .ndo_change_mtu = axienet_change_mtu, .ndo_set_mac_address = netdev_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = axienet_ioctl, + .ndo_eth_ioctl = axienet_ioctl, .ndo_set_rx_mode = axienet_set_multicast_list, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = axienet_poll_controller, diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index b06377fe7293..b780aad3550a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1263,7 +1263,7 @@ static const struct net_device_ops xemaclite_netdev_ops = { .ndo_start_xmit = xemaclite_send, .ndo_set_mac_address = xemaclite_set_mac_address, .ndo_tx_timeout = xemaclite_tx_timeout, - .ndo_do_ioctl = xemaclite_ioctl, + .ndo_eth_ioctl = xemaclite_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = xemaclite_poll_controller, #endif diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index 4f6db6f5c272..ae611e46da6a 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -464,7 +464,7 @@ static const struct net_device_ops netdev_ops = { .ndo_start_xmit = do_start_xmit, .ndo_tx_timeout = xirc_tx_timeout, .ndo_set_config = do_config, - .ndo_do_ioctl = do_ioctl, + .ndo_eth_ioctl = do_ioctl, .ndo_set_rx_mode = set_multicast_list, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 7ae754eadf22..ff50305d6e13 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -1357,7 +1357,7 @@ static const struct net_device_ops ixp4xx_netdev_ops = { .ndo_stop = eth_close, .ndo_start_xmit = eth_xmit, .ndo_set_rx_mode = eth_set_mcast_list, - .ndo_do_ioctl = eth_ioctl, + .ndo_eth_ioctl = eth_ioctl, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 80de9768ecd4..35f46ad040b0 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -829,7 +829,7 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int macvlan_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct net_device *real_dev = macvlan_dev_real_dev(dev); const struct net_device_ops *ops = real_dev->netdev_ops; @@ -845,8 +845,8 @@ static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; fallthrough; case SIOCGHWTSTAMP: - if (netif_device_present(real_dev) && ops->ndo_do_ioctl) - err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd); + if (netif_device_present(real_dev) && ops->ndo_eth_ioctl) + err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd); break; } @@ -1151,7 +1151,7 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_stop = macvlan_stop, .ndo_start_xmit = macvlan_start_xmit, .ndo_change_mtu = macvlan_change_mtu, - .ndo_do_ioctl = macvlan_do_ioctl, + .ndo_eth_ioctl = macvlan_eth_ioctl, .ndo_fix_features = macvlan_fix_features, .ndo_change_rx_flags = macvlan_change_rx_flags, .ndo_set_mac_address = macvlan_set_mac_address, diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 8eeb26d8aeb7..f124a8a58bd4 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -426,7 +426,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) EXPORT_SYMBOL(phy_mii_ioctl); /** - * phy_do_ioctl - generic ndo_do_ioctl implementation + * phy_do_ioctl - generic ndo_eth_ioctl implementation * @dev: the net_device struct * @ifr: &struct ifreq for socket ioctl's * @cmd: ioctl cmd to execute @@ -441,7 +441,7 @@ int phy_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) EXPORT_SYMBOL(phy_do_ioctl); /** - * phy_do_ioctl_running - generic ndo_do_ioctl implementation but test first + * phy_do_ioctl_running - generic ndo_eth_ioctl implementation but test first * * @dev: the net_device struct * @ifr: &struct ifreq for socket ioctl's diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 9b914765c2de..cb01897c7a5d 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -197,7 +197,7 @@ static const struct net_device_ops ax88172_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = asix_ioctl, + .ndo_eth_ioctl = asix_ioctl, .ndo_set_rx_mode = ax88172_set_multicast, }; @@ -589,7 +589,7 @@ static const struct net_device_ops ax88772_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = asix_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_rx_mode = asix_set_multicast, }; @@ -1095,7 +1095,7 @@ static const struct net_device_ops ax88178_netdev_ops = { .ndo_set_mac_address = asix_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = asix_set_multicast, - .ndo_do_ioctl = asix_ioctl, + .ndo_eth_ioctl = asix_ioctl, .ndo_change_mtu = ax88178_change_mtu, }; diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index 530947d7477b..d9777d9a7c5d 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -109,7 +109,7 @@ static const struct net_device_ops ax88172a_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = asix_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_rx_mode = asix_set_multicast, }; diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index c1316718304d..f25448a08870 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1035,7 +1035,7 @@ static const struct net_device_ops ax88179_netdev_ops = { .ndo_change_mtu = ax88179_change_mtu, .ndo_set_mac_address = ax88179_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = ax88179_ioctl, + .ndo_eth_ioctl = ax88179_ioctl, .ndo_set_rx_mode = ax88179_set_multicast, .ndo_set_features = ax88179_set_features, }; diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 89cc61d7a675..907f98b1eefe 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -345,7 +345,7 @@ static const struct net_device_ops dm9601_netdev_ops = { .ndo_change_mtu = usbnet_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = dm9601_ioctl, + .ndo_eth_ioctl = dm9601_ioctl, .ndo_set_rx_mode = dm9601_set_multicast, .ndo_set_mac_address = dm9601_set_mac_address, }; diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 25489389ea49..13f86368b78a 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3601,7 +3601,7 @@ static const struct net_device_ops lan78xx_netdev_ops = { .ndo_change_mtu = lan78xx_change_mtu, .ndo_set_mac_address = lan78xx_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = phy_do_ioctl_running, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_rx_mode = lan78xx_set_multicast, .ndo_set_features = lan78xx_set_features, .ndo_vlan_rx_add_vid = lan78xx_vlan_rx_add_vid, diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index 2469bdcb1a04..66866bef25df 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -464,7 +464,7 @@ static const struct net_device_ops mcs7830_netdev_ops = { .ndo_change_mtu = usbnet_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = mcs7830_ioctl, + .ndo_eth_ioctl = mcs7830_ioctl, .ndo_set_rx_mode = mcs7830_set_multicast, .ndo_set_mac_address = mcs7830_set_mac_address, }; diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e09b107b5c99..d7fbc81b518a 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9173,7 +9173,7 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops rtl8152_netdev_ops = { .ndo_open = rtl8152_open, .ndo_stop = rtl8152_close, - .ndo_do_ioctl = rtl8152_ioctl, + .ndo_eth_ioctl = rtl8152_ioctl, .ndo_start_xmit = rtl8152_start_xmit, .ndo_tx_timeout = rtl8152_tx_timeout, .ndo_set_features = rtl8152_set_features, diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 13141dbfa3a8..76f7af161313 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -1439,7 +1439,7 @@ static const struct net_device_ops smsc75xx_netdev_ops = { .ndo_change_mtu = smsc75xx_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = smsc75xx_ioctl, + .ndo_eth_ioctl = smsc75xx_ioctl, .ndo_set_rx_mode = smsc75xx_set_multicast, .ndo_set_features = smsc75xx_set_features, }; diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 4c8ee1cff4d4..7d953974eb9b 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1044,7 +1044,7 @@ static const struct net_device_ops smsc95xx_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = smsc95xx_ioctl, + .ndo_eth_ioctl = smsc95xx_ioctl, .ndo_set_rx_mode = smsc95xx_set_multicast, .ndo_set_features = smsc95xx_set_features, }; diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index ce29261263cd..6516a37893e2 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -310,7 +310,7 @@ static const struct net_device_ops sr9700_netdev_ops = { .ndo_change_mtu = usbnet_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = sr9700_ioctl, + .ndo_eth_ioctl = sr9700_ioctl, .ndo_set_rx_mode = sr9700_set_multicast, .ndo_set_mac_address = sr9700_set_mac_address, }; diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index a822d81310d5..576401c8b1be 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -684,7 +684,7 @@ static const struct net_device_ops sr9800_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = sr_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = sr_ioctl, + .ndo_eth_ioctl = sr_ioctl, .ndo_set_rx_mode = sr_set_multicast, }; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index d50d3cba238e..69afc0311dd1 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -836,7 +836,7 @@ static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_select_queue = qeth_l2_select_queue, .ndo_validate_addr = qeth_l2_validate_addr, .ndo_set_rx_mode = qeth_l2_set_rx_mode, - .ndo_do_ioctl = qeth_do_ioctl, + .ndo_eth_ioctl = qeth_do_ioctl, .ndo_siocdevprivate = qeth_siocdevprivate, .ndo_set_mac_address = qeth_l2_set_mac_address, .ndo_vlan_rx_add_vid = qeth_l2_vlan_rx_add_vid, diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index d7a895372f19..3a523e700a5a 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1841,7 +1841,7 @@ static const struct net_device_ops qeth_l3_netdev_ops = { .ndo_select_queue = qeth_l3_iqd_select_queue, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l3_set_rx_mode, - .ndo_do_ioctl = qeth_do_ioctl, + .ndo_eth_ioctl = qeth_do_ioctl, .ndo_siocdevprivate = qeth_siocdevprivate, .ndo_fix_features = qeth_fix_features, .ndo_set_features = qeth_set_features, @@ -1857,7 +1857,7 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = { .ndo_select_queue = qeth_l3_osa_select_queue, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l3_set_rx_mode, - .ndo_do_ioctl = qeth_do_ioctl, + .ndo_eth_ioctl = qeth_do_ioctl, .ndo_siocdevprivate = qeth_siocdevprivate, .ndo_fix_features = qeth_fix_features, .ndo_set_features = qeth_set_features, diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index dcbba9621b21..5d24c1b6663b 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -524,7 +524,7 @@ static const struct net_device_ops cvm_oct_npi_netdev_ops = { .ndo_start_xmit = cvm_oct_xmit, .ndo_set_rx_mode = cvm_oct_common_set_multicast_list, .ndo_set_mac_address = cvm_oct_common_set_mac_address, - .ndo_do_ioctl = cvm_oct_ioctl, + .ndo_eth_ioctl = cvm_oct_ioctl, .ndo_change_mtu = cvm_oct_common_change_mtu, .ndo_get_stats = cvm_oct_common_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -540,7 +540,7 @@ static const struct net_device_ops cvm_oct_xaui_netdev_ops = { .ndo_start_xmit = cvm_oct_xmit, .ndo_set_rx_mode = cvm_oct_common_set_multicast_list, .ndo_set_mac_address = cvm_oct_common_set_mac_address, - .ndo_do_ioctl = cvm_oct_ioctl, + .ndo_eth_ioctl = cvm_oct_ioctl, .ndo_change_mtu = cvm_oct_common_change_mtu, .ndo_get_stats = cvm_oct_common_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -556,7 +556,7 @@ static const struct net_device_ops cvm_oct_sgmii_netdev_ops = { .ndo_start_xmit = cvm_oct_xmit, .ndo_set_rx_mode = cvm_oct_common_set_multicast_list, .ndo_set_mac_address = cvm_oct_common_set_mac_address, - .ndo_do_ioctl = cvm_oct_ioctl, + .ndo_eth_ioctl = cvm_oct_ioctl, .ndo_change_mtu = cvm_oct_common_change_mtu, .ndo_get_stats = cvm_oct_common_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -570,7 +570,7 @@ static const struct net_device_ops cvm_oct_spi_netdev_ops = { .ndo_start_xmit = cvm_oct_xmit, .ndo_set_rx_mode = cvm_oct_common_set_multicast_list, .ndo_set_mac_address = cvm_oct_common_set_mac_address, - .ndo_do_ioctl = cvm_oct_ioctl, + .ndo_eth_ioctl = cvm_oct_ioctl, .ndo_change_mtu = cvm_oct_common_change_mtu, .ndo_get_stats = cvm_oct_common_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -586,7 +586,7 @@ static const struct net_device_ops cvm_oct_rgmii_netdev_ops = { .ndo_start_xmit = cvm_oct_xmit, .ndo_set_rx_mode = cvm_oct_common_set_multicast_list, .ndo_set_mac_address = cvm_oct_common_set_mac_address, - .ndo_do_ioctl = cvm_oct_ioctl, + .ndo_eth_ioctl = cvm_oct_ioctl, .ndo_change_mtu = cvm_oct_common_change_mtu, .ndo_get_stats = cvm_oct_common_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -599,7 +599,7 @@ static const struct net_device_ops cvm_oct_pow_netdev_ops = { .ndo_start_xmit = cvm_oct_xmit_pow, .ndo_set_rx_mode = cvm_oct_common_set_multicast_list, .ndo_set_mac_address = cvm_oct_common_set_mac_address, - .ndo_do_ioctl = cvm_oct_ioctl, + .ndo_eth_ioctl = cvm_oct_ioctl, .ndo_change_mtu = cvm_oct_common_change_mtu, .ndo_get_stats = cvm_oct_common_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 658d8cf57342..b6e062a3b0d4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1090,6 +1090,10 @@ struct netdev_net_notifier { * the generic interface code. If not defined ioctls return * not supported error code. * + * * int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); + * Called for ethernet specific ioctls: SIOCGMIIPHY, SIOCGMIIREG, + * SIOCSMIIREG, SIOCSHWTSTAMP and SIOCGHWTSTAMP. + * * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); * Used to set network devices bus interface parameters. This interface * is retained for legacy reasons; new devices should use the bus @@ -1361,6 +1365,8 @@ struct net_device_ops { int (*ndo_validate_addr)(struct net_device *dev); int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); + int (*ndo_eth_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); int (*ndo_siocdevprivate)(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd); diff --git a/include/net/dsa.h b/include/net/dsa.h index 55fcac854058..2af6ee2f2bfb 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -106,8 +106,8 @@ struct dsa_device_ops { * function pointers. */ struct dsa_netdevice_ops { - int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, - int cmd); + int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, + int cmd); }; #define DSA_TAG_DRIVER_ALIAS "dsa_tag-" @@ -1019,8 +1019,8 @@ static inline int __dsa_netdevice_ops_check(struct net_device *dev) return 0; } -static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr, - int cmd) +static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) { const struct dsa_netdevice_ops *ops; int err; @@ -1031,11 +1031,11 @@ static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr, ops = dev->dsa_ptr->netdev_ops; - return ops->ndo_do_ioctl(dev, ifr, cmd); + return ops->ndo_eth_ioctl(dev, ifr, cmd); } #else -static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr, - int cmd) +static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) { return -EOPNOTSUPP; } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index a0367b37512d..0c21d1fec852 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -372,8 +372,8 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCGMIIREG: case SIOCSMIIREG: case SIOCGHWTSTAMP: - if (netif_device_present(real_dev) && ops->ndo_do_ioctl) - err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd); + if (netif_device_present(real_dev) && ops->ndo_eth_ioctl) + err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd); break; } @@ -814,7 +814,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_set_mac_address = vlan_dev_set_mac_address, .ndo_set_rx_mode = vlan_dev_set_rx_mode, .ndo_change_rx_flags = vlan_dev_change_rx_flags, - .ndo_do_ioctl = vlan_dev_ioctl, + .ndo_eth_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, .ndo_get_stats64 = vlan_dev_get_stats64, #if IS_ENABLED(CONFIG_FCOE) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 3ace1e4f6b80..8e30fe8b5645 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -239,19 +239,19 @@ static int net_hwtstamp_validate(struct ifreq *ifr) return 0; } -static int dev_do_ioctl(struct net_device *dev, - struct ifreq *ifr, unsigned int cmd) +static int dev_eth_ioctl(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd) { const struct net_device_ops *ops = dev->netdev_ops; int err; - err = dsa_ndo_do_ioctl(dev, ifr, cmd); + err = dsa_ndo_eth_ioctl(dev, ifr, cmd); if (err == 0 || err != -EOPNOTSUPP) return err; - if (ops->ndo_do_ioctl) { + if (ops->ndo_eth_ioctl) { if (netif_device_present(dev)) - err = ops->ndo_do_ioctl(dev, ifr, cmd); + err = ops->ndo_eth_ioctl(dev, ifr, cmd); else err = -ENODEV; } @@ -259,6 +259,21 @@ static int dev_do_ioctl(struct net_device *dev, return err; } +static int dev_do_ioctl(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_do_ioctl) { + if (netif_device_present(dev)) + return ops->ndo_do_ioctl(dev, ifr, cmd); + else + return -ENODEV; + } + + return -EOPNOTSUPP; +} + static int dev_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, unsigned int cmd) { @@ -358,19 +373,20 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, cmd <= SIOCDEVPRIVATE + 15) return dev_siocdevprivate(dev, ifr, data, cmd); - if (cmd == SIOCBONDENSLAVE || + if (cmd == SIOCGMIIPHY || + cmd == SIOCGMIIREG || + cmd == SIOCSMIIREG || + cmd == SIOCSHWTSTAMP || + cmd == SIOCGHWTSTAMP) { + err = dev_eth_ioctl(dev, ifr, cmd); + } else if (cmd == SIOCBONDENSLAVE || cmd == SIOCBONDRELEASE || cmd == SIOCBONDSETHWADDR || cmd == SIOCBONDSLAVEINFOQUERY || cmd == SIOCBONDINFOQUERY || cmd == SIOCBONDCHANGEACTIVE || - cmd == SIOCGMIIPHY || - cmd == SIOCGMIIREG || - cmd == SIOCSMIIREG || cmd == SIOCBRADDIF || cmd == SIOCBRDELIF || - cmd == SIOCSHWTSTAMP || - cmd == SIOCGHWTSTAMP || cmd == SIOCWANDEV) { err = dev_do_ioctl(dev, ifr, cmd); } else diff --git a/net/dsa/master.c b/net/dsa/master.c index 3fc90e36772d..e8e19857621b 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -210,14 +210,14 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } - if (dev->netdev_ops->ndo_do_ioctl) - err = dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + if (dev->netdev_ops->ndo_eth_ioctl) + err = dev->netdev_ops->ndo_eth_ioctl(dev, ifr, cmd); return err; } static const struct dsa_netdevice_ops dsa_netdev_ops = { - .ndo_do_ioctl = dsa_master_ioctl, + .ndo_eth_ioctl = dsa_master_ioctl, }; static int dsa_master_ethtool_setup(struct net_device *dev) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8c112d7d5b0a..6e1135d3ee33 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1687,7 +1687,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_fdb_dump = dsa_slave_fdb_dump, - .ndo_do_ioctl = dsa_slave_ioctl, + .ndo_eth_ioctl = dsa_slave_ioctl, .ndo_get_iflink = dsa_slave_get_iflink, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_netpoll_setup = dsa_slave_netpoll_setup, -- cgit v1.2.3 From d7907a2b1a3b89bea136025f885035a083525e41 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jul 2021 10:33:47 +0300 Subject: devlink: Remove duplicated registration check Both registered flag and devlink pointer are set at the same time and indicate the same thing - devlink/devlink_port are ready. Instead of checking ->registered use devlink pointer as an indication. Signed-off-by: Leon Romanovsky Signed-off-by: David S. Miller --- include/net/devlink.h | 4 +--- net/core/devlink.c | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 57b738b78073..e48a62320407 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -55,8 +55,7 @@ struct devlink { * port, sb, dpipe, resource, params, region, traps and more. */ u8 reload_failed:1, - reload_enabled:1, - registered:1; + reload_enabled:1; char priv[0] __aligned(NETDEV_ALIGN); }; @@ -158,7 +157,6 @@ struct devlink_port { struct list_head region_list; struct devlink *devlink; unsigned int index; - bool registered; spinlock_t type_lock; /* Protects type and type_dev * pointer consistency. */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 8fdd04f00fd7..b596a971b473 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -115,7 +115,7 @@ static void __devlink_net_set(struct devlink *devlink, struct net *net) void devlink_net_set(struct devlink *devlink, struct net *net) { - if (WARN_ON(devlink->registered)) + if (WARN_ON(devlink->dev)) return; __devlink_net_set(devlink, net); } @@ -1043,7 +1043,7 @@ static void devlink_port_notify(struct devlink_port *devlink_port, struct sk_buff *msg; int err; - if (!devlink_port->registered) + if (!devlink_port->devlink) return; WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL); @@ -8817,8 +8817,8 @@ EXPORT_SYMBOL_GPL(devlink_alloc); */ int devlink_register(struct devlink *devlink, struct device *dev) { + WARN_ON(devlink->dev); devlink->dev = dev; - devlink->registered = true; mutex_lock(&devlink_mutex); list_add_tail(&devlink->list, &devlink_list); devlink_notify(devlink, DEVLINK_CMD_NEW); @@ -8960,9 +8960,10 @@ int devlink_port_register(struct devlink *devlink, mutex_unlock(&devlink->lock); return -EEXIST; } + + WARN_ON(devlink_port->devlink); devlink_port->devlink = devlink; devlink_port->index = port_index; - devlink_port->registered = true; spin_lock_init(&devlink_port->type_lock); INIT_LIST_HEAD(&devlink_port->reporter_list); mutex_init(&devlink_port->reporters_lock); @@ -9001,7 +9002,7 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, enum devlink_port_type type, void *type_dev) { - if (WARN_ON(!devlink_port->registered)) + if (WARN_ON(!devlink_port->devlink)) return; devlink_port_type_warn_cancel(devlink_port); spin_lock_bh(&devlink_port->type_lock); @@ -9121,7 +9122,7 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, { int ret; - if (WARN_ON(devlink_port->registered)) + if (WARN_ON(devlink_port->devlink)) return; devlink_port->attrs = *attrs; ret = __devlink_port_attrs_set(devlink_port, attrs->flavour); @@ -9145,7 +9146,7 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; - if (WARN_ON(devlink_port->registered)) + if (WARN_ON(devlink_port->devlink)) return; ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_PF); @@ -9172,7 +9173,7 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; - if (WARN_ON(devlink_port->registered)) + if (WARN_ON(devlink_port->devlink)) return; ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_VF); @@ -9200,7 +9201,7 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; - if (WARN_ON(devlink_port->registered)) + if (WARN_ON(devlink_port->devlink)) return; ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_SF); -- cgit v1.2.3 From 58ce6d5b271ab25fb2056f84a8e5546945eb5fc9 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 27 Jul 2021 06:12:04 +0900 Subject: Bluetooth: defer cleanup of resources in hci_unregister_dev() syzbot is hitting might_sleep() warning at hci_sock_dev_event() due to calling lock_sock() with rw spinlock held [1]. It seems that history of this locking problem is a trial and error. Commit b40df5743ee8aed8 ("[PATCH] bluetooth: fix socket locking in hci_sock_dev_event()") in 2.6.21-rc4 changed bh_lock_sock() to lock_sock() as an attempt to fix lockdep warning. Then, commit 4ce61d1c7a8ef4c1 ("[BLUETOOTH]: Fix locking in hci_sock_dev_event().") in 2.6.22-rc2 changed lock_sock() to local_bh_disable() + bh_lock_sock_nested() as an attempt to fix sleep in atomic context warning. Then, commit 4b5dd696f81b210c ("Bluetooth: Remove local_bh_disable() from hci_sock.c") in 3.3-rc1 removed local_bh_disable(). Then, commit e305509e678b3a4a ("Bluetooth: use correct lock to prevent UAF of hdev object") in 5.13-rc5 again changed bh_lock_sock_nested() to lock_sock() as an attempt to fix CVE-2021-3573. This difficulty comes from current implementation that hci_sock_dev_event(HCI_DEV_UNREG) is responsible for dropping all references from sockets because hci_unregister_dev() immediately reclaims resources as soon as returning from hci_sock_dev_event(HCI_DEV_UNREG). But the history suggests that hci_sock_dev_event(HCI_DEV_UNREG) was not doing what it should do. Therefore, instead of trying to detach sockets from device, let's accept not detaching sockets from device at hci_sock_dev_event(HCI_DEV_UNREG), by moving actual cleanup of resources from hci_unregister_dev() to hci_release_dev() which is called by bt_host_release when all references to this unregistered device (which is a kobject) are gone. Link: https://syzkaller.appspot.com/bug?extid=a5df189917e79d5e59c9 [1] Reported-by: syzbot Signed-off-by: Tetsuo Handa Tested-by: syzbot Fixes: e305509e678b3a4a ("Bluetooth: use correct lock to prevent UAF of hdev object") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 17 +++++++++-------- net/bluetooth/hci_sock.c | 20 +++++++++++++------- net/bluetooth/hci_sysfs.c | 2 +- 4 files changed, 24 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a53e94459ecd..4abe3c494002 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1230,6 +1230,7 @@ struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); +void hci_release_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2560ed2f144d..2b78e1336c53 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3996,14 +3996,10 @@ EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ void hci_unregister_dev(struct hci_dev *hdev) { - int id; - BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); hci_dev_set_flag(hdev, HCI_UNREGISTER); - id = hdev->id; - write_lock(&hci_dev_list_lock); list_del(&hdev->list); write_unlock(&hci_dev_list_lock); @@ -4038,7 +4034,13 @@ void hci_unregister_dev(struct hci_dev *hdev) } device_del(&hdev->dev); + hci_dev_put(hdev); +} +EXPORT_SYMBOL(hci_unregister_dev); +/* Release HCI device */ +void hci_release_dev(struct hci_dev *hdev) +{ debugfs_remove_recursive(hdev->debugfs); kfree_const(hdev->hw_info); kfree_const(hdev->fw_info); @@ -4063,11 +4065,10 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_blocked_keys_clear(hdev); hci_dev_unlock(hdev); - hci_dev_put(hdev); - - ida_simple_remove(&hci_index_ida, id); + ida_simple_remove(&hci_index_ida, hdev->id); + kfree(hdev); } -EXPORT_SYMBOL(hci_unregister_dev); +EXPORT_SYMBOL(hci_release_dev); /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b04a5a02ecf3..d810a5adf064 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -759,19 +759,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) if (event == HCI_DEV_UNREG) { struct sock *sk; - /* Detach sockets from device */ + /* Wake up sockets using this dead device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { - lock_sock(sk); if (hci_pi(sk)->hdev == hdev) { - hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; - sk->sk_state = BT_OPEN; sk->sk_state_change(sk); - - hci_dev_put(hdev); } - release_sock(sk); } read_unlock(&hci_sk_list.lock); } @@ -1103,6 +1097,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, lock_sock(sk); + /* Allow detaching from dead device and attaching to alive device, if + * the caller wants to re-bind (instead of close) this socket in + * response to hci_sock_dev_event(HCI_DEV_UNREG) notification. + */ + hdev = hci_pi(sk)->hdev; + if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + hci_pi(sk)->hdev = NULL; + sk->sk_state = BT_OPEN; + hci_dev_put(hdev); + } + hdev = NULL; + if (sk->sk_state == BT_BOUND) { err = -EALREADY; goto done; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 9874844a95a9..ebf282d1eb2b 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -83,7 +83,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn) static void bt_host_release(struct device *dev) { struct hci_dev *hdev = to_hci_dev(dev); - kfree(hdev); + hci_release_dev(hdev); module_put(THIS_MODULE); } -- cgit v1.2.3 From 8a886b142bd03d36612747e9aefdf0282c8b02dd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 28 Jul 2021 18:24:00 +0200 Subject: sk_buff: track dst status in slow_gro Similar to the previous patch, but covering the dst field: the slow_gro flag is additionally set when a dst is attached to the skb RFC -> v1: - use the existing flag instead of adding a new one Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ include/net/dst.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3ff18300d210..b1e5bbfcc926 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -992,6 +992,7 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) */ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { + skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst; } @@ -1008,6 +1009,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + skb->slow_gro = !!dst; skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } diff --git a/include/net/dst.h b/include/net/dst.h index 75b1e734e9c2..a057319aabef 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -277,6 +277,7 @@ static inline void skb_dst_drop(struct sk_buff *skb) static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst) { + nskb->slow_gro |= !!refdst; nskb->_skb_refdst = refdst; if (!(nskb->_skb_refdst & SKB_DST_NOREF)) dst_clone(skb_dst(nskb)); @@ -316,6 +317,7 @@ static inline bool skb_dst_force(struct sk_buff *skb) dst = NULL; skb->_skb_refdst = (unsigned long)dst; + skb->slow_gro |= !!dst; } return skb->_skb_refdst != 0UL; -- cgit v1.2.3 From 5e10da5385d20c4bae587bc2921e5fdd9655d5fc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 28 Jul 2021 18:24:03 +0200 Subject: skbuff: allow 'slow_gro' for skb carring sock reference This change leverages the infrastructure introduced by the previous patches to allow soft devices passing to the GRO engine owned skbs without impacting the fast-path. It's up to the GRO caller ensuring the slow_gro bit validity before invoking the GRO engine. The new helper skb_prepare_for_gro() is introduced for that goal. On slow_gro, skbs are aggregated only with equal sk. Additionally, skb truesize on GRO recycle and free is correctly updated so that sk wmem is not changed by the GRO processing. rfc-> v1: - fixed bad truesize on dev_gro_receive NAPI_FREE - use the existing state bit Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/sock.h | 9 +++++++++ net/core/dev.c | 2 ++ net/core/skbuff.c | 17 +++++++++++++---- 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index f23cb259b0e2..ff1be7e7e90b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2249,6 +2249,15 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc return false; } +static inline void skb_prepare_for_gro(struct sk_buff *skb) +{ + if (skb->destructor != sock_wfree) { + skb_orphan(skb); + return; + } + skb->slow_gro = 1; +} + void sk_reset_timer(struct sock *sk, struct timer_list *timer, unsigned long expires); diff --git a/net/core/dev.c b/net/core/dev.c index 19565f7497ee..dcc87fcd64ba 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6034,6 +6034,7 @@ static void gro_list_prepare(const struct list_head *head, struct tc_skb_ext *p_ext; #endif + diffs |= p->sk != skb->sk; diffs |= skb_metadata_dst_cmp(p, skb); diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb); @@ -6311,6 +6312,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb_shinfo(skb)->gso_type = 0; skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); if (unlikely(skb->slow_gro)) { + skb_orphan(skb); skb_ext_reset(skb); nf_reset_ct(skb); skb->slow_gro = 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d04e286149cc..fcbd977186b0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -958,6 +958,7 @@ void napi_skb_free_stolen_head(struct sk_buff *skb) nf_reset_ct(skb); skb_dst_drop(skb); skb_ext_put(skb); + skb_orphan(skb); skb->slow_gro = 0; } napi_skb_cache_put(skb); @@ -3892,6 +3893,9 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) NAPI_GRO_CB(p)->last = skb; NAPI_GRO_CB(p)->count++; p->data_len += skb->len; + + /* sk owenrship - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; p->truesize += skb->truesize; p->len += skb->len; @@ -4259,6 +4263,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) unsigned int headlen = skb_headlen(skb); unsigned int len = skb_gro_len(skb); unsigned int delta_truesize; + unsigned int new_truesize; struct sk_buff *lp; if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush)) @@ -4290,10 +4295,10 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) skb_frag_size_sub(frag, offset); /* all fragments truesize : remove (head size + sk_buff) */ - delta_truesize = skb->truesize - - SKB_TRUESIZE(skb_end_offset(skb)); + new_truesize = SKB_TRUESIZE(skb_end_offset(skb)); + delta_truesize = skb->truesize - new_truesize; - skb->truesize -= skb->data_len; + skb->truesize = new_truesize; skb->len -= skb->data_len; skb->data_len = 0; @@ -4322,12 +4327,16 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); /* We dont need to clear skbinfo->nr_frags here */ - delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); + new_truesize = SKB_TRUESIZE(sizeof(struct sk_buff)); + delta_truesize = skb->truesize - new_truesize; + skb->truesize = new_truesize; NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; goto done; } merge: + /* sk owenrship - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; delta_truesize = skb->truesize; if (offset > headlen) { unsigned int eat = offset - headlen; -- cgit v1.2.3 From dd8987a394c0730380167e0b0aebd766cf3511e1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 29 Jul 2021 12:40:11 +0200 Subject: nfc: constify passed nfc_dev The struct nfc_dev is not modified by nfc_get_drvdata() and nfc_device_name() so it can be made a const. Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/net/nfc/nfc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 85b698794b14..901779138e2b 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -245,7 +245,7 @@ static inline void nfc_set_drvdata(struct nfc_dev *dev, void *data) * * @dev: The nfc device */ -static inline void *nfc_get_drvdata(struct nfc_dev *dev) +static inline void *nfc_get_drvdata(const struct nfc_dev *dev) { return dev_get_drvdata(&dev->dev); } @@ -255,7 +255,7 @@ static inline void *nfc_get_drvdata(struct nfc_dev *dev) * * @dev: The nfc device whose name to return */ -static inline const char *nfc_device_name(struct nfc_dev *dev) +static inline const char *nfc_device_name(const struct nfc_dev *dev) { return dev_name(&dev->dev); } -- cgit v1.2.3 From 2c8e2e9aec7927b387540a88351b8405ee82b34a Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:41 +0800 Subject: mctp: Add base packet definitions Simple packet header format as defined by DMTF DSP0236. Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- MAINTAINERS | 1 + include/net/mctp.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 include/net/mctp.h (limited to 'include/net') diff --git a/MAINTAINERS b/MAINTAINERS index 22a1ff9afd9d..770b986d10f0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11037,6 +11037,7 @@ M: Jeremy Kerr M: Matt Johnston L: netdev@vger.kernel.org S: Maintained +F: include/net/mctp.h F: net/mctp/ MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7 diff --git a/include/net/mctp.h b/include/net/mctp.h new file mode 100644 index 000000000000..4c01e083be45 --- /dev/null +++ b/include/net/mctp.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Management Component Transport Protocol (MCTP) + * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#ifndef __NET_MCTP_H +#define __NET_MCTP_H + +#include + +/* MCTP packet definitions */ +struct mctp_hdr { + u8 ver; + u8 dest; + u8 src; + u8 flags_seq_tag; +}; + +#define MCTP_VER_MIN 1 +#define MCTP_VER_MAX 1 + +/* Definitions for flags_seq_tag field */ +#define MCTP_HDR_FLAG_SOM BIT(7) +#define MCTP_HDR_FLAG_EOM BIT(6) +#define MCTP_HDR_FLAG_TO BIT(3) +#define MCTP_HDR_FLAGS GENMASK(5, 3) +#define MCTP_HDR_SEQ_SHIFT 4 +#define MCTP_HDR_SEQ_MASK GENMASK(1, 0) +#define MCTP_HDR_TAG_SHIFT 0 +#define MCTP_HDR_TAG_MASK GENMASK(2, 0) + +#endif /* __NET_MCTP_H */ -- cgit v1.2.3 From 583be982d93479ea3d85091b0fd0b01201ede87d Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:44 +0800 Subject: mctp: Add device handling and netlink interface This change adds the infrastructure for managing MCTP netdevices; we add a pointer to the AF_MCTP-specific data to struct netdevice, and hook up the rtnetlink operations for adding and removing addresses. Includes changes from Matt Johnston . Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- MAINTAINERS | 1 + include/linux/netdevice.h | 4 + include/net/mctp.h | 14 ++ include/net/mctpdevice.h | 35 ++++ include/uapi/linux/if_ether.h | 3 + include/uapi/linux/if_link.h | 10 + include/uapi/linux/mctp.h | 1 + net/mctp/Makefile | 2 +- net/mctp/af_mctp.c | 8 + net/mctp/device.c | 414 ++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 491 insertions(+), 1 deletion(-) create mode 100644 include/net/mctpdevice.h create mode 100644 net/mctp/device.c (limited to 'include/net') diff --git a/MAINTAINERS b/MAINTAINERS index e95eb3b00cd2..c2943c227ee6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11039,6 +11039,7 @@ L: netdev@vger.kernel.org S: Maintained F: drivers/net/mctp/ F: include/net/mctp.h +F: include/net/mctpdevice.h F: net/mctp/ MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 226bbee06730..d63a94ecbf3b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1823,6 +1823,7 @@ enum netdev_ml_priv_type { * @ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network * device struct * @mpls_ptr: mpls_dev struct pointer + * @mctp_ptr: MCTP specific data * * @dev_addr: Hw address (before bcast, * because most packets are unicast) @@ -2110,6 +2111,9 @@ struct net_device { #if IS_ENABLED(CONFIG_MPLS_ROUTING) struct mpls_dev __rcu *mpls_ptr; #endif +#if IS_ENABLED(CONFIG_MCTP) + struct mctp_dev __rcu *mctp_ptr; +#endif /* * Cache lines mostly used on receive path (including eth_type_trans()) diff --git a/include/net/mctp.h b/include/net/mctp.h index 4c01e083be45..61452e03aa85 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -10,6 +10,7 @@ #define __NET_MCTP_H #include +#include /* MCTP packet definitions */ struct mctp_hdr { @@ -32,4 +33,17 @@ struct mctp_hdr { #define MCTP_HDR_TAG_SHIFT 0 #define MCTP_HDR_TAG_MASK GENMASK(2, 0) +static inline bool mctp_address_ok(mctp_eid_t eid) +{ + return eid >= 8 && eid < 255; +} + +static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb) +{ + return (struct mctp_hdr *)skb_network_header(skb); +} + +void mctp_device_init(void); +void mctp_device_exit(void); + #endif /* __NET_MCTP_H */ diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h new file mode 100644 index 000000000000..71a11012fac7 --- /dev/null +++ b/include/net/mctpdevice.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Management Component Transport Protocol (MCTP) - device + * definitions. + * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#ifndef __NET_MCTPDEVICE_H +#define __NET_MCTPDEVICE_H + +#include +#include +#include + +struct mctp_dev { + struct net_device *dev; + + unsigned int net; + + /* Only modified under RTNL. Reads have addrs_lock held */ + u8 *addrs; + size_t num_addrs; + spinlock_t addrs_lock; + + struct rcu_head rcu; +}; + +#define MCTP_INITIAL_DEFAULT_NET 1 + +struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev); +struct mctp_dev *__mctp_dev_get(const struct net_device *dev); + +#endif /* __NET_MCTPDEVICE_H */ diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index a0b637911d3c..5f589c7a8382 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -151,6 +151,9 @@ #define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and * aggregation protocol */ +#define ETH_P_MCTP 0x00FA /* Management component transport + * protocol packets + */ /* * This is an Ethernet frame header. diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4882e81514b6..49b22afab78f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1260,4 +1260,14 @@ struct ifla_rmnet_flags { __u32 mask; }; +/* MCTP section */ + +enum { + IFLA_MCTP_UNSPEC, + IFLA_MCTP_NET, + __IFLA_MCTP_MAX, +}; + +#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 52b54d13f385..a9d8edb3402b 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -26,6 +26,7 @@ struct sockaddr_mctp { }; #define MCTP_NET_ANY 0x0 +#define MCTP_NET_DEFAULT 0x0 #define MCTP_ADDR_NULL 0x00 #define MCTP_ADDR_ANY 0xff diff --git a/net/mctp/Makefile b/net/mctp/Makefile index 7c056b1b7939..2ea98c27b262 100644 --- a/net/mctp/Makefile +++ b/net/mctp/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MCTP) += mctp.o -mctp-objs := af_mctp.o +mctp-objs := af_mctp.o device.o diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index b3aeca6486e3..401b4fa141a5 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -6,13 +6,18 @@ * Copyright (c) 2021 Google */ +#include #include #include #include #include +#include +#include #include +/* socket implementation */ + struct mctp_sock { struct sock sk; }; @@ -152,6 +157,8 @@ static __init int mctp_init(void) if (rc) goto err_unreg_sock; + mctp_device_init(); + return 0; err_unreg_sock: @@ -162,6 +169,7 @@ err_unreg_sock: static __exit void mctp_exit(void) { + mctp_device_exit(); proto_unregister(&mctp_proto); sock_unregister(PF_MCTP); } diff --git a/net/mctp/device.c b/net/mctp/device.c new file mode 100644 index 000000000000..877abe5312cd --- /dev/null +++ b/net/mctp/device.c @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Management Component Transport Protocol (MCTP) - device implementation. + * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct mctp_dump_cb { + int h; + int idx; + size_t a_idx; +}; + +/* unlocked: caller must hold rcu_read_lock */ +struct mctp_dev *__mctp_dev_get(const struct net_device *dev) +{ + return rcu_dereference(dev->mctp_ptr); +} + +struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev) +{ + return rtnl_dereference(dev->mctp_ptr); +} + +static void mctp_dev_destroy(struct mctp_dev *mdev) +{ + struct net_device *dev = mdev->dev; + + dev_put(dev); + kfree_rcu(mdev, rcu); +} + +static int mctp_fill_addrinfo(struct sk_buff *skb, struct netlink_callback *cb, + struct mctp_dev *mdev, mctp_eid_t eid) +{ + struct ifaddrmsg *hdr; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + RTM_NEWADDR, sizeof(*hdr), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + hdr = nlmsg_data(nlh); + hdr->ifa_family = AF_MCTP; + hdr->ifa_prefixlen = 0; + hdr->ifa_flags = 0; + hdr->ifa_scope = 0; + hdr->ifa_index = mdev->dev->ifindex; + + if (nla_put_u8(skb, IFA_LOCAL, eid)) + goto cancel; + + if (nla_put_u8(skb, IFA_ADDRESS, eid)) + goto cancel; + + nlmsg_end(skb, nlh); + + return 0; + +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int mctp_dump_dev_addrinfo(struct mctp_dev *mdev, struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct mctp_dump_cb *mcb = (void *)cb->ctx; + int rc = 0; + + for (; mcb->a_idx < mdev->num_addrs; mcb->a_idx++) { + rc = mctp_fill_addrinfo(skb, cb, mdev, mdev->addrs[mcb->a_idx]); + if (rc < 0) + break; + } + + return rc; +} + +static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct mctp_dump_cb *mcb = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + struct hlist_head *head; + struct net_device *dev; + struct ifaddrmsg *hdr; + struct mctp_dev *mdev; + int ifindex; + int idx, rc; + + hdr = nlmsg_data(cb->nlh); + // filter by ifindex if requested + ifindex = hdr->ifa_index; + + rcu_read_lock(); + for (; mcb->h < NETDEV_HASHENTRIES; mcb->h++, mcb->idx = 0) { + idx = 0; + head = &net->dev_index_head[mcb->h]; + hlist_for_each_entry_rcu(dev, head, index_hlist) { + if (idx >= mcb->idx && + (ifindex == 0 || ifindex == dev->ifindex)) { + mdev = __mctp_dev_get(dev); + if (mdev) { + rc = mctp_dump_dev_addrinfo(mdev, + skb, cb); + // Error indicates full buffer, this + // callback will get retried. + if (rc < 0) + goto out; + } + } + idx++; + // reset for next iteration + mcb->a_idx = 0; + } + } +out: + rcu_read_unlock(); + mcb->idx = idx; + + return skb->len; +} + +static const struct nla_policy ifa_mctp_policy[IFA_MAX + 1] = { + [IFA_ADDRESS] = { .type = NLA_U8 }, + [IFA_LOCAL] = { .type = NLA_U8 }, +}; + +static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[IFA_MAX + 1]; + struct net_device *dev; + struct mctp_addr *addr; + struct mctp_dev *mdev; + struct ifaddrmsg *ifm; + unsigned long flags; + u8 *tmp_addrs; + int rc; + + rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy, + extack); + if (rc < 0) + return rc; + + ifm = nlmsg_data(nlh); + + if (tb[IFA_LOCAL]) + addr = nla_data(tb[IFA_LOCAL]); + else if (tb[IFA_ADDRESS]) + addr = nla_data(tb[IFA_ADDRESS]); + else + return -EINVAL; + + /* find device */ + dev = __dev_get_by_index(net, ifm->ifa_index); + if (!dev) + return -ENODEV; + + mdev = mctp_dev_get_rtnl(dev); + if (!mdev) + return -ENODEV; + + if (!mctp_address_ok(addr->s_addr)) + return -EINVAL; + + /* Prevent duplicates. Under RTNL so don't need to lock for reading */ + if (memchr(mdev->addrs, addr->s_addr, mdev->num_addrs)) + return -EEXIST; + + tmp_addrs = kmalloc(mdev->num_addrs + 1, GFP_KERNEL); + if (!tmp_addrs) + return -ENOMEM; + memcpy(tmp_addrs, mdev->addrs, mdev->num_addrs); + tmp_addrs[mdev->num_addrs] = addr->s_addr; + + /* Lock to write */ + spin_lock_irqsave(&mdev->addrs_lock, flags); + mdev->num_addrs++; + swap(mdev->addrs, tmp_addrs); + spin_unlock_irqrestore(&mdev->addrs_lock, flags); + + kfree(tmp_addrs); + + return 0; +} + +static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[IFA_MAX + 1]; + struct net_device *dev; + struct mctp_addr *addr; + struct mctp_dev *mdev; + struct ifaddrmsg *ifm; + unsigned long flags; + u8 *pos; + int rc; + + rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy, + extack); + if (rc < 0) + return rc; + + ifm = nlmsg_data(nlh); + + if (tb[IFA_LOCAL]) + addr = nla_data(tb[IFA_LOCAL]); + else if (tb[IFA_ADDRESS]) + addr = nla_data(tb[IFA_ADDRESS]); + else + return -EINVAL; + + /* find device */ + dev = __dev_get_by_index(net, ifm->ifa_index); + if (!dev) + return -ENODEV; + + mdev = mctp_dev_get_rtnl(dev); + if (!mdev) + return -ENODEV; + + pos = memchr(mdev->addrs, addr->s_addr, mdev->num_addrs); + if (!pos) + return -ENOENT; + + spin_lock_irqsave(&mdev->addrs_lock, flags); + memmove(pos, pos + 1, mdev->num_addrs - 1 - (pos - mdev->addrs)); + mdev->num_addrs--; + spin_unlock_irqrestore(&mdev->addrs_lock, flags); + + return 0; +} + +static struct mctp_dev *mctp_add_dev(struct net_device *dev) +{ + struct mctp_dev *mdev; + + ASSERT_RTNL(); + + mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); + if (!mdev) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&mdev->addrs_lock); + + mdev->net = MCTP_INITIAL_DEFAULT_NET; + + /* associate to net_device */ + rcu_assign_pointer(dev->mctp_ptr, mdev); + dev_hold(dev); + mdev->dev = dev; + + return mdev; +} + +static int mctp_fill_link_af(struct sk_buff *skb, + const struct net_device *dev, u32 ext_filter_mask) +{ + struct mctp_dev *mdev; + + mdev = mctp_dev_get_rtnl(dev); + if (!mdev) + return -ENODATA; + if (nla_put_u32(skb, IFLA_MCTP_NET, mdev->net)) + return -EMSGSIZE; + return 0; +} + +static size_t mctp_get_link_af_size(const struct net_device *dev, + u32 ext_filter_mask) +{ + struct mctp_dev *mdev; + unsigned int ret; + + /* caller holds RCU */ + mdev = __mctp_dev_get(dev); + if (!mdev) + return 0; + ret = nla_total_size(4); /* IFLA_MCTP_NET */ + return ret; +} + +static const struct nla_policy ifla_af_mctp_policy[IFLA_MCTP_MAX + 1] = { + [IFLA_MCTP_NET] = { .type = NLA_U32 }, +}; + +static int mctp_set_link_af(struct net_device *dev, const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_MCTP_MAX + 1]; + struct mctp_dev *mdev; + int rc; + + rc = nla_parse_nested(tb, IFLA_MCTP_MAX, attr, ifla_af_mctp_policy, + NULL); + if (rc) + return rc; + + mdev = mctp_dev_get_rtnl(dev); + if (!mdev) + return 0; + + if (tb[IFLA_MCTP_NET]) + WRITE_ONCE(mdev->net, nla_get_u32(tb[IFLA_MCTP_NET])); + + return 0; +} + +static void mctp_unregister(struct net_device *dev) +{ + struct mctp_dev *mdev; + + mdev = mctp_dev_get_rtnl(dev); + + if (!mdev) + return; + + RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL); + + kfree(mdev->addrs); + + mctp_dev_destroy(mdev); +} + +static int mctp_register(struct net_device *dev) +{ + struct mctp_dev *mdev; + + /* Already registered? */ + if (rtnl_dereference(dev->mctp_ptr)) + return 0; + + /* only register specific types; MCTP-specific and loopback for now */ + if (dev->type != ARPHRD_MCTP && dev->type != ARPHRD_LOOPBACK) + return 0; + + mdev = mctp_add_dev(dev); + if (IS_ERR(mdev)) + return PTR_ERR(mdev); + + return 0; +} + +static int mctp_dev_notify(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + int rc; + + switch (event) { + case NETDEV_REGISTER: + rc = mctp_register(dev); + if (rc) + return notifier_from_errno(rc); + break; + case NETDEV_UNREGISTER: + mctp_unregister(dev); + break; + } + + return NOTIFY_OK; +} + +static struct rtnl_af_ops mctp_af_ops = { + .family = AF_MCTP, + .fill_link_af = mctp_fill_link_af, + .get_link_af_size = mctp_get_link_af_size, + .set_link_af = mctp_set_link_af, +}; + +static struct notifier_block mctp_dev_nb = { + .notifier_call = mctp_dev_notify, + .priority = ADDRCONF_NOTIFY_PRIORITY, +}; + +void __init mctp_device_init(void) +{ + register_netdevice_notifier(&mctp_dev_nb); + + rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETADDR, + NULL, mctp_dump_addrinfo, 0); + rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWADDR, + mctp_rtm_newaddr, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELADDR, + mctp_rtm_deladdr, NULL, 0); + rtnl_af_register(&mctp_af_ops); +} + +void __exit mctp_device_exit(void) +{ + rtnl_af_unregister(&mctp_af_ops); + rtnl_unregister(PF_MCTP, RTM_DELADDR); + rtnl_unregister(PF_MCTP, RTM_NEWADDR); + rtnl_unregister(PF_MCTP, RTM_GETADDR); + + unregister_netdevice_notifier(&mctp_dev_nb); +} -- cgit v1.2.3 From 889b7da23abf92faf34491df95733bda63639e32 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:45 +0800 Subject: mctp: Add initial routing framework Add a simple routing table, and a couple of route output handlers, and the mctp packet_type & handler. Includes changes from Matt Johnston . Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- MAINTAINERS | 1 + include/net/mctp.h | 75 ++++++++++ include/net/net_namespace.h | 4 + include/net/netns/mctp.h | 16 +++ net/mctp/Makefile | 2 +- net/mctp/af_mctp.c | 7 + net/mctp/device.c | 8 ++ net/mctp/route.c | 329 ++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 441 insertions(+), 1 deletion(-) create mode 100644 include/net/netns/mctp.h create mode 100644 net/mctp/route.c (limited to 'include/net') diff --git a/MAINTAINERS b/MAINTAINERS index c2943c227ee6..4ca73465e690 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11040,6 +11040,7 @@ S: Maintained F: drivers/net/mctp/ F: include/net/mctp.h F: include/net/mctpdevice.h +F: include/net/netns/mctp.h F: net/mctp/ MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7 diff --git a/include/net/mctp.h b/include/net/mctp.h index 61452e03aa85..4094bec5e5db 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -11,6 +11,7 @@ #include #include +#include /* MCTP packet definitions */ struct mctp_hdr { @@ -33,6 +34,8 @@ struct mctp_hdr { #define MCTP_HDR_TAG_SHIFT 0 #define MCTP_HDR_TAG_MASK GENMASK(2, 0) +#define MCTP_HEADER_MAXLEN 4 + static inline bool mctp_address_ok(mctp_eid_t eid) { return eid >= 8 && eid < 255; @@ -43,6 +46,78 @@ static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb) return (struct mctp_hdr *)skb_network_header(skb); } +struct mctp_skb_cb { + unsigned int magic; + unsigned int net; + mctp_eid_t src; +}; + +/* skb control-block accessors with a little extra debugging for initial + * development. + * + * TODO: remove checks & mctp_skb_cb->magic; replace callers of __mctp_cb + * with mctp_cb(). + * + * __mctp_cb() is only for the initial ingress code; we should see ->magic set + * at all times after this. + */ +static inline struct mctp_skb_cb *__mctp_cb(struct sk_buff *skb) +{ + struct mctp_skb_cb *cb = (void *)skb->cb; + + cb->magic = 0x4d435450; + return cb; +} + +static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb) +{ + struct mctp_skb_cb *cb = (void *)skb->cb; + + WARN_ON(cb->magic != 0x4d435450); + return (void *)(skb->cb); +} + +/* Route definition. + * + * These are held in the pernet->mctp.routes list, with RCU protection for + * removed routes. We hold a reference to the netdev; routes need to be + * dropped on NETDEV_UNREGISTER events. + * + * Updates to the route table are performed under rtnl; all reads under RCU, + * so routes cannot be referenced over a RCU grace period. Specifically: A + * caller cannot block between mctp_route_lookup and passing the route to + * mctp_do_route. + */ +struct mctp_route { + mctp_eid_t min, max; + + struct mctp_dev *dev; + unsigned int mtu; + int (*output)(struct mctp_route *route, + struct sk_buff *skb); + + struct list_head list; + refcount_t refs; + struct rcu_head rcu; +}; + +/* route interfaces */ +struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, + mctp_eid_t daddr); + +int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb); + +int mctp_local_output(struct sock *sk, struct mctp_route *rt, + struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); + +/* routing <--> device interface */ +int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr); +int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr); +void mctp_route_remove_dev(struct mctp_dev *mdev); + +int mctp_routes_init(void); +void mctp_routes_exit(void); + void mctp_device_init(void); void mctp_device_exit(void); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 12cf6d7ea62c..cc54750dd3db 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -167,6 +168,9 @@ struct net { #ifdef CONFIG_XDP_SOCKETS struct netns_xdp xdp; #endif +#if IS_ENABLED(CONFIG_MCTP) + struct netns_mctp mctp; +#endif #if IS_ENABLED(CONFIG_CRYPTO_USER) struct sock *crypto_nlsk; #endif diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h new file mode 100644 index 000000000000..508459b08a59 --- /dev/null +++ b/include/net/netns/mctp.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * MCTP per-net structures + */ + +#ifndef __NETNS_MCTP_H__ +#define __NETNS_MCTP_H__ + +#include + +struct netns_mctp { + /* Only updated under RTNL, entries freed via RCU */ + struct list_head routes; +}; + +#endif /* __NETNS_MCTP_H__ */ diff --git a/net/mctp/Makefile b/net/mctp/Makefile index 2ea98c27b262..b1a330e9d82a 100644 --- a/net/mctp/Makefile +++ b/net/mctp/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MCTP) += mctp.o -mctp-objs := af_mctp.o device.o +mctp-objs := af_mctp.o device.o route.o diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 401b4fa141a5..8085f5912101 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -157,10 +157,16 @@ static __init int mctp_init(void) if (rc) goto err_unreg_sock; + rc = mctp_routes_init(); + if (rc) + goto err_unreg_proto; + mctp_device_init(); return 0; +err_unreg_proto: + proto_unregister(&mctp_proto); err_unreg_sock: sock_unregister(PF_MCTP); @@ -170,6 +176,7 @@ err_unreg_sock: static __exit void mctp_exit(void) { mctp_device_exit(); + mctp_routes_exit(); proto_unregister(&mctp_proto); sock_unregister(PF_MCTP); } diff --git a/net/mctp/device.c b/net/mctp/device.c index 877abe5312cd..5f1b18c17351 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -197,6 +197,8 @@ static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, kfree(tmp_addrs); + mctp_route_add_local(mdev, addr->s_addr); + return 0; } @@ -240,6 +242,11 @@ static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, if (!pos) return -ENOENT; + rc = mctp_route_remove_local(mdev, addr->s_addr); + // we can ignore -ENOENT in the case a route was already removed + if (rc < 0 && rc != -ENOENT) + return rc; + spin_lock_irqsave(&mdev->addrs_lock, flags); memmove(pos, pos + 1, mdev->num_addrs - 1 - (pos - mdev->addrs)); mdev->num_addrs--; @@ -334,6 +341,7 @@ static void mctp_unregister(struct net_device *dev) RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL); + mctp_route_remove_dev(mdev); kfree(mdev->addrs); mctp_dev_destroy(mdev); diff --git a/net/mctp/route.c b/net/mctp/route.c new file mode 100644 index 000000000000..2bcc13175a9e --- /dev/null +++ b/net/mctp/route.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Management Component Transport Protocol (MCTP) - routing + * implementation. + * + * This is currently based on a simple routing table, with no dst cache. The + * number of routes should stay fairly small, so the lookup cost is small. + * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#include +#include +#include +#include +#include + +#include + +#include +#include + +/* route output callbacks */ +static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb) +{ + kfree_skb(skb); + return 0; +} + +static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) +{ + /* -> to local stack */ + /* TODO: socket lookup, reassemble */ + kfree_skb(skb); + return 0; +} + +static int __always_unused mctp_route_output(struct mctp_route *route, + struct sk_buff *skb) +{ + unsigned int mtu; + int rc; + + skb->protocol = htons(ETH_P_MCTP); + + mtu = READ_ONCE(skb->dev->mtu); + if (skb->len > mtu) { + kfree_skb(skb); + return -EMSGSIZE; + } + + /* TODO: daddr (from rt->neigh), saddr (from device?) */ + rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol), + NULL, NULL, skb->len); + if (rc) { + kfree_skb(skb); + return -EHOSTUNREACH; + } + + rc = dev_queue_xmit(skb); + if (rc) + rc = net_xmit_errno(rc); + + return rc; +} + +/* route alloc/release */ +static void mctp_route_release(struct mctp_route *rt) +{ + if (refcount_dec_and_test(&rt->refs)) { + dev_put(rt->dev->dev); + kfree_rcu(rt, rcu); + } +} + +/* returns a route with the refcount at 1 */ +static struct mctp_route *mctp_route_alloc(void) +{ + struct mctp_route *rt; + + rt = kzalloc(sizeof(*rt), GFP_KERNEL); + if (!rt) + return NULL; + + INIT_LIST_HEAD(&rt->list); + refcount_set(&rt->refs, 1); + rt->output = mctp_route_discard; + + return rt; +} + +/* routing lookups */ +static bool mctp_rt_match_eid(struct mctp_route *rt, + unsigned int net, mctp_eid_t eid) +{ + return READ_ONCE(rt->dev->net) == net && + rt->min <= eid && rt->max >= eid; +} + +/* compares match, used for duplicate prevention */ +static bool mctp_rt_compare_exact(struct mctp_route *rt1, + struct mctp_route *rt2) +{ + ASSERT_RTNL(); + return rt1->dev->net == rt2->dev->net && + rt1->min == rt2->min && + rt1->max == rt2->max; +} + +struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, + mctp_eid_t daddr) +{ + struct mctp_route *tmp, *rt = NULL; + + list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { + /* TODO: add metrics */ + if (mctp_rt_match_eid(tmp, dnet, daddr)) { + if (refcount_inc_not_zero(&tmp->refs)) { + rt = tmp; + break; + } + } + } + + return rt; +} + +/* sends a skb to rt and releases the route. */ +int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb) +{ + int rc; + + rc = rt->output(rt, skb); + mctp_route_release(rt); + return rc; +} + +int mctp_local_output(struct sock *sk, struct mctp_route *rt, + struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag) +{ + struct mctp_skb_cb *cb = mctp_cb(skb); + struct mctp_hdr *hdr; + unsigned long flags; + mctp_eid_t saddr; + int rc; + + if (WARN_ON(!rt->dev)) + return -EINVAL; + + spin_lock_irqsave(&rt->dev->addrs_lock, flags); + if (rt->dev->num_addrs == 0) { + rc = -EHOSTUNREACH; + } else { + /* use the outbound interface's first address as our source */ + saddr = rt->dev->addrs[0]; + rc = 0; + } + spin_unlock_irqrestore(&rt->dev->addrs_lock, flags); + + if (rc) + return rc; + + /* TODO: we have the route MTU here; packetise */ + + skb_reset_transport_header(skb); + skb_push(skb, sizeof(struct mctp_hdr)); + skb_reset_network_header(skb); + hdr = mctp_hdr(skb); + hdr->ver = 1; + hdr->dest = daddr; + hdr->src = saddr; + hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM; /* TODO */ + + skb->protocol = htons(ETH_P_MCTP); + skb->priority = 0; + + /* cb->net will have been set on initial ingress */ + cb->src = saddr; + + return mctp_do_route(rt, skb); +} + +/* route management */ +int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_route *rt, *ert; + + rt = mctp_route_alloc(); + if (!rt) + return -ENOMEM; + + rt->min = addr; + rt->max = addr; + rt->dev = mdev; + dev_hold(rt->dev->dev); + rt->output = mctp_route_input; + + ASSERT_RTNL(); + /* Prevent duplicate identical routes. */ + list_for_each_entry(ert, &net->mctp.routes, list) { + if (mctp_rt_compare_exact(rt, ert)) { + mctp_route_release(rt); + return -EEXIST; + } + } + + list_add_rcu(&rt->list, &net->mctp.routes); + + return 0; +} + +int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_route *rt, *tmp; + + ASSERT_RTNL(); + + list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { + if (rt->dev == mdev && rt->min == addr && rt->max == addr) { + list_del_rcu(&rt->list); + /* TODO: immediate RTM_DELROUTE */ + mctp_route_release(rt); + } + } + + return 0; +} + +/* removes all entries for a given device */ +void mctp_route_remove_dev(struct mctp_dev *mdev) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_route *rt, *tmp; + + ASSERT_RTNL(); + list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { + if (rt->dev == mdev) { + list_del_rcu(&rt->list); + /* TODO: immediate RTM_DELROUTE */ + mctp_route_release(rt); + } + } +} + +/* Incoming packet-handling */ + +static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) +{ + struct net *net = dev_net(dev); + struct mctp_skb_cb *cb; + struct mctp_route *rt; + struct mctp_hdr *mh; + + /* basic non-data sanity checks */ + if (dev->type != ARPHRD_MCTP) + goto err_drop; + + if (!pskb_may_pull(skb, sizeof(struct mctp_hdr))) + goto err_drop; + + skb_reset_transport_header(skb); + skb_reset_network_header(skb); + + /* We have enough for a header; decode and route */ + mh = mctp_hdr(skb); + if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX) + goto err_drop; + + cb = __mctp_cb(skb); + rcu_read_lock(); + cb->net = READ_ONCE(__mctp_dev_get(dev)->net); + rcu_read_unlock(); + + rt = mctp_route_lookup(net, cb->net, mh->dest); + if (!rt) + goto err_drop; + + mctp_do_route(rt, skb); + + return NET_RX_SUCCESS; + +err_drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type mctp_packet_type = { + .type = cpu_to_be16(ETH_P_MCTP), + .func = mctp_pkttype_receive, +}; + +/* net namespace implementation */ +static int __net_init mctp_routes_net_init(struct net *net) +{ + struct netns_mctp *ns = &net->mctp; + + INIT_LIST_HEAD(&ns->routes); + return 0; +} + +static void __net_exit mctp_routes_net_exit(struct net *net) +{ + struct mctp_route *rt; + + list_for_each_entry_rcu(rt, &net->mctp.routes, list) + mctp_route_release(rt); +} + +static struct pernet_operations mctp_net_ops = { + .init = mctp_routes_net_init, + .exit = mctp_routes_net_exit, +}; + +int __init mctp_routes_init(void) +{ + dev_add_pack(&mctp_packet_type); + return register_pernet_subsys(&mctp_net_ops); +} + +void __exit mctp_routes_exit(void) +{ + unregister_pernet_subsys(&mctp_net_ops); + dev_remove_pack(&mctp_packet_type); +} -- cgit v1.2.3 From 06d2f4c583a7d892300920fc85d654d48a15e914 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 29 Jul 2021 10:20:46 +0800 Subject: mctp: Add netlink route management This change adds RTM_GETROUTE, RTM_NEWROUTE & RTM_DELROUTE handlers, allowing management of the MCTP route table. Includes changes from Jeremy Kerr . Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- include/net/mctp.h | 2 + net/mctp/route.c | 258 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 251 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/mctp.h b/include/net/mctp.h index 4094bec5e5db..bc36e37e8198 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -111,6 +111,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); /* routing <--> device interface */ +unsigned int mctp_default_net(struct net *net); +int mctp_default_net_set(struct net *net, unsigned int index); int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr); int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr); void mctp_route_remove_dev(struct mctp_dev *mdev); diff --git a/net/mctp/route.c b/net/mctp/route.c index 2bcc13175a9e..31568203f9d0 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -20,6 +20,8 @@ #include #include +#include +#include /* route output callbacks */ static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb) @@ -36,8 +38,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) return 0; } -static int __always_unused mctp_route_output(struct mctp_route *route, - struct sk_buff *skb) +static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) { unsigned int mtu; int rc; @@ -182,20 +183,29 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, } /* route management */ -int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) +static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, + unsigned int daddr_extent, unsigned int mtu, + bool is_local) { struct net *net = dev_net(mdev->dev); struct mctp_route *rt, *ert; + if (!mctp_address_ok(daddr_start)) + return -EINVAL; + + if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) + return -EINVAL; + rt = mctp_route_alloc(); if (!rt) return -ENOMEM; - rt->min = addr; - rt->max = addr; + rt->min = daddr_start; + rt->max = daddr_start + daddr_extent; + rt->mtu = mtu; rt->dev = mdev; dev_hold(rt->dev->dev); - rt->output = mctp_route_input; + rt->output = is_local ? mctp_route_input : mctp_route_output; ASSERT_RTNL(); /* Prevent duplicate identical routes. */ @@ -211,22 +221,43 @@ int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) return 0; } -int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr) +static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, + unsigned int daddr_extent) { struct net *net = dev_net(mdev->dev); struct mctp_route *rt, *tmp; + mctp_eid_t daddr_end; + bool dropped; + + if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) + return -EINVAL; + + daddr_end = daddr_start + daddr_extent; + dropped = false; ASSERT_RTNL(); list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { - if (rt->dev == mdev && rt->min == addr && rt->max == addr) { + if (rt->dev == mdev && + rt->min == daddr_start && rt->max == daddr_end) { list_del_rcu(&rt->list); /* TODO: immediate RTM_DELROUTE */ mctp_route_release(rt); + dropped = true; } } - return 0; + return dropped ? 0 : -ENOENT; +} + +int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) +{ + return mctp_route_add(mdev, addr, 0, 0, true); +} + +int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr) +{ + return mctp_route_remove(mdev, addr, 0); } /* removes all entries for a given device */ @@ -294,6 +325,204 @@ static struct packet_type mctp_packet_type = { .func = mctp_pkttype_receive, }; +/* netlink interface */ + +static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = { + [RTA_DST] = { .type = NLA_U8 }, + [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_OIF] = { .type = NLA_U32 }, +}; + +/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing. + * tb must hold RTA_MAX+1 elements. + */ +static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct nlattr **tb, struct rtmsg **rtm, + struct mctp_dev **mdev, mctp_eid_t *daddr_start) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + unsigned int ifindex; + int rc; + + rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX, + rta_mctp_policy, extack); + if (rc < 0) { + NL_SET_ERR_MSG(extack, "incorrect format"); + return rc; + } + + if (!tb[RTA_DST]) { + NL_SET_ERR_MSG(extack, "dst EID missing"); + return -EINVAL; + } + *daddr_start = nla_get_u8(tb[RTA_DST]); + + if (!tb[RTA_OIF]) { + NL_SET_ERR_MSG(extack, "ifindex missing"); + return -EINVAL; + } + ifindex = nla_get_u32(tb[RTA_OIF]); + + *rtm = nlmsg_data(nlh); + if ((*rtm)->rtm_family != AF_MCTP) { + NL_SET_ERR_MSG(extack, "route family must be AF_MCTP"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "bad ifindex"); + return -ENODEV; + } + *mdev = mctp_dev_get_rtnl(dev); + if (!*mdev) + return -ENODEV; + + if (dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG(extack, "no routes to loopback"); + return -EINVAL; + } + + return 0; +} + +static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RTA_MAX + 1]; + mctp_eid_t daddr_start; + struct mctp_dev *mdev; + struct rtmsg *rtm; + unsigned int mtu; + int rc; + + rc = mctp_route_nlparse(skb, nlh, extack, tb, + &rtm, &mdev, &daddr_start); + if (rc < 0) + return rc; + + if (rtm->rtm_type != RTN_UNICAST) { + NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST"); + return -EINVAL; + } + + /* TODO: parse mtu from nlparse */ + mtu = 0; + + rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, false); + return rc; +} + +static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RTA_MAX + 1]; + mctp_eid_t daddr_start; + struct mctp_dev *mdev; + struct rtmsg *rtm; + int rc; + + rc = mctp_route_nlparse(skb, nlh, extack, tb, + &rtm, &mdev, &daddr_start); + if (rc < 0) + return rc; + + /* we only have unicast routes */ + if (rtm->rtm_type != RTN_UNICAST) + return -EINVAL; + + rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len); + return rc; +} + +static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, + u32 portid, u32 seq, int event, unsigned int flags) +{ + struct nlmsghdr *nlh; + struct rtmsg *hdr; + void *metrics; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); + if (!nlh) + return -EMSGSIZE; + + hdr = nlmsg_data(nlh); + hdr->rtm_family = AF_MCTP; + + /* we use the _len fields as a number of EIDs, rather than + * a number of bits in the address + */ + hdr->rtm_dst_len = rt->max - rt->min; + hdr->rtm_src_len = 0; + hdr->rtm_tos = 0; + hdr->rtm_table = RT_TABLE_DEFAULT; + hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */ + hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */ + hdr->rtm_type = RTN_ANYCAST; /* TODO: type from route */ + + if (nla_put_u8(skb, RTA_DST, rt->min)) + goto cancel; + + metrics = nla_nest_start_noflag(skb, RTA_METRICS); + if (!metrics) + goto cancel; + + if (rt->mtu) { + if (nla_put_u32(skb, RTAX_MTU, rt->mtu)) + goto cancel; + } + + nla_nest_end(skb, metrics); + + if (rt->dev) { + if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex)) + goto cancel; + } + + /* TODO: conditional neighbour physaddr? */ + + nlmsg_end(skb, nlh); + + return 0; + +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct mctp_route *rt; + int s_idx, idx; + + /* TODO: allow filtering on route data, possibly under + * cb->strict_check + */ + + /* TODO: change to struct overlay */ + s_idx = cb->args[0]; + idx = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(rt, &net->mctp.routes, list) { + if (idx++ < s_idx) + continue; + if (mctp_fill_rtinfo(skb, rt, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWROUTE, NLM_F_MULTI) < 0) + break; + } + + rcu_read_unlock(); + cb->args[0] = idx; + + return skb->len; +} + /* net namespace implementation */ static int __net_init mctp_routes_net_init(struct net *net) { @@ -319,11 +548,22 @@ static struct pernet_operations mctp_net_ops = { int __init mctp_routes_init(void) { dev_add_pack(&mctp_packet_type); + + rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE, + NULL, mctp_dump_rtinfo, 0); + rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE, + mctp_newroute, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE, + mctp_delroute, NULL, 0); + return register_pernet_subsys(&mctp_net_ops); } void __exit mctp_routes_exit(void) { unregister_pernet_subsys(&mctp_net_ops); + rtnl_unregister(PF_MCTP, RTM_DELROUTE); + rtnl_unregister(PF_MCTP, RTM_NEWROUTE); + rtnl_unregister(PF_MCTP, RTM_GETROUTE); dev_remove_pack(&mctp_packet_type); } -- cgit v1.2.3 From 4d8b9319282ae84f5a17b28d8b5b5d1e7e537312 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 29 Jul 2021 10:20:47 +0800 Subject: mctp: Add neighbour implementation Add an initial neighbour table implementation, to be used in the route output path. Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- include/net/mctp.h | 25 +++++++++ include/net/mctpdevice.h | 1 + include/net/netns/mctp.h | 4 ++ net/mctp/Makefile | 2 +- net/mctp/af_mctp.c | 5 ++ net/mctp/device.c | 1 + net/mctp/neigh.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 net/mctp/neigh.c (limited to 'include/net') diff --git a/include/net/mctp.h b/include/net/mctp.h index bc36e37e8198..53f035c8b59c 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -117,6 +117,31 @@ int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr); int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr); void mctp_route_remove_dev(struct mctp_dev *mdev); +/* neighbour definitions */ +enum mctp_neigh_source { + MCTP_NEIGH_STATIC, + MCTP_NEIGH_DISCOVER, +}; + +struct mctp_neigh { + struct mctp_dev *dev; + mctp_eid_t eid; + enum mctp_neigh_source source; + + unsigned char ha[MAX_ADDR_LEN]; + + struct list_head list; + struct rcu_head rcu; +}; + +int mctp_neigh_init(void); +void mctp_neigh_exit(void); + +// ret_hwaddr may be NULL, otherwise must have space for MAX_ADDR_LEN +int mctp_neigh_lookup(struct mctp_dev *dev, mctp_eid_t eid, + void *ret_hwaddr); +void mctp_neigh_remove_dev(struct mctp_dev *mdev); + int mctp_routes_init(void); void mctp_routes_exit(void); diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h index 71a11012fac7..57e773ff08bb 100644 --- a/include/net/mctpdevice.h +++ b/include/net/mctpdevice.h @@ -31,5 +31,6 @@ struct mctp_dev { struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev); struct mctp_dev *__mctp_dev_get(const struct net_device *dev); +struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev); #endif /* __NET_MCTPDEVICE_H */ diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h index 508459b08a59..2f5ebeeb320e 100644 --- a/include/net/netns/mctp.h +++ b/include/net/netns/mctp.h @@ -11,6 +11,10 @@ struct netns_mctp { /* Only updated under RTNL, entries freed via RCU */ struct list_head routes; + + /* neighbour table */ + struct mutex neigh_lock; + struct list_head neighbours; }; #endif /* __NETNS_MCTP_H__ */ diff --git a/net/mctp/Makefile b/net/mctp/Makefile index b1a330e9d82a..0171333384d7 100644 --- a/net/mctp/Makefile +++ b/net/mctp/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MCTP) += mctp.o -mctp-objs := af_mctp.o device.o route.o +mctp-objs := af_mctp.o device.o route.o neigh.o diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 8085f5912101..58701e6b282c 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -161,6 +161,10 @@ static __init int mctp_init(void) if (rc) goto err_unreg_proto; + rc = mctp_neigh_init(); + if (rc) + goto err_unreg_proto; + mctp_device_init(); return 0; @@ -176,6 +180,7 @@ err_unreg_sock: static __exit void mctp_exit(void) { mctp_device_exit(); + mctp_neigh_exit(); mctp_routes_exit(); proto_unregister(&mctp_proto); sock_unregister(PF_MCTP); diff --git a/net/mctp/device.c b/net/mctp/device.c index 5f1b18c17351..aa049590acda 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -342,6 +342,7 @@ static void mctp_unregister(struct net_device *dev) RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL); mctp_route_remove_dev(mdev); + mctp_neigh_remove_dev(mdev); kfree(mdev->addrs); mctp_dev_destroy(mdev); diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c new file mode 100644 index 000000000000..8603f0c45a8f --- /dev/null +++ b/net/mctp/neigh.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Management Component Transport Protocol (MCTP) - routing + * implementation. + * + * This is currently based on a simple routing table, with no dst cache. The + * number of routes should stay fairly small, so the lookup cost is small. + * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static int __always_unused mctp_neigh_add(struct mctp_dev *mdev, mctp_eid_t eid, + enum mctp_neigh_source source, + size_t lladdr_len, const void *lladdr) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_neigh *neigh; + int rc; + + mutex_lock(&net->mctp.neigh_lock); + if (mctp_neigh_lookup(mdev, eid, NULL) == 0) { + rc = -EEXIST; + goto out; + } + + if (lladdr_len > sizeof(neigh->ha)) { + rc = -EINVAL; + goto out; + } + + neigh = kzalloc(sizeof(*neigh), GFP_KERNEL); + if (!neigh) { + rc = -ENOMEM; + goto out; + } + INIT_LIST_HEAD(&neigh->list); + neigh->dev = mdev; + dev_hold(neigh->dev->dev); + neigh->eid = eid; + neigh->source = source; + memcpy(neigh->ha, lladdr, lladdr_len); + + list_add_rcu(&neigh->list, &net->mctp.neighbours); + rc = 0; +out: + mutex_unlock(&net->mctp.neigh_lock); + return rc; +} + +static void __mctp_neigh_free(struct rcu_head *rcu) +{ + struct mctp_neigh *neigh = container_of(rcu, struct mctp_neigh, rcu); + + dev_put(neigh->dev->dev); + kfree(neigh); +} + +/* Removes all neighbour entries referring to a device */ +void mctp_neigh_remove_dev(struct mctp_dev *mdev) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_neigh *neigh, *tmp; + + mutex_lock(&net->mctp.neigh_lock); + list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) { + if (neigh->dev == mdev) { + list_del_rcu(&neigh->list); + /* TODO: immediate RTM_DELNEIGH */ + call_rcu(&neigh->rcu, __mctp_neigh_free); + } + } + + mutex_unlock(&net->mctp.neigh_lock); +} + +int mctp_neigh_lookup(struct mctp_dev *mdev, mctp_eid_t eid, void *ret_hwaddr) +{ + struct net *net = dev_net(mdev->dev); + struct mctp_neigh *neigh; + int rc = -EHOSTUNREACH; // TODO: or ENOENT? + + rcu_read_lock(); + list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) { + if (mdev == neigh->dev && eid == neigh->eid) { + if (ret_hwaddr) + memcpy(ret_hwaddr, neigh->ha, + sizeof(neigh->ha)); + rc = 0; + break; + } + } + rcu_read_unlock(); + return rc; +} + +/* namespace registration */ +static int __net_init mctp_neigh_net_init(struct net *net) +{ + struct netns_mctp *ns = &net->mctp; + + INIT_LIST_HEAD(&ns->neighbours); + return 0; +} + +static void __net_exit mctp_neigh_net_exit(struct net *net) +{ + struct netns_mctp *ns = &net->mctp; + struct mctp_neigh *neigh; + + list_for_each_entry(neigh, &ns->neighbours, list) + call_rcu(&neigh->rcu, __mctp_neigh_free); +} + +/* net namespace implementation */ + +static struct pernet_operations mctp_net_ops = { + .init = mctp_neigh_net_init, + .exit = mctp_neigh_net_exit, +}; + +int __init mctp_neigh_init(void) +{ + return register_pernet_subsys(&mctp_net_ops); +} + +void __exit mctp_neigh_exit(void) +{ + unregister_pernet_subsys(&mctp_net_ops); +} -- cgit v1.2.3 From 833ef3b91de692ef33b800bca6b1569c39dece74 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:49 +0800 Subject: mctp: Populate socket implementation Start filling-out the socket syscalls: bind, sendmsg & recvmsg. This requires an input route implementation, so we add to mctp_route_input, allowing lookups on binds & message tags. This just handles single-packet messages at present, we will add fragmentation in a future change. Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/net/mctp.h | 59 +++++++++++++ include/net/netns/mctp.h | 13 +++ net/mctp/af_mctp.c | 203 ++++++++++++++++++++++++++++++++++++++++-- net/mctp/route.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 491 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/mctp.h b/include/net/mctp.h index 53f035c8b59c..f2d98f6993c0 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -12,6 +12,7 @@ #include #include #include +#include /* MCTP packet definitions */ struct mctp_hdr { @@ -46,6 +47,64 @@ static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb) return (struct mctp_hdr *)skb_network_header(skb); } +/* socket implementation */ +struct mctp_sock { + struct sock sk; + + /* bind() params */ + int bind_net; + mctp_eid_t bind_addr; + __u8 bind_type; + + /* list of mctp_sk_key, for incoming tag lookup. updates protected + * by sk->net->keys_lock + */ + struct hlist_head keys; +}; + +/* Key for matching incoming packets to sockets or reassembly contexts. + * Packets are matched on (src,dest,tag). + * + * Lifetime requirements: + * + * - keys are free()ed via RCU + * + * - a mctp_sk_key contains a reference to a struct sock; this is valid + * for the life of the key. On sock destruction (through unhash), the key is + * removed from lists (see below), and will not be observable after a RCU + * grace period. + * + * any RX occurring within that grace period may still queue to the socket, + * but will hit the SOCK_DEAD case before the socket is freed. + * + * - these mctp_sk_keys appear on two lists: + * 1) the struct mctp_sock->keys list + * 2) the struct netns_mctp->keys list + * + * updates to either list are performed under the netns_mctp->keys + * lock. + * + * - there is a single destruction path for a mctp_sk_key - through socket + * unhash (see mctp_sk_unhash). This performs the list removal under + * keys_lock. + */ +struct mctp_sk_key { + mctp_eid_t peer_addr; + mctp_eid_t local_addr; + __u8 tag; /* incoming tag match; invert TO for local */ + + /* we hold a ref to sk when set */ + struct sock *sk; + + /* routing lookup list */ + struct hlist_node hlist; + + /* per-socket list */ + struct hlist_node sklist; + + struct rcu_head rcu; +}; + struct mctp_skb_cb { unsigned int magic; unsigned int net; diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h index 2f5ebeeb320e..14ae6d37e52a 100644 --- a/include/net/netns/mctp.h +++ b/include/net/netns/mctp.h @@ -12,6 +12,19 @@ struct netns_mctp { /* Only updated under RTNL, entries freed via RCU */ struct list_head routes; + /* Bound sockets: list of sockets bound by type. + * This list is updated from non-atomic contexts (under bind_lock), + * and read (under rcu) in packet rx + */ + struct mutex bind_lock; + struct hlist_head binds; + + /* tag allocations. This list is read and updated from atomic contexts, + * but elements are free()ed after a RCU grace-period + */ + spinlock_t keys_lock; + struct hlist_head keys; + /* neighbour table */ struct mutex neigh_lock; struct list_head neighbours; diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 58701e6b282c..52bd7f2b78db 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -18,10 +18,6 @@ /* socket implementation */ -struct mctp_sock { - struct sock sk; -}; - static int mctp_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -36,18 +32,160 @@ static int mctp_release(struct socket *sock) static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { - return 0; + struct sock *sk = sock->sk; + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); + struct sockaddr_mctp *smctp; + int rc; + + if (addrlen < sizeof(*smctp)) + return -EINVAL; + + if (addr->sa_family != AF_MCTP) + return -EAFNOSUPPORT; + + if (!capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + /* it's a valid sockaddr for MCTP, cast and do protocol checks */ + smctp = (struct sockaddr_mctp *)addr; + + lock_sock(sk); + + /* TODO: allow rebind */ + if (sk_hashed(sk)) { + rc = -EADDRINUSE; + goto out_release; + } + msk->bind_net = smctp->smctp_network; + msk->bind_addr = smctp->smctp_addr.s_addr; + msk->bind_type = smctp->smctp_type & 0x7f; /* ignore the IC bit */ + + rc = sk->sk_prot->hash(sk); + +out_release: + release_sock(sk); + + return rc; } static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { - return 0; + DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name); + const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr); + int rc, addrlen = msg->msg_namelen; + struct sock *sk = sock->sk; + struct mctp_skb_cb *cb; + struct mctp_route *rt; + struct sk_buff *skb; + + if (addr) { + if (addrlen < sizeof(struct sockaddr_mctp)) + return -EINVAL; + if (addr->smctp_family != AF_MCTP) + return -EINVAL; + if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER)) + return -EINVAL; + + } else { + /* TODO: connect()ed sockets */ + return -EDESTADDRREQ; + } + + if (!capable(CAP_NET_RAW)) + return -EACCES; + + rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, + addr->smctp_addr.s_addr); + if (!rt) + return -EHOSTUNREACH; + + skb = sock_alloc_send_skb(sk, hlen + 1 + len, + msg->msg_flags & MSG_DONTWAIT, &rc); + if (!skb) + return rc; + + skb_reserve(skb, hlen); + + /* set type as fist byte in payload */ + *(u8 *)skb_put(skb, 1) = addr->smctp_type; + + rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len); + if (rc < 0) { + kfree_skb(skb); + return rc; + } + + /* set up cb */ + cb = __mctp_cb(skb); + cb->net = addr->smctp_network; + + rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr, + addr->smctp_tag); + + return rc ? : len; } static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - return 0; + DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name); + struct sock *sk = sock->sk; + struct sk_buff *skb; + size_t msglen; + u8 type; + int rc; + + if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK)) + return -EOPNOTSUPP; + + skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc); + if (!skb) + return rc; + + if (!skb->len) { + rc = 0; + goto out_free; + } + + /* extract message type, remove from data */ + type = *((u8 *)skb->data); + msglen = skb->len - 1; + + if (len < msglen) + msg->msg_flags |= MSG_TRUNC; + else + len = msglen; + + rc = skb_copy_datagram_msg(skb, 1, msg, len); + if (rc < 0) + goto out_free; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (addr) { + struct mctp_skb_cb *cb = mctp_cb(skb); + /* TODO: expand mctp_skb_cb for header fields? */ + struct mctp_hdr *hdr = mctp_hdr(skb); + + hdr = mctp_hdr(skb); + addr = msg->msg_name; + addr->smctp_family = AF_MCTP; + addr->smctp_network = cb->net; + addr->smctp_addr.s_addr = hdr->src; + addr->smctp_type = type; + addr->smctp_tag = hdr->flags_seq_tag & + (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); + msg->msg_namelen = sizeof(*addr); + } + + rc = len; + + if (flags & MSG_TRUNC) + rc = msglen; + +out_free: + skb_free_datagram(sk, skb); + return rc; } static int mctp_setsockopt(struct socket *sock, int level, int optname, @@ -83,16 +221,63 @@ static const struct proto_ops mctp_dgram_ops = { .sendpage = sock_no_sendpage, }; +static int mctp_sk_init(struct sock *sk) +{ + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); + + INIT_HLIST_HEAD(&msk->keys); + return 0; +} + static void mctp_sk_close(struct sock *sk, long timeout) { sk_common_release(sk); } +static int mctp_sk_hash(struct sock *sk) +{ + struct net *net = sock_net(sk); + + mutex_lock(&net->mctp.bind_lock); + sk_add_node_rcu(sk, &net->mctp.binds); + mutex_unlock(&net->mctp.bind_lock); + + return 0; +} + +static void mctp_sk_unhash(struct sock *sk) +{ + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); + struct net *net = sock_net(sk); + struct mctp_sk_key *key; + struct hlist_node *tmp; + unsigned long flags; + + /* remove from any type-based binds */ + mutex_lock(&net->mctp.bind_lock); + sk_del_node_init_rcu(sk); + mutex_unlock(&net->mctp.bind_lock); + + /* remove tag allocations */ + spin_lock_irqsave(&net->mctp.keys_lock, flags); + hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { + hlist_del_rcu(&key->sklist); + hlist_del_rcu(&key->hlist); + kfree_rcu(key, rcu); + } + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + + synchronize_rcu(); +} + static struct proto mctp_proto = { .name = "MCTP", .owner = THIS_MODULE, .obj_size = sizeof(struct mctp_sock), + .init = mctp_sk_init, .close = mctp_sk_close, + .hash = mctp_sk_hash, + .unhash = mctp_sk_unhash, }; static int mctp_pf_create(struct net *net, struct socket *sock, @@ -147,6 +332,10 @@ static __init int mctp_init(void) { int rc; + /* ensure our uapi tag definitions match the header format */ + BUILD_BUG_ON(MCTP_TAG_OWNER != MCTP_HDR_FLAG_TO); + BUILD_BUG_ON(MCTP_TAG_MASK != MCTP_HDR_TAG_MASK); + pr_info("mctp: management component transport protocol core\n"); rc = sock_register(&mctp_pf); diff --git a/net/mctp/route.c b/net/mctp/route.c index 31568203f9d0..cc9891672eaa 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -30,10 +30,139 @@ static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb) return 0; } +static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb) +{ + struct mctp_skb_cb *cb = mctp_cb(skb); + struct mctp_hdr *mh; + struct sock *sk; + u8 type; + + WARN_ON(!rcu_read_lock_held()); + + /* TODO: look up in skb->cb? */ + mh = mctp_hdr(skb); + + if (!skb_headlen(skb)) + return NULL; + + type = (*(u8 *)skb->data) & 0x7f; + + sk_for_each_rcu(sk, &net->mctp.binds) { + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); + + if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net) + continue; + + if (msk->bind_type != type) + continue; + + if (msk->bind_addr != MCTP_ADDR_ANY && + msk->bind_addr != mh->dest) + continue; + + return msk; + } + + return NULL; +} + +static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, + mctp_eid_t peer, u8 tag) +{ + if (key->local_addr != local) + return false; + + if (key->peer_addr != peer) + return false; + + if (key->tag != tag) + return false; + + return true; +} + +static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb, + mctp_eid_t peer) +{ + struct mctp_sk_key *key, *ret; + struct mctp_hdr *mh; + u8 tag; + + WARN_ON(!rcu_read_lock_held()); + + mh = mctp_hdr(skb); + tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); + + ret = NULL; + + hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) { + if (mctp_key_match(key, mh->dest, peer, tag)) { + ret = key; + break; + } + } + + return ret; +} + static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) { - /* -> to local stack */ - /* TODO: socket lookup, reassemble */ + struct net *net = dev_net(skb->dev); + struct mctp_sk_key *key; + struct mctp_sock *msk; + struct mctp_hdr *mh; + + msk = NULL; + + /* we may be receiving a locally-routed packet; drop source sk + * accounting + */ + skb_orphan(skb); + + /* ensure we have enough data for a header and a type */ + if (skb->len < sizeof(struct mctp_hdr) + 1) + goto drop; + + /* grab header, advance data ptr */ + mh = mctp_hdr(skb); + skb_pull(skb, sizeof(struct mctp_hdr)); + + if (mh->ver != 1) + goto drop; + + /* TODO: reassembly */ + if ((mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM)) + != (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM)) + goto drop; + + rcu_read_lock(); + /* 1. lookup socket matching (src,dest,tag) */ + key = mctp_lookup_key(net, skb, mh->src); + + /* 2. lookup socket macthing (BCAST,dest,tag) */ + if (!key) + key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY); + + /* 3. SOM? -> lookup bound socket, conditionally (!EOM) create + * mapping for future (1)/(2). + */ + if (key) + msk = container_of(key->sk, struct mctp_sock, sk); + else if (!msk && (mh->flags_seq_tag & MCTP_HDR_FLAG_SOM)) + msk = mctp_lookup_bind(net, skb); + + if (!msk) + goto unlock_drop; + + sock_queue_rcv_skb(&msk->sk, skb); + + rcu_read_unlock(); + + return 0; + +unlock_drop: + rcu_read_unlock(); +drop: kfree_skb(skb); return 0; } @@ -91,6 +220,80 @@ static struct mctp_route *mctp_route_alloc(void) return rt; } +/* tag management */ +static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, + struct mctp_sock *msk) +{ + struct netns_mctp *mns = &net->mctp; + + lockdep_assert_held(&mns->keys_lock); + + key->sk = &msk->sk; + + /* we hold the net->key_lock here, allowing updates to both + * then net and sk + */ + hlist_add_head_rcu(&key->hlist, &mns->keys); + hlist_add_head_rcu(&key->sklist, &msk->keys); +} + +/* Allocate a locally-owned tag value for (saddr, daddr), and reserve + * it for the socket msk + */ +static int mctp_alloc_local_tag(struct mctp_sock *msk, + mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp) +{ + struct net *net = sock_net(&msk->sk); + struct netns_mctp *mns = &net->mctp; + struct mctp_sk_key *key, *tmp; + unsigned long flags; + int rc = -EAGAIN; + u8 tagbits; + + /* be optimistic, alloc now */ + key = kzalloc(sizeof(*key), GFP_KERNEL); + if (!key) + return -ENOMEM; + key->local_addr = saddr; + key->peer_addr = daddr; + + /* 8 possible tag values */ + tagbits = 0xff; + + spin_lock_irqsave(&mns->keys_lock, flags); + + /* Walk through the existing keys, looking for potential conflicting + * tags. If we find a conflict, clear that bit from tagbits + */ + hlist_for_each_entry(tmp, &mns->keys, hlist) { + /* if we don't own the tag, it can't conflict */ + if (tmp->tag & MCTP_HDR_FLAG_TO) + continue; + + if ((tmp->peer_addr == daddr || + tmp->peer_addr == MCTP_ADDR_ANY) && + tmp->local_addr == saddr) + tagbits &= ~(1 << tmp->tag); + + if (!tagbits) + break; + } + + if (tagbits) { + key->tag = __ffs(tagbits); + mctp_reserve_tag(net, key, msk); + *tagp = key->tag; + rc = 0; + } + + spin_unlock_irqrestore(&mns->keys_lock, flags); + + if (!tagbits) + kfree(key); + + return rc; +} + /* routing lookups */ static bool mctp_rt_match_eid(struct mctp_route *rt, unsigned int net, mctp_eid_t eid) @@ -140,11 +343,13 @@ int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb) int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag) { + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct mctp_skb_cb *cb = mctp_cb(skb); struct mctp_hdr *hdr; unsigned long flags; mctp_eid_t saddr; int rc; + u8 tag; if (WARN_ON(!rt->dev)) return -EINVAL; @@ -162,6 +367,15 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, if (rc) return rc; + if (req_tag & MCTP_HDR_FLAG_TO) { + rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag); + if (rc) + return rc; + tag |= MCTP_HDR_FLAG_TO; + } else { + tag = req_tag; + } + /* TODO: we have the route MTU here; packetise */ skb_reset_transport_header(skb); @@ -171,8 +385,10 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, hdr->ver = 1; hdr->dest = daddr; hdr->src = saddr; - hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM; /* TODO */ + hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | /* TODO */ + tag; + skb->dev = rt->dev->dev; skb->protocol = htons(ETH_P_MCTP); skb->priority = 0; @@ -529,6 +745,10 @@ static int __net_init mctp_routes_net_init(struct net *net) struct netns_mctp *ns = &net->mctp; INIT_LIST_HEAD(&ns->routes); + INIT_HLIST_HEAD(&ns->binds); + mutex_init(&ns->bind_lock); + INIT_HLIST_HEAD(&ns->keys); + spin_lock_init(&ns->keys_lock); return 0; } -- cgit v1.2.3 From 4a992bbd365094730a31bae1e12a6ca695336d57 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:50 +0800 Subject: mctp: Implement message fragmentation & reassembly This change implements MCTP fragmentation (based on route & device MTU), and corresponding reassembly. The MCTP specification only allows for fragmentation on the originating message endpoint, and reassembly on the destination endpoint - intermediate nodes do not need to reassemble/refragment. Consequently, we only fragment in the local transmit path, and reassemble locally-bound packets. Messages are required to be in-order, so we simply cancel reassembly on out-of-order or missing packets. In the fragmentation path, we just break up the message into MTU-sized fragments; the skb structure is a simple copy for now, which we can later improve with a shared data implementation. For reassembly, we keep track of incoming message fragments using the existing tag infrastructure, allocating a key on the (src,dest,tag) tuple, and reassembles matching fragments into a skb->frag_list. Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/net/mctp.h | 25 +++- net/mctp/af_mctp.c | 8 ++ net/mctp/route.c | 372 +++++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 361 insertions(+), 44 deletions(-) (limited to 'include/net') diff --git a/include/net/mctp.h b/include/net/mctp.h index f2d98f6993c0..0a460ba185b8 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -84,9 +84,21 @@ struct mctp_sock { * updates to either list are performed under the netns_mctp->keys * lock. * - * - there is a single destruction path for a mctp_sk_key - through socket - * unhash (see mctp_sk_unhash). This performs the list removal under - * keys_lock. + * - a key may have a sk_buff attached as part of an in-progress message + * reassembly (->reasm_head). The reassembly context is protected by + * reasm_lock, which may be acquired with the keys lock (above) held, if + * necessary. Consequently, keys lock *cannot* be acquired with the + * reasm_lock held. + * + * - there are two destruction paths for a mctp_sk_key: + * + * - through socket unhash (see mctp_sk_unhash). This performs the list + * removal under keys_lock. + * + * - where a key is established to receive a reply message: after receiving + * the (complete) reply, or during reassembly errors. Here, we clean up + * the reassembly context (marking reasm_dead, to prevent another from + * starting), and remove the socket from the netns & socket lists. */ struct mctp_sk_key { mctp_eid_t peer_addr; @@ -102,6 +114,13 @@ struct mctp_sk_key { /* per-socket list */ struct hlist_node sklist; + /* incoming fragment reassembly context */ + spinlock_t reasm_lock; + struct sk_buff *reasm_head; + struct sk_buff **reasm_tailp; + bool reasm_dead; + u8 last_seq; + struct rcu_head rcu; }; diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 52bd7f2b78db..9ca836df19d0 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -263,6 +263,14 @@ static void mctp_sk_unhash(struct sock *sk) hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { hlist_del_rcu(&key->sklist); hlist_del_rcu(&key->hlist); + + spin_lock(&key->reasm_lock); + if (key->reasm_head) + kfree_skb(key->reasm_head); + key->reasm_head = NULL; + key->reasm_dead = true; + spin_unlock(&key->reasm_lock); + kfree_rcu(key, rcu); } spin_unlock_irqrestore(&net->mctp.keys_lock, flags); diff --git a/net/mctp/route.c b/net/mctp/route.c index cc9891672eaa..160220e6f241 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -23,6 +23,8 @@ #include #include +static const unsigned int mctp_message_maxlen = 64 * 1024; + /* route output callbacks */ static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb) { @@ -105,14 +107,125 @@ static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb, return ret; } +static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk, + mctp_eid_t local, mctp_eid_t peer, + u8 tag, gfp_t gfp) +{ + struct mctp_sk_key *key; + + key = kzalloc(sizeof(*key), gfp); + if (!key) + return NULL; + + key->peer_addr = peer; + key->local_addr = local; + key->tag = tag; + key->sk = &msk->sk; + spin_lock_init(&key->reasm_lock); + + return key; +} + +static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) +{ + struct net *net = sock_net(&msk->sk); + struct mctp_sk_key *tmp; + unsigned long flags; + int rc = 0; + + spin_lock_irqsave(&net->mctp.keys_lock, flags); + + hlist_for_each_entry(tmp, &net->mctp.keys, hlist) { + if (mctp_key_match(tmp, key->local_addr, key->peer_addr, + key->tag)) { + rc = -EEXIST; + break; + } + } + + if (!rc) { + hlist_add_head(&key->hlist, &net->mctp.keys); + hlist_add_head(&key->sklist, &msk->keys); + } + + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + + return rc; +} + +/* Must be called with key->reasm_lock, which it will release. Will schedule + * the key for an RCU free. + */ +static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net, + unsigned long flags) + __releases(&key->reasm_lock) +{ + struct sk_buff *skb; + + skb = key->reasm_head; + key->reasm_head = NULL; + key->reasm_dead = true; + spin_unlock_irqrestore(&key->reasm_lock, flags); + + spin_lock_irqsave(&net->mctp.keys_lock, flags); + hlist_del_rcu(&key->hlist); + hlist_del_rcu(&key->sklist); + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + kfree_rcu(key, rcu); + + if (skb) + kfree_skb(skb); +} + +static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) +{ + struct mctp_hdr *hdr = mctp_hdr(skb); + u8 exp_seq, this_seq; + + this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT) + & MCTP_HDR_SEQ_MASK; + + if (!key->reasm_head) { + key->reasm_head = skb; + key->reasm_tailp = &(skb_shinfo(skb)->frag_list); + key->last_seq = this_seq; + return 0; + } + + exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK; + + if (this_seq != exp_seq) + return -EINVAL; + + if (key->reasm_head->len + skb->len > mctp_message_maxlen) + return -EINVAL; + + skb->next = NULL; + skb->sk = NULL; + *key->reasm_tailp = skb; + key->reasm_tailp = &skb->next; + + key->last_seq = this_seq; + + key->reasm_head->data_len += skb->len; + key->reasm_head->len += skb->len; + key->reasm_head->truesize += skb->truesize; + + return 0; +} + static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct mctp_sk_key *key; struct mctp_sock *msk; struct mctp_hdr *mh; + unsigned long f; + u8 tag, flags; + int rc; msk = NULL; + rc = -EINVAL; /* we may be receiving a locally-routed packet; drop source sk * accounting @@ -121,50 +234,144 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) /* ensure we have enough data for a header and a type */ if (skb->len < sizeof(struct mctp_hdr) + 1) - goto drop; + goto out; /* grab header, advance data ptr */ mh = mctp_hdr(skb); skb_pull(skb, sizeof(struct mctp_hdr)); if (mh->ver != 1) - goto drop; + goto out; - /* TODO: reassembly */ - if ((mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM)) - != (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM)) - goto drop; + flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM); + tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); rcu_read_lock(); - /* 1. lookup socket matching (src,dest,tag) */ + + /* lookup socket / reasm context, exactly matching (src,dest,tag) */ key = mctp_lookup_key(net, skb, mh->src); - /* 2. lookup socket macthing (BCAST,dest,tag) */ - if (!key) - key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY); + if (flags & MCTP_HDR_FLAG_SOM) { + if (key) { + msk = container_of(key->sk, struct mctp_sock, sk); + } else { + /* first response to a broadcast? do a more general + * key lookup to find the socket, but don't use this + * key for reassembly - we'll create a more specific + * one for future packets if required (ie, !EOM). + */ + key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY); + if (key) { + msk = container_of(key->sk, + struct mctp_sock, sk); + key = NULL; + } + } - /* 3. SOM? -> lookup bound socket, conditionally (!EOM) create - * mapping for future (1)/(2). - */ - if (key) - msk = container_of(key->sk, struct mctp_sock, sk); - else if (!msk && (mh->flags_seq_tag & MCTP_HDR_FLAG_SOM)) - msk = mctp_lookup_bind(net, skb); + if (!key && !msk && (tag & MCTP_HDR_FLAG_TO)) + msk = mctp_lookup_bind(net, skb); - if (!msk) - goto unlock_drop; + if (!msk) { + rc = -ENOENT; + goto out_unlock; + } - sock_queue_rcv_skb(&msk->sk, skb); + /* single-packet message? deliver to socket, clean up any + * pending key. + */ + if (flags & MCTP_HDR_FLAG_EOM) { + sock_queue_rcv_skb(&msk->sk, skb); + if (key) { + spin_lock_irqsave(&key->reasm_lock, f); + /* we've hit a pending reassembly; not much we + * can do but drop it + */ + __mctp_key_unlock_drop(key, net, f); + } + rc = 0; + goto out_unlock; + } - rcu_read_unlock(); + /* broadcast response or a bind() - create a key for further + * packets for this message + */ + if (!key) { + key = mctp_key_alloc(msk, mh->dest, mh->src, + tag, GFP_ATOMIC); + if (!key) { + rc = -ENOMEM; + goto out_unlock; + } - return 0; + /* we can queue without the reasm lock here, as the + * key isn't observable yet + */ + mctp_frag_queue(key, skb); + + /* if the key_add fails, we've raced with another + * SOM packet with the same src, dest and tag. There's + * no way to distinguish future packets, so all we + * can do is drop; we'll free the skb on exit from + * this function. + */ + rc = mctp_key_add(key, msk); + if (rc) + kfree(key); + + } else { + /* existing key: start reassembly */ + spin_lock_irqsave(&key->reasm_lock, f); + + if (key->reasm_head || key->reasm_dead) { + /* duplicate start? drop everything */ + __mctp_key_unlock_drop(key, net, f); + rc = -EEXIST; + } else { + rc = mctp_frag_queue(key, skb); + spin_unlock_irqrestore(&key->reasm_lock, f); + } + } + + } else if (key) { + /* this packet continues a previous message; reassemble + * using the message-specific key + */ + + spin_lock_irqsave(&key->reasm_lock, f); + + /* we need to be continuing an existing reassembly... */ + if (!key->reasm_head) + rc = -EINVAL; + else + rc = mctp_frag_queue(key, skb); + + /* end of message? deliver to socket, and we're done with + * the reassembly/response key + */ + if (!rc && flags & MCTP_HDR_FLAG_EOM) { + sock_queue_rcv_skb(key->sk, key->reasm_head); + key->reasm_head = NULL; + __mctp_key_unlock_drop(key, net, f); + } else { + spin_unlock_irqrestore(&key->reasm_lock, f); + } + + } else { + /* not a start, no matching key */ + rc = -ENOENT; + } -unlock_drop: +out_unlock: rcu_read_unlock(); -drop: - kfree_skb(skb); - return 0; +out: + if (rc) + kfree_skb(skb); + return rc; +} + +static unsigned int mctp_route_mtu(struct mctp_route *rt) +{ + return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu); } static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) @@ -228,8 +435,6 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, lockdep_assert_held(&mns->keys_lock); - key->sk = &msk->sk; - /* we hold the net->key_lock here, allowing updates to both * then net and sk */ @@ -251,11 +456,9 @@ static int mctp_alloc_local_tag(struct mctp_sock *msk, u8 tagbits; /* be optimistic, alloc now */ - key = kzalloc(sizeof(*key), GFP_KERNEL); + key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL); if (!key) return -ENOMEM; - key->local_addr = saddr; - key->peer_addr = daddr; /* 8 possible tag values */ tagbits = 0xff; @@ -340,6 +543,86 @@ int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb) return rc; } +static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, + unsigned int mtu, u8 tag) +{ + const unsigned int hlen = sizeof(struct mctp_hdr); + struct mctp_hdr *hdr, *hdr2; + unsigned int pos, size; + struct sk_buff *skb2; + int rc; + u8 seq; + + hdr = mctp_hdr(skb); + seq = 0; + rc = 0; + + if (mtu < hlen + 1) { + kfree_skb(skb); + return -EMSGSIZE; + } + + /* we've got the header */ + skb_pull(skb, hlen); + + for (pos = 0; pos < skb->len;) { + /* size of message payload */ + size = min(mtu - hlen, skb->len - pos); + + skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL); + if (!skb2) { + rc = -ENOMEM; + break; + } + + /* generic skb copy */ + skb2->protocol = skb->protocol; + skb2->priority = skb->priority; + skb2->dev = skb->dev; + memcpy(skb2->cb, skb->cb, sizeof(skb2->cb)); + + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); + + /* establish packet */ + skb_reserve(skb2, MCTP_HEADER_MAXLEN); + skb_reset_network_header(skb2); + skb_put(skb2, hlen + size); + skb2->transport_header = skb2->network_header + hlen; + + /* copy header fields, calculate SOM/EOM flags & seq */ + hdr2 = mctp_hdr(skb2); + hdr2->ver = hdr->ver; + hdr2->dest = hdr->dest; + hdr2->src = hdr->src; + hdr2->flags_seq_tag = tag & + (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); + + if (pos == 0) + hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM; + + if (pos + size == skb->len) + hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM; + + hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT; + + /* copy message payload */ + skb_copy_bits(skb, pos, skb_transport_header(skb2), size); + + /* do route, but don't drop the rt reference */ + rc = rt->output(rt, skb2); + if (rc) + break; + + seq = (seq + 1) & MCTP_HDR_SEQ_MASK; + pos += size; + } + + mctp_route_release(rt); + consume_skb(skb); + return rc; +} + int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag) { @@ -347,6 +630,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct mctp_skb_cb *cb = mctp_cb(skb); struct mctp_hdr *hdr; unsigned long flags; + unsigned int mtu; mctp_eid_t saddr; int rc; u8 tag; @@ -376,26 +660,32 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, tag = req_tag; } - /* TODO: we have the route MTU here; packetise */ + skb->protocol = htons(ETH_P_MCTP); + skb->priority = 0; skb_reset_transport_header(skb); skb_push(skb, sizeof(struct mctp_hdr)); skb_reset_network_header(skb); + skb->dev = rt->dev->dev; + + /* cb->net will have been set on initial ingress */ + cb->src = saddr; + + /* set up common header fields */ hdr = mctp_hdr(skb); hdr->ver = 1; hdr->dest = daddr; hdr->src = saddr; - hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | /* TODO */ - tag; - skb->dev = rt->dev->dev; - skb->protocol = htons(ETH_P_MCTP); - skb->priority = 0; + mtu = mctp_route_mtu(rt); - /* cb->net will have been set on initial ingress */ - cb->src = saddr; - - return mctp_do_route(rt, skb); + if (skb->len + sizeof(struct mctp_hdr) <= mtu) { + hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | + tag; + return mctp_do_route(rt, skb); + } else { + return mctp_do_fragment_route(rt, skb, mtu, tag); + } } /* route management */ -- cgit v1.2.3 From 03f2bbc4ee57ca53b2fa1d9caabc5006e0b8f375 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 29 Jul 2021 10:20:52 +0800 Subject: mctp: Allow per-netns default networks Currently we have a compile-time default network (MCTP_INITIAL_DEFAULT_NET). This change introduces a default_net field on the net namespace, allowing future configuration for new interfaces. Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- include/net/mctp.h | 2 ++ include/net/netns/mctp.h | 3 +++ include/uapi/linux/mctp.h | 1 - net/mctp/af_mctp.c | 3 +++ net/mctp/device.c | 2 +- net/mctp/route.c | 14 ++++++++++++++ 6 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mctp.h b/include/net/mctp.h index 0a460ba185b8..54bbe042c973 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -37,6 +37,8 @@ struct mctp_hdr { #define MCTP_HEADER_MAXLEN 4 +#define MCTP_INITIAL_DEFAULT_NET 1 + static inline bool mctp_address_ok(mctp_eid_t eid) { return eid >= 8 && eid < 255; diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h index 14ae6d37e52a..acedef12a35e 100644 --- a/include/net/netns/mctp.h +++ b/include/net/netns/mctp.h @@ -25,6 +25,9 @@ struct netns_mctp { spinlock_t keys_lock; struct hlist_head keys; + /* MCTP network */ + unsigned int default_net; + /* neighbour table */ struct mutex neigh_lock; struct list_head neighbours; diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index a9d8edb3402b..52b54d13f385 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -26,7 +26,6 @@ struct sockaddr_mctp { }; #define MCTP_NET_ANY 0x0 -#define MCTP_NET_DEFAULT 0x0 #define MCTP_ADDR_NULL 0x00 #define MCTP_ADDR_ANY 0xff diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 9ca836df19d0..84f722d31fd7 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -94,6 +94,9 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (!capable(CAP_NET_RAW)) return -EACCES; + if (addr->smctp_network == MCTP_NET_ANY) + addr->smctp_network = mctp_default_net(sock_net(sk)); + rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, addr->smctp_addr.s_addr); if (!rt) diff --git a/net/mctp/device.c b/net/mctp/device.c index aa049590acda..b9f38e765f61 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -267,7 +267,7 @@ static struct mctp_dev *mctp_add_dev(struct net_device *dev) spin_lock_init(&mdev->addrs_lock); - mdev->net = MCTP_INITIAL_DEFAULT_NET; + mdev->net = mctp_default_net(dev_net(dev)); /* associate to net_device */ rcu_assign_pointer(dev->mctp_ptr, mdev); diff --git a/net/mctp/route.c b/net/mctp/route.c index 38f0a7278520..b3101375c8e7 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -433,6 +433,19 @@ static struct mctp_route *mctp_route_alloc(void) return rt; } +unsigned int mctp_default_net(struct net *net) +{ + return READ_ONCE(net->mctp.default_net); +} + +int mctp_default_net_set(struct net *net, unsigned int index) +{ + if (index == 0) + return -EINVAL; + WRITE_ONCE(net->mctp.default_net, index); + return 0; +} + /* tag management */ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, struct mctp_sock *msk) @@ -1045,6 +1058,7 @@ static int __net_init mctp_routes_net_init(struct net *net) mutex_init(&ns->bind_lock); INIT_HLIST_HEAD(&ns->keys); spin_lock_init(&ns->keys_lock); + WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET)); return 0; } -- cgit v1.2.3 From 3aa2605594556c676fb88744bd9845acae60683d Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 28 Jul 2021 20:08:00 +0200 Subject: net/sched: store the last executed chain also for clsact egress currently, only 'ingress' and 'clsact ingress' qdiscs store the tc 'chain id' in the skb extension. However, userspace programs (like ovs) are able to setup egress rules, and datapath gets confused in case it doesn't find the 'chain id' for a packet that's "recirculated" by tc. Change tcf_classify() to have the same semantic as tcf_classify_ingress() so that a single function can be called in ingress / egress, using the tc ingress / egress block respectively. Suggested-by: Alaa Hleilel Signed-off-by: Davide Caratti Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 22 +++++++--------------- net/core/dev.c | 5 ++--- net/sched/cls_api.c | 42 +++++++++++++++++------------------------- net/sched/sch_atm.c | 2 +- net/sched/sch_cake.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_dsmark.c | 2 +- net/sched/sch_ets.c | 2 +- net/sched/sch_fq_codel.c | 2 +- net/sched/sch_fq_pie.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 2 +- net/sched/sch_multiq.c | 2 +- net/sched/sch_prio.c | 2 +- net/sched/sch_qfq.c | 2 +- net/sched/sch_sfb.c | 2 +- net/sched/sch_sfq.c | 2 +- 18 files changed, 41 insertions(+), 58 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ec7823921bd2..dc28fcb6f0a2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -76,12 +76,10 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) return block->q; } -int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode); -int tcf_classify_ingress(struct sk_buff *skb, - const struct tcf_block *ingress_block, - const struct tcf_proto *tp, struct tcf_result *res, - bool compat_mode); +int tcf_classify(struct sk_buff *skb, + const struct tcf_block *block, + const struct tcf_proto *tp, struct tcf_result *res, + bool compat_mode); #else static inline bool tcf_block_shared(struct tcf_block *block) @@ -138,20 +136,14 @@ void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb, { } -static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, +static inline int tcf_classify(struct sk_buff *skb, + const struct tcf_block *block, + const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { return TC_ACT_UNSPEC; } -static inline int tcf_classify_ingress(struct sk_buff *skb, - const struct tcf_block *ingress_block, - const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode) -{ - return TC_ACT_UNSPEC; -} - #endif static inline unsigned long diff --git a/net/core/dev.c b/net/core/dev.c index dcc87fcd64ba..b51e41d0a7fe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4012,7 +4012,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) qdisc_skb_cb(skb)->post_ct = false; mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { + switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); @@ -5164,8 +5164,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list, - &cl_res, false)) { + switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1167cd0be179..7be5b9d2aead 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1577,20 +1577,10 @@ reset: #endif } -int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, +int tcf_classify(struct sk_buff *skb, + const struct tcf_block *block, + const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) -{ - u32 last_executed_chain = 0; - - return __tcf_classify(skb, tp, tp, res, compat_mode, - &last_executed_chain); -} -EXPORT_SYMBOL(tcf_classify); - -int tcf_classify_ingress(struct sk_buff *skb, - const struct tcf_block *ingress_block, - const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode) { #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) u32 last_executed_chain = 0; @@ -1603,20 +1593,22 @@ int tcf_classify_ingress(struct sk_buff *skb, struct tc_skb_ext *ext; int ret; - ext = skb_ext_find(skb, TC_SKB_EXT); + if (block) { + ext = skb_ext_find(skb, TC_SKB_EXT); - if (ext && ext->chain) { - struct tcf_chain *fchain; + if (ext && ext->chain) { + struct tcf_chain *fchain; - fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain); - if (!fchain) - return TC_ACT_SHOT; + fchain = tcf_chain_lookup_rcu(block, ext->chain); + if (!fchain) + return TC_ACT_SHOT; - /* Consume, so cloned/redirect skbs won't inherit ext */ - skb_ext_del(skb, TC_SKB_EXT); + /* Consume, so cloned/redirect skbs won't inherit ext */ + skb_ext_del(skb, TC_SKB_EXT); - tp = rcu_dereference_bh(fchain->filter_chain); - last_executed_chain = fchain->index; + tp = rcu_dereference_bh(fchain->filter_chain); + last_executed_chain = fchain->index; + } } ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, @@ -1635,7 +1627,7 @@ int tcf_classify_ingress(struct sk_buff *skb, return ret; #endif } -EXPORT_SYMBOL(tcf_classify_ingress); +EXPORT_SYMBOL(tcf_classify); struct tcf_chain_info { struct tcf_proto __rcu **pprev; @@ -3825,7 +3817,7 @@ struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, stru fl = rcu_dereference_bh(qe->filter_chain); - switch (tcf_classify(skb, fl, &cl_res, false)) { + switch (tcf_classify(skb, NULL, fl, &cl_res, false)) { case TC_ACT_SHOT: qdisc_qstats_drop(sch); __qdisc_drop(skb, to_free); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index d0c9a57398fc..7d8518176b45 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -394,7 +394,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, list_for_each_entry(flow, &p->flows, list) { fl = rcu_dereference_bh(flow->filter_list); if (fl) { - result = tcf_classify(skb, fl, &res, true); + result = tcf_classify(skb, NULL, fl, &res, true); if (result < 0) continue; flow = (struct atm_flow_data *)res.class; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 951542843cab..ecc5c4d93779 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1665,7 +1665,7 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t, goto hash; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tcf_classify(skb, filter, &res, false); + result = tcf_classify(skb, NULL, filter, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index b79a7e27bb31..2dabaffd39d0 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -228,7 +228,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) /* * Step 2+n. Apply classifier. */ - result = tcf_classify(skb, fl, &res, true); + result = tcf_classify(skb, NULL, fl, &res, true); if (!fl || result < 0) goto fallback; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index fc1e47069593..642cd179b7a7 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -317,7 +317,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); - result = tcf_classify(skb, fl, &res, false); + result = tcf_classify(skb, NULL, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index d320bcfb2da2..4c100d105269 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -242,7 +242,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, else { struct tcf_result res; struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result = tcf_classify(skb, fl, &res, false); + int result = tcf_classify(skb, NULL, fl, &res, false); pr_debug("result %d class 0x%04x\n", result, res.classid); diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index c1e84d1eeaba..925924fab1ab 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -390,7 +390,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { fl = rcu_dereference_bh(q->filter_list); - err = tcf_classify(skb, fl, &res, false); + err = tcf_classify(skb, NULL, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index bbd5f8753600..c4afdd026f51 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -91,7 +91,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, return fq_codel_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tcf_classify(skb, filter, &res, false); + result = tcf_classify(skb, NULL, filter, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index cac684952edc..830f3559f727 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -94,7 +94,7 @@ static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch, return fq_pie_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tcf_classify(skb, filter, &res, false); + result = tcf_classify(skb, NULL, filter, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index bf0034c66e35..b7ac30cca035 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1130,7 +1130,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; head = &q->root; tcf = rcu_dereference_bh(q->root.filter_list); - while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) { + while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 5f7ac27a5264..81ea8332547a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -238,7 +238,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) { + while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 5c27b4270b90..e282e7382117 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -36,7 +36,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) int err; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - err = tcf_classify(skb, fl, &res, false); + err = tcf_classify(skb, NULL, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 3eabb871a1d5..03fdf31ccb6a 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -39,7 +39,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { fl = rcu_dereference_bh(q->filter_list); - err = tcf_classify(skb, fl, &res, false); + err = tcf_classify(skb, NULL, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index b692a0de1ad5..58a9d42b52b8 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -690,7 +690,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); - result = tcf_classify(skb, fl, &res, false); + result = tcf_classify(skb, NULL, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index dde829d4b9f8..3d061a13d7ed 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -257,7 +257,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, struct tcf_result res; int result; - result = tcf_classify(skb, fl, &res, false); + result = tcf_classify(skb, NULL, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 066754a18569..f8e569f79f13 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -178,7 +178,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return sfq_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tcf_classify(skb, fl, &res, false); + result = tcf_classify(skb, NULL, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { -- cgit v1.2.3 From 79976892f7ea37f44f8bcfb6d266954e8ae0124d Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Thu, 29 Jul 2021 15:13:50 +0800 Subject: net: convert fib_treeref from int to refcount_t refcount_t type should be used instead of int when fib_treeref is used as a reference counter,and avoid use-after-free risks. Signed-off-by: Yajun Deng Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210729071350.28919-1-yajun.deng@linux.dev Signed-off-by: Jakub Kicinski --- include/net/dn_fib.h | 2 +- include/net/ip_fib.h | 2 +- net/decnet/dn_fib.c | 6 +++--- net/ipv4/fib_semantics.c | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index ccc6e9df178b..ddd6565957b3 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -29,7 +29,7 @@ struct dn_fib_nh { struct dn_fib_info { struct dn_fib_info *fib_next; struct dn_fib_info *fib_prev; - int fib_treeref; + refcount_t fib_treeref; refcount_t fib_clntref; int fib_dead; unsigned int fib_flags; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 3ab2563b1a23..21c5386d4a6d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -133,7 +133,7 @@ struct fib_info { struct hlist_node fib_lhash; struct list_head nh_list; struct net *fib_net; - int fib_treeref; + refcount_t fib_treeref; refcount_t fib_clntref; unsigned int fib_flags; unsigned char fib_dead; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 77fbf8e9df4b..387a7e81dd00 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -102,7 +102,7 @@ void dn_fib_free_info(struct dn_fib_info *fi) void dn_fib_release_info(struct dn_fib_info *fi) { spin_lock(&dn_fib_info_lock); - if (fi && --fi->fib_treeref == 0) { + if (fi && refcount_dec_and_test(&fi->fib_treeref)) { if (fi->fib_next) fi->fib_next->fib_prev = fi->fib_prev; if (fi->fib_prev) @@ -385,11 +385,11 @@ link_it: if ((ofi = dn_fib_find_info(fi)) != NULL) { fi->fib_dead = 1; dn_fib_free_info(fi); - ofi->fib_treeref++; + refcount_inc(&ofi->fib_treeref); return ofi; } - fi->fib_treeref++; + refcount_inc(&fi->fib_treeref); refcount_set(&fi->fib_clntref, 1); spin_lock(&dn_fib_info_lock); fi->fib_next = dn_fib_info_list; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4c0c33e4710d..fa19f4cdf3a4 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -260,7 +260,7 @@ EXPORT_SYMBOL_GPL(free_fib_info); void fib_release_info(struct fib_info *fi) { spin_lock_bh(&fib_info_lock); - if (fi && --fi->fib_treeref == 0) { + if (fi && refcount_dec_and_test(&fi->fib_treeref)) { hlist_del(&fi->fib_hash); if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); @@ -1373,7 +1373,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, if (!cfg->fc_mx) { fi = fib_find_info_nh(net, cfg); if (fi) { - fi->fib_treeref++; + refcount_inc(&fi->fib_treeref); return fi; } } @@ -1547,11 +1547,11 @@ link_it: if (ofi) { fi->fib_dead = 1; free_fib_info(fi); - ofi->fib_treeref++; + refcount_inc(&ofi->fib_treeref); return ofi; } - fi->fib_treeref++; + refcount_inc(&fi->fib_treeref); refcount_set(&fi->fib_clntref, 1); spin_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, -- cgit v1.2.3 From 3df40eb3a2ea58bf404a38f15a7a2768e4762cb0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 30 Jul 2021 16:41:59 +0200 Subject: nfc: constify several pointers to u8, char and sk_buff Several functions receive pointers to u8, char or sk_buff but do not modify the contents so make them const. This allows doing the same for local variables and in total makes the code a little bit safer. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- include/net/nfc/nfc.h | 4 ++-- net/nfc/core.c | 4 ++-- net/nfc/hci/llc_shdlc.c | 10 +++++----- net/nfc/llcp.h | 8 ++++---- net/nfc/llcp_commands.c | 46 +++++++++++++++++++++++++--------------------- net/nfc/llcp_core.c | 44 ++++++++++++++++++++++++-------------------- net/nfc/nfc.h | 2 +- 7 files changed, 63 insertions(+), 55 deletions(-) (limited to 'include/net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 901779138e2b..5dee575fbe86 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -266,7 +266,7 @@ struct sk_buff *nfc_alloc_send_skb(struct nfc_dev *dev, struct sock *sk, struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp); int nfc_set_remote_general_bytes(struct nfc_dev *dev, - u8 *gt, u8 gt_len); + const u8 *gt, u8 gt_len); u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len); int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name, @@ -280,7 +280,7 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, - u8 *gb, size_t gb_len); + const u8 *gb, size_t gb_len); int nfc_tm_deactivated(struct nfc_dev *dev); int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb); diff --git a/net/nfc/core.c b/net/nfc/core.c index 6ade54149b73..08182e209144 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -636,7 +636,7 @@ error: return rc; } -int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len) +int nfc_set_remote_general_bytes(struct nfc_dev *dev, const u8 *gb, u8 gb_len) { pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len); @@ -665,7 +665,7 @@ int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb) EXPORT_SYMBOL(nfc_tm_data_received); int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, - u8 *gb, size_t gb_len) + const u8 *gb, size_t gb_len) { int rc; diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 6b747856d095..aef750d7787c 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -123,7 +123,7 @@ static bool llc_shdlc_x_lteq_y_lt_z(int x, int y, int z) return ((y >= x) || (y < z)) ? true : false; } -static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc, +static struct sk_buff *llc_shdlc_alloc_skb(const struct llc_shdlc *shdlc, int payload_len) { struct sk_buff *skb; @@ -137,7 +137,7 @@ static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc, } /* immediately sends an S frame. */ -static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc, +static int llc_shdlc_send_s_frame(const struct llc_shdlc *shdlc, enum sframe_type sframe_type, int nr) { int r; @@ -159,7 +159,7 @@ static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc, } /* immediately sends an U frame. skb may contain optional payload */ -static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc, +static int llc_shdlc_send_u_frame(const struct llc_shdlc *shdlc, struct sk_buff *skb, enum uframe_modifier uframe_modifier) { @@ -361,7 +361,7 @@ static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r) wake_up(shdlc->connect_wq); } -static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc) +static int llc_shdlc_connect_initiate(const struct llc_shdlc *shdlc) { struct sk_buff *skb; @@ -377,7 +377,7 @@ static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc) return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET); } -static int llc_shdlc_connect_send_ua(struct llc_shdlc *shdlc) +static int llc_shdlc_connect_send_ua(const struct llc_shdlc *shdlc) { struct sk_buff *skb; diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index 97853c9cefc7..d49d4bf2e37c 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -221,15 +221,15 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *sk, struct socket *newsock); /* TLV API */ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, - u8 *tlv_array, u16 tlv_array_len); + const u8 *tlv_array, u16 tlv_array_len); int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, - u8 *tlv_array, u16 tlv_array_len); + const u8 *tlv_array, u16 tlv_array_len); /* Commands API */ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); -u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length); +u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length); struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap); -struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri, size_t uri_len); void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head); diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 475061c79c44..3c4172a5aeb5 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -15,7 +15,7 @@ #include "nfc.h" #include "llcp.h" -static u8 llcp_tlv_length[LLCP_TLV_MAX] = { +static const u8 llcp_tlv_length[LLCP_TLV_MAX] = { 0, 1, /* VERSION */ 2, /* MIUX */ @@ -29,7 +29,7 @@ static u8 llcp_tlv_length[LLCP_TLV_MAX] = { }; -static u8 llcp_tlv8(u8 *tlv, u8 type) +static u8 llcp_tlv8(const u8 *tlv, u8 type) { if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]]) return 0; @@ -37,7 +37,7 @@ static u8 llcp_tlv8(u8 *tlv, u8 type) return tlv[2]; } -static u16 llcp_tlv16(u8 *tlv, u8 type) +static u16 llcp_tlv16(const u8 *tlv, u8 type) { if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]]) return 0; @@ -46,37 +46,37 @@ static u16 llcp_tlv16(u8 *tlv, u8 type) } -static u8 llcp_tlv_version(u8 *tlv) +static u8 llcp_tlv_version(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_VERSION); } -static u16 llcp_tlv_miux(u8 *tlv) +static u16 llcp_tlv_miux(const u8 *tlv) { return llcp_tlv16(tlv, LLCP_TLV_MIUX) & 0x7ff; } -static u16 llcp_tlv_wks(u8 *tlv) +static u16 llcp_tlv_wks(const u8 *tlv) { return llcp_tlv16(tlv, LLCP_TLV_WKS); } -static u16 llcp_tlv_lto(u8 *tlv) +static u16 llcp_tlv_lto(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_LTO); } -static u8 llcp_tlv_opt(u8 *tlv) +static u8 llcp_tlv_opt(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_OPT); } -static u8 llcp_tlv_rw(u8 *tlv) +static u8 llcp_tlv_rw(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_RW) & 0xf; } -u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length) +u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length) { u8 *tlv, length; @@ -130,7 +130,7 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap) return sdres; } -struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri, size_t uri_len) { struct nfc_llcp_sdp_tlv *sdreq; @@ -190,9 +190,10 @@ void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head) } int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, - u8 *tlv_array, u16 tlv_array_len) + const u8 *tlv_array, u16 tlv_array_len) { - u8 *tlv = tlv_array, type, length, offset = 0; + const u8 *tlv = tlv_array; + u8 type, length, offset = 0; pr_debug("TLV array length %d\n", tlv_array_len); @@ -239,9 +240,10 @@ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, } int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, - u8 *tlv_array, u16 tlv_array_len) + const u8 *tlv_array, u16 tlv_array_len) { - u8 *tlv = tlv_array, type, length, offset = 0; + const u8 *tlv = tlv_array; + u8 type, length, offset = 0; pr_debug("TLV array length %d\n", tlv_array_len); @@ -295,7 +297,7 @@ static struct sk_buff *llcp_add_header(struct sk_buff *pdu, return pdu; } -static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv, +static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, const u8 *tlv, u8 tlv_length) { /* XXX Add an skb length check */ @@ -389,9 +391,10 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) { struct nfc_llcp_local *local; struct sk_buff *skb; - u8 *service_name_tlv = NULL, service_name_tlv_length; - u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length, rw; + const u8 *service_name_tlv = NULL; + const u8 *miux_tlv = NULL; + const u8 *rw_tlv = NULL; + u8 service_name_tlv_length, miux_tlv_length, rw_tlv_length, rw; int err; u16 size = 0; __be16 miux; @@ -465,8 +468,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) { struct nfc_llcp_local *local; struct sk_buff *skb; - u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length, rw; + const u8 *miux_tlv = NULL; + const u8 *rw_tlv = NULL; + u8 miux_tlv_length, rw_tlv_length, rw; int err; u16 size = 0; __be16 miux; diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index cc997518f79d..eaeb2b1cfa6a 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -301,7 +301,7 @@ static char *wks[] = { "urn:nfc:sn:snep", }; -static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len) +static int nfc_llcp_wks_sap(const char *service_name, size_t service_name_len) { int sap, num_wks; @@ -325,7 +325,7 @@ static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len) static struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, - u8 *sn, size_t sn_len) + const u8 *sn, size_t sn_len) { struct sock *sk; struct nfc_llcp_sock *llcp_sock, *tmp_sock; @@ -522,7 +522,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) { u8 *gb_cur, version, version_length; u8 lto_length, wks_length, miux_length; - u8 *version_tlv = NULL, *lto_tlv = NULL, + const u8 *version_tlv = NULL, *lto_tlv = NULL, *wks_tlv = NULL, *miux_tlv = NULL; __be16 wks = cpu_to_be16(local->local_wks); u8 gb_len = 0; @@ -612,7 +612,7 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len) return local->gb; } -int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) +int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len) { struct nfc_llcp_local *local; @@ -639,27 +639,27 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) local->remote_gb_len - 3); } -static u8 nfc_llcp_dsap(struct sk_buff *pdu) +static u8 nfc_llcp_dsap(const struct sk_buff *pdu) { return (pdu->data[0] & 0xfc) >> 2; } -static u8 nfc_llcp_ptype(struct sk_buff *pdu) +static u8 nfc_llcp_ptype(const struct sk_buff *pdu) { return ((pdu->data[0] & 0x03) << 2) | ((pdu->data[1] & 0xc0) >> 6); } -static u8 nfc_llcp_ssap(struct sk_buff *pdu) +static u8 nfc_llcp_ssap(const struct sk_buff *pdu) { return pdu->data[1] & 0x3f; } -static u8 nfc_llcp_ns(struct sk_buff *pdu) +static u8 nfc_llcp_ns(const struct sk_buff *pdu) { return pdu->data[2] >> 4; } -static u8 nfc_llcp_nr(struct sk_buff *pdu) +static u8 nfc_llcp_nr(const struct sk_buff *pdu) { return pdu->data[2] & 0xf; } @@ -801,7 +801,7 @@ out: } static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local, - u8 *sn, size_t sn_len) + const u8 *sn, size_t sn_len) { struct nfc_llcp_sock *llcp_sock; @@ -815,9 +815,10 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local, return llcp_sock; } -static u8 *nfc_llcp_connect_sn(struct sk_buff *skb, size_t *sn_len) +static const u8 *nfc_llcp_connect_sn(const struct sk_buff *skb, size_t *sn_len) { - u8 *tlv = &skb->data[2], type, length; + u8 type, length; + const u8 *tlv = &skb->data[2]; size_t tlv_array_len = skb->len - LLCP_HEADER_SIZE, offset = 0; while (offset < tlv_array_len) { @@ -875,7 +876,7 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, } static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, - struct sk_buff *skb) + const struct sk_buff *skb) { struct sock *new_sk, *parent; struct nfc_llcp_sock *sock, *new_sock; @@ -893,7 +894,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, goto fail; } } else { - u8 *sn; + const u8 *sn; size_t sn_len; sn = nfc_llcp_connect_sn(skb, &sn_len); @@ -1112,7 +1113,7 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, } static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; @@ -1155,7 +1156,8 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, nfc_llcp_sock_put(llcp_sock); } -static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb) +static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; @@ -1188,7 +1190,8 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb) nfc_llcp_sock_put(llcp_sock); } -static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb) +static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; @@ -1226,12 +1229,13 @@ static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb) } static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; - u8 dsap, ssap, *tlv, type, length, tid, sap; + u8 dsap, ssap, type, length, tid, sap; + const u8 *tlv; u16 tlv_len, offset; - char *service_name; + const char *service_name; size_t service_name_len; struct nfc_llcp_sdp_tlv *sdp; HLIST_HEAD(llc_sdres_list); diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 889fefd64e56..de2ec66d7e83 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -48,7 +48,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_llcp_register_device(struct nfc_dev *dev); void nfc_llcp_unregister_device(struct nfc_dev *dev); -int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len); +int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len); u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len); int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); -- cgit v1.2.3 From ddecf5556f7fdf871fa8ce26b376e3e8ae6213b6 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 30 Jul 2021 16:42:01 +0200 Subject: nfc: nci: constify several pointers to u8, sk_buff and other structs Several functions receive pointers to u8, sk_buff or other structs but do not modify the contents so make them const. This allows doing the same for local variables and in total makes the code a little bit safer. This makes const also data passed as "unsigned long opt" argument to nci_request() function. Usual flow for such functions is: 1. Receive "u8 *" and store it (the pointer) in a structure allocated on stack (e.g. struct nci_set_config_param), 2. Call nci_request() or __nci_request() passing a callback function an the pointer to the structure via an "unsigned long opt", 3. nci_request() calls the callback which dereferences "unsigned long opt" in a read-only way. This converts all above paths to use proper pointer to const data, so entire flow is safer. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- include/net/nfc/nci_core.h | 14 ++++---- net/nfc/nci/core.c | 40 +++++++++++----------- net/nfc/nci/data.c | 12 +++---- net/nfc/nci/hci.c | 24 +++++++------- net/nfc/nci/ntf.c | 83 +++++++++++++++++++++++++--------------------- net/nfc/nci/rsp.c | 46 +++++++++++++------------ net/nfc/nci/spi.c | 2 +- 7 files changed, 118 insertions(+), 103 deletions(-) (limited to 'include/net') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 00f2c60971d7..4770a81f4aa7 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -278,23 +278,25 @@ int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, unsigned long opt), unsigned long opt, __u32 timeout); -int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload); -int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload); +int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, + const __u8 *payload); +int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, + const __u8 *payload); int nci_core_reset(struct nci_dev *ndev); int nci_core_init(struct nci_dev *ndev); int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb); -int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); +int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val); int nci_nfcee_discover(struct nci_dev *ndev, u8 action); int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode); int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, u8 number_destination_params, size_t params_len, - struct core_conn_create_dest_spec_params *params); + const struct core_conn_create_dest_spec_params *params); int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id); -int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len, +int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len, struct sk_buff **resp); struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev); @@ -378,7 +380,7 @@ void nci_req_complete(struct nci_dev *ndev, int result); struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, int conn_id); int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type, - struct dest_spec_params *params); + const struct dest_spec_params *params); /* ----- NCI status code ----- */ int nci_to_errno(__u8 code); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 400d66c4e210..774ddf957388 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -53,9 +53,9 @@ struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, } int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type, - struct dest_spec_params *params) + const struct dest_spec_params *params) { - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; list_for_each_entry(conn_info, &ndev->conn_info_list, list) { if (conn_info->dest_type == dest_type) { @@ -210,14 +210,15 @@ static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt) } struct nci_set_config_param { - __u8 id; - size_t len; - __u8 *val; + __u8 id; + size_t len; + const __u8 *val; }; static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt) { - struct nci_set_config_param *param = (struct nci_set_config_param *)opt; + const struct nci_set_config_param *param = + (struct nci_set_config_param *)opt; struct nci_core_set_config_cmd cmd; BUG_ON(param->len > NCI_MAX_PARAM_LEN); @@ -237,7 +238,7 @@ struct nci_rf_discover_param { static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt) { - struct nci_rf_discover_param *param = + const struct nci_rf_discover_param *param = (struct nci_rf_discover_param *)opt; struct nci_rf_disc_cmd cmd; @@ -303,7 +304,7 @@ struct nci_rf_discover_select_param { static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt) { - struct nci_rf_discover_select_param *param = + const struct nci_rf_discover_select_param *param = (struct nci_rf_discover_select_param *)opt; struct nci_rf_discover_select_cmd cmd; @@ -341,18 +342,18 @@ static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt) struct nci_cmd_param { __u16 opcode; size_t len; - __u8 *payload; + const __u8 *payload; }; static void nci_generic_req(struct nci_dev *ndev, unsigned long opt) { - struct nci_cmd_param *param = + const struct nci_cmd_param *param = (struct nci_cmd_param *)opt; nci_send_cmd(ndev, param->opcode, param->len, param->payload); } -int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload) +int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload) { struct nci_cmd_param param; @@ -365,7 +366,8 @@ int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload) } EXPORT_SYMBOL(nci_prop_cmd); -int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload) +int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, + const __u8 *payload) { struct nci_cmd_param param; @@ -399,7 +401,7 @@ struct nci_loopback_data { static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt) { - struct nci_loopback_data *data = (struct nci_loopback_data *)opt; + const struct nci_loopback_data *data = (struct nci_loopback_data *)opt; nci_send_data(ndev, data->conn_id, data->data); } @@ -420,7 +422,7 @@ static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err) nci_req_complete(ndev, NCI_STATUS_OK); } -int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len, +int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len, struct sk_buff **resp) { int r; @@ -624,7 +626,7 @@ static int nci_dev_down(struct nfc_dev *nfc_dev) return nci_close_device(ndev); } -int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val) +int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val) { struct nci_set_config_param param; @@ -659,7 +661,7 @@ EXPORT_SYMBOL(nci_nfcee_discover); static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt) { - struct nci_nfcee_mode_set_cmd *cmd = + const struct nci_nfcee_mode_set_cmd *cmd = (struct nci_nfcee_mode_set_cmd *)opt; nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD, @@ -681,7 +683,7 @@ EXPORT_SYMBOL(nci_nfcee_mode_set); static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt) { - struct core_conn_create_data *data = + const struct core_conn_create_data *data = (struct core_conn_create_data *)opt; nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd); @@ -690,7 +692,7 @@ static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt) int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, u8 number_destination_params, size_t params_len, - struct core_conn_create_dest_spec_params *params) + const struct core_conn_create_dest_spec_params *params) { int r; struct nci_core_conn_create_cmd *cmd; @@ -863,7 +865,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev, { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); struct nci_rf_discover_select_param param; - struct nfc_target *nci_target = NULL; + const struct nfc_target *nci_target = NULL; int i; int rc = 0; diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index ce3382be937f..6055dc9a82aa 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -26,7 +26,7 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, __u8 conn_id, int err) { - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; data_exchange_cb_t cb; void *cb_context; @@ -80,7 +80,7 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev, int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id) { - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); if (!conn_info) @@ -93,9 +93,9 @@ EXPORT_SYMBOL(nci_conn_max_data_pkt_payload_size); static int nci_queue_tx_data_frags(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; int total_len = skb->len; - unsigned char *data = skb->data; + const unsigned char *data = skb->data; unsigned long flags; struct sk_buff_head frags_q; struct sk_buff *skb_frag; @@ -166,7 +166,7 @@ exit: /* Send NCI data */ int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; int rc = 0; pr_debug("conn_id 0x%x, plen %d\n", conn_id, skb->len); @@ -269,7 +269,7 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) __u8 pbf = nci_pbf(skb->data); __u8 status = 0; __u8 conn_id = nci_conn_id(skb->data); - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; pr_debug("len %d\n", skb->len); diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index d6732e5e8958..71a306b29735 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -142,7 +142,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, const u8 data_type, const u8 *data, size_t data_len) { - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; struct sk_buff *skb; int len, i, r; u8 cb = pipe; @@ -197,7 +197,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt) { - struct nci_data *data = (struct nci_data *)opt; + const struct nci_data *data = (struct nci_data *)opt; nci_hci_send_data(ndev, data->pipe, data->cmd, data->data, data->data_len); @@ -221,8 +221,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, const u8 *param, size_t param_len, struct sk_buff **skb) { - struct nci_hcp_message *message; - struct nci_conn_info *conn_info; + const struct nci_hcp_message *message; + const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; @@ -406,7 +406,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work) struct nci_hci_dev *hdev = container_of(work, struct nci_hci_dev, msg_rx_work); struct sk_buff *skb; - struct nci_hcp_message *message; + const struct nci_hcp_message *message; u8 pipe, type, instruction; while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { @@ -498,7 +498,7 @@ void nci_hci_data_received_cb(void *context, int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) { struct nci_data data; - struct nci_conn_info *conn_info; + const struct nci_conn_info *conn_info; conn_info = ndev->hci_dev->conn_info; if (!conn_info) @@ -523,7 +523,7 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, u8 pipe; struct sk_buff *skb; struct nci_hci_create_pipe_params params; - struct nci_hci_create_pipe_resp *resp; + const struct nci_hci_create_pipe_resp *resp; pr_debug("gate=%d\n", dest_gate); @@ -557,8 +557,8 @@ static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe) int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, const u8 *param, size_t param_len) { - struct nci_hcp_message *message; - struct nci_conn_info *conn_info; + const struct nci_hcp_message *message; + const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 *tmp; @@ -605,8 +605,8 @@ EXPORT_SYMBOL(nci_hci_set_param); int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, struct sk_buff **skb) { - struct nci_hcp_message *message; - struct nci_conn_info *conn_info; + const struct nci_hcp_message *message; + const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; @@ -697,7 +697,7 @@ EXPORT_SYMBOL(nci_hci_connect_gate); static int nci_hci_dev_connect_gates(struct nci_dev *ndev, u8 gate_count, - struct nci_hci_gate *gates) + const struct nci_hci_gate *gates) { int r; diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 98af04c86b2c..d6251363b72b 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -28,10 +28,10 @@ /* Handle NCI Notification packets */ static void nci_core_reset_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { /* Handle NCI 2.x core reset notification */ - struct nci_core_reset_ntf *ntf = (void *)skb->data; + const struct nci_core_reset_ntf *ntf = (void *)skb->data; ndev->nci_ver = ntf->nci_ver; pr_debug("nci_ver 0x%x, config_status 0x%x\n", @@ -80,7 +80,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, } static void nci_core_generic_error_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { __u8 status = skb->data[0]; @@ -107,9 +107,10 @@ static void nci_core_conn_intf_error_ntf_packet(struct nci_dev *ndev, nci_data_exchange_complete(ndev, NULL, ntf->conn_id, -EIO); } -static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, - struct rf_tech_specific_params_nfca_poll *nfca_poll, - __u8 *data) +static const __u8 * +nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, + struct rf_tech_specific_params_nfca_poll *nfca_poll, + const __u8 *data) { nfca_poll->sens_res = __le16_to_cpu(*((__le16 *)data)); data += 2; @@ -134,9 +135,10 @@ static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, return data; } -static __u8 *nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev, - struct rf_tech_specific_params_nfcb_poll *nfcb_poll, - __u8 *data) +static const __u8 * +nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev, + struct rf_tech_specific_params_nfcb_poll *nfcb_poll, + const __u8 *data) { nfcb_poll->sensb_res_len = min_t(__u8, *data++, NFC_SENSB_RES_MAXSIZE); @@ -148,9 +150,10 @@ static __u8 *nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev, return data; } -static __u8 *nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev, - struct rf_tech_specific_params_nfcf_poll *nfcf_poll, - __u8 *data) +static const __u8 * +nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev, + struct rf_tech_specific_params_nfcf_poll *nfcf_poll, + const __u8 *data) { nfcf_poll->bit_rate = *data++; nfcf_poll->sensf_res_len = min_t(__u8, *data++, NFC_SENSF_RES_MAXSIZE); @@ -164,9 +167,10 @@ static __u8 *nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev, return data; } -static __u8 *nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev, - struct rf_tech_specific_params_nfcv_poll *nfcv_poll, - __u8 *data) +static const __u8 * +nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev, + struct rf_tech_specific_params_nfcv_poll *nfcv_poll, + const __u8 *data) { ++data; nfcv_poll->dsfid = *data++; @@ -175,9 +179,10 @@ static __u8 *nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev, return data; } -static __u8 *nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev, - struct rf_tech_specific_params_nfcf_listen *nfcf_listen, - __u8 *data) +static const __u8 * +nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev, + struct rf_tech_specific_params_nfcf_listen *nfcf_listen, + const __u8 *data) { nfcf_listen->local_nfcid2_len = min_t(__u8, *data++, NFC_NFCID2_MAXSIZE); @@ -198,12 +203,12 @@ static int nci_add_new_protocol(struct nci_dev *ndev, struct nfc_target *target, __u8 rf_protocol, __u8 rf_tech_and_mode, - void *params) + const void *params) { - struct rf_tech_specific_params_nfca_poll *nfca_poll; - struct rf_tech_specific_params_nfcb_poll *nfcb_poll; - struct rf_tech_specific_params_nfcf_poll *nfcf_poll; - struct rf_tech_specific_params_nfcv_poll *nfcv_poll; + const struct rf_tech_specific_params_nfca_poll *nfca_poll; + const struct rf_tech_specific_params_nfcb_poll *nfcb_poll; + const struct rf_tech_specific_params_nfcf_poll *nfcf_poll; + const struct rf_tech_specific_params_nfcv_poll *nfcv_poll; __u32 protocol; if (rf_protocol == NCI_RF_PROTOCOL_T1T) @@ -274,7 +279,7 @@ static int nci_add_new_protocol(struct nci_dev *ndev, } static void nci_add_new_target(struct nci_dev *ndev, - struct nci_rf_discover_ntf *ntf) + const struct nci_rf_discover_ntf *ntf) { struct nfc_target *target; int i, rc; @@ -319,10 +324,10 @@ void nci_clear_target_list(struct nci_dev *ndev) } static void nci_rf_discover_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nci_rf_discover_ntf ntf; - __u8 *data = skb->data; + const __u8 *data = skb->data; bool add_target = true; ntf.rf_discovery_id = *data++; @@ -382,7 +387,8 @@ static void nci_rf_discover_ntf_packet(struct nci_dev *ndev, } static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev, - struct nci_rf_intf_activated_ntf *ntf, __u8 *data) + struct nci_rf_intf_activated_ntf *ntf, + const __u8 *data) { struct activation_params_nfca_poll_iso_dep *nfca_poll; struct activation_params_nfcb_poll_iso_dep *nfcb_poll; @@ -418,7 +424,8 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev, } static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, - struct nci_rf_intf_activated_ntf *ntf, __u8 *data) + struct nci_rf_intf_activated_ntf *ntf, + const __u8 *data) { struct activation_params_poll_nfc_dep *poll; struct activation_params_listen_nfc_dep *listen; @@ -454,7 +461,7 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, } static void nci_target_auto_activated(struct nci_dev *ndev, - struct nci_rf_intf_activated_ntf *ntf) + const struct nci_rf_intf_activated_ntf *ntf) { struct nfc_target *target; int rc; @@ -477,7 +484,7 @@ static void nci_target_auto_activated(struct nci_dev *ndev, } static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev, - struct nci_rf_intf_activated_ntf *ntf) + const struct nci_rf_intf_activated_ntf *ntf) { ndev->remote_gb_len = 0; @@ -519,11 +526,11 @@ static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev, } static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nci_conn_info *conn_info; struct nci_rf_intf_activated_ntf ntf; - __u8 *data = skb->data; + const __u8 *data = skb->data; int err = NCI_STATUS_OK; ntf.rf_discovery_id = *data++; @@ -681,10 +688,10 @@ listen: } static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { - struct nci_conn_info *conn_info; - struct nci_rf_deactivate_ntf *ntf = (void *) skb->data; + const struct nci_conn_info *conn_info; + const struct nci_rf_deactivate_ntf *ntf = (void *)skb->data; pr_debug("entry, type 0x%x, reason 0x%x\n", ntf->type, ntf->reason); @@ -725,10 +732,10 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, } static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { u8 status = NCI_STATUS_OK; - struct nci_nfcee_discover_ntf *nfcee_ntf = + const struct nci_nfcee_discover_ntf *nfcee_ntf = (struct nci_nfcee_discover_ntf *)skb->data; pr_debug("\n"); @@ -745,7 +752,7 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, } static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { pr_debug("\n"); } diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index e9605922a322..dbb0b55a1757 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -25,9 +25,10 @@ /* Handle NCI Response packets */ -static void nci_core_reset_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) +static void nci_core_reset_rsp_packet(struct nci_dev *ndev, + const struct sk_buff *skb) { - struct nci_core_reset_rsp *rsp = (void *) skb->data; + const struct nci_core_reset_rsp *rsp = (void *)skb->data; pr_debug("status 0x%x\n", rsp->status); @@ -43,10 +44,11 @@ static void nci_core_reset_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) } } -static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, struct sk_buff *skb) +static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, + const struct sk_buff *skb) { - struct nci_core_init_rsp_1 *rsp_1 = (void *) skb->data; - struct nci_core_init_rsp_2 *rsp_2; + const struct nci_core_init_rsp_1 *rsp_1 = (void *)skb->data; + const struct nci_core_init_rsp_2 *rsp_2; pr_debug("status 0x%x\n", rsp_1->status); @@ -81,10 +83,11 @@ static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, struct sk_buff *skb) return NCI_STATUS_OK; } -static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, struct sk_buff *skb) +static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, + const struct sk_buff *skb) { - struct nci_core_init_rsp_nci_ver2 *rsp = (void *)skb->data; - u8 *supported_rf_interface = rsp->supported_rf_interfaces; + const struct nci_core_init_rsp_nci_ver2 *rsp = (void *)skb->data; + const u8 *supported_rf_interface = rsp->supported_rf_interfaces; u8 rf_interface_idx = 0; u8 rf_extension_cnt = 0; @@ -118,7 +121,7 @@ static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, struct sk_buff *skb) return NCI_STATUS_OK; } -static void nci_core_init_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) +static void nci_core_init_rsp_packet(struct nci_dev *ndev, const struct sk_buff *skb) { u8 status = 0; @@ -160,9 +163,9 @@ exit: } static void nci_core_set_config_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { - struct nci_core_set_config_rsp *rsp = (void *) skb->data; + const struct nci_core_set_config_rsp *rsp = (void *)skb->data; pr_debug("status 0x%x\n", rsp->status); @@ -170,7 +173,7 @@ static void nci_core_set_config_rsp_packet(struct nci_dev *ndev, } static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { __u8 status = skb->data[0]; @@ -179,7 +182,8 @@ static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev, nci_req_complete(ndev, status); } -static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) +static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, + const struct sk_buff *skb) { struct nci_conn_info *conn_info; __u8 status = skb->data[0]; @@ -210,7 +214,7 @@ exit: } static void nci_rf_disc_select_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { __u8 status = skb->data[0]; @@ -222,7 +226,7 @@ static void nci_rf_disc_select_rsp_packet(struct nci_dev *ndev, } static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { __u8 status = skb->data[0]; @@ -238,9 +242,9 @@ static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev, } static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { - struct nci_nfcee_discover_rsp *discover_rsp; + const struct nci_nfcee_discover_rsp *discover_rsp; if (skb->len != 2) { nci_req_complete(ndev, NCI_STATUS_NFCEE_PROTOCOL_ERROR); @@ -255,7 +259,7 @@ static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev, } static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { __u8 status = skb->data[0]; @@ -264,11 +268,11 @@ static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev, } static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { __u8 status = skb->data[0]; struct nci_conn_info *conn_info = NULL; - struct nci_core_conn_create_rsp *rsp; + const struct nci_core_conn_create_rsp *rsp; pr_debug("status 0x%x\n", status); @@ -319,7 +323,7 @@ exit: } static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nci_conn_info *conn_info; __u8 status = skb->data[0]; diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index 7d8e10e27c20..0935527d1d12 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -27,7 +27,7 @@ #define CRC_INIT 0xFFFF -static int __nci_spi_send(struct nci_spi *nspi, struct sk_buff *skb, +static int __nci_spi_send(struct nci_spi *nspi, const struct sk_buff *skb, int cs_change) { struct spi_message m; -- cgit v1.2.3 From 26713455048eb19122b1561b471d30710177ef97 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 29 Jul 2021 20:19:25 +0300 Subject: devlink: Allocate devlink directly in requested net namespace There is no need in extra call indirection and check from impossible flow where someone tries to set namespace without prior call to devlink_alloc(). Instead of this extra logic and additional EXPORT_SYMBOL, use specialized devlink allocation function that receives net namespace as an argument. Such specialized API allows clear view when devlink initialized in wrong net namespace and/or kernel users don't try to change devlink namespace under the hood. Reviewed-by: Jiri Pirko Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 4 ++-- include/net/devlink.h | 14 ++++++++++++-- net/core/devlink.c | 26 ++++++++------------------ 3 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 6348307bfa84..d538a39d4225 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1431,10 +1431,10 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) struct devlink *devlink; int err; - devlink = devlink_alloc(&nsim_dev_devlink_ops, sizeof(*nsim_dev)); + devlink = devlink_alloc_ns(&nsim_dev_devlink_ops, sizeof(*nsim_dev), + nsim_bus_dev->initial_net); if (!devlink) return -ENOMEM; - devlink_net_set(devlink, nsim_bus_dev->initial_net); nsim_dev = devlink_priv(devlink); nsim_dev->nsim_bus_dev = nsim_bus_dev; nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id); diff --git a/include/net/devlink.h b/include/net/devlink.h index e48a62320407..08f4c6191e72 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1540,8 +1540,18 @@ static inline struct devlink *netdev_to_devlink(struct net_device *dev) struct ib_device; struct net *devlink_net(const struct devlink *devlink); -void devlink_net_set(struct devlink *devlink, struct net *net); -struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size); +/* This call is intended for software devices that can create + * devlink instances in other namespaces than init_net. + * + * Drivers that operate on real HW must use devlink_alloc() instead. + */ +struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, + size_t priv_size, struct net *net); +static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, + size_t priv_size) +{ + return devlink_alloc_ns(ops, priv_size, &init_net); +} int devlink_register(struct devlink *devlink, struct device *dev); void devlink_unregister(struct devlink *devlink); void devlink_reload_enable(struct devlink *devlink); diff --git a/net/core/devlink.c b/net/core/devlink.c index fbca61ad9bbc..fd2fc2befba9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -108,19 +108,6 @@ struct net *devlink_net(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_net); -static void __devlink_net_set(struct devlink *devlink, struct net *net) -{ - write_pnet(&devlink->_net, net); -} - -void devlink_net_set(struct devlink *devlink, struct net *net) -{ - if (WARN_ON(devlink->dev)) - return; - __devlink_net_set(devlink, net); -} -EXPORT_SYMBOL_GPL(devlink_net_set); - static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { @@ -3921,7 +3908,7 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, return err; if (dest_net && !net_eq(dest_net, curr_net)) - __devlink_net_set(devlink, dest_net); + write_pnet(&devlink->_net, dest_net); err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); devlink_reload_failed_set(devlink, !!err); @@ -8776,15 +8763,18 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops) } /** - * devlink_alloc - Allocate new devlink instance resources + * devlink_alloc_ns - Allocate new devlink instance resources + * in specific namespace * * @ops: ops * @priv_size: size of user private data + * @net: net namespace * * Allocate new devlink instance resources, including devlink index * and name. */ -struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) +struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, + size_t priv_size, struct net *net) { struct devlink *devlink; @@ -8799,7 +8789,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) return NULL; devlink->ops = ops; xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); - __devlink_net_set(devlink, &init_net); + write_pnet(&devlink->_net, net); INIT_LIST_HEAD(&devlink->port_list); INIT_LIST_HEAD(&devlink->rate_list); INIT_LIST_HEAD(&devlink->sb_list); @@ -8815,7 +8805,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) mutex_init(&devlink->reporters_lock); return devlink; } -EXPORT_SYMBOL_GPL(devlink_alloc); +EXPORT_SYMBOL_GPL(devlink_alloc_ns); /** * devlink_register - Register devlink instance -- cgit v1.2.3 From f2e3778db7e13fd9a83c309dd8c9fbe69ba8efbe Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 22 Jul 2021 10:48:34 +0200 Subject: netfilter: remove xt pernet data clusterip is now handled via net_generic. NOTRACK is tiny compared to rest of xt_CT feature set, even the existing deprecation warning is bigger than the actual functionality. Just remove the warning, its not worth keeping/adding a net_generic one. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/net_namespace.h | 2 -- include/net/netns/x_tables.h | 12 ------------ net/netfilter/xt_CT.c | 11 ----------- 3 files changed, 25 deletions(-) delete mode 100644 include/net/netns/x_tables.h (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index cc54750dd3db..bb5fa5914032 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -23,7 +23,6 @@ #include #include #include -#include #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #endif @@ -133,7 +132,6 @@ struct net { #endif #ifdef CONFIG_NETFILTER struct netns_nf nf; - struct netns_xt xt; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h deleted file mode 100644 index d02316ec2906..000000000000 --- a/include/net/netns/x_tables.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NETNS_X_TABLES_H -#define __NETNS_X_TABLES_H - -#include -#include - -struct netns_xt { - bool notrack_deprecated_warning; - bool clusterip_deprecated_warning; -}; -#endif diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 12404d221026..0a913ce07425 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -351,21 +351,10 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static int notrack_chk(const struct xt_tgchk_param *par) -{ - if (!par->net->xt.notrack_deprecated_warning) { - pr_info("netfilter: NOTRACK target is deprecated, " - "use CT instead or upgrade iptables\n"); - par->net->xt.notrack_deprecated_warning = true; - } - return 0; -} - static struct xt_target notrack_tg_reg __read_mostly = { .name = "NOTRACK", .revision = 0, .family = NFPROTO_UNSPEC, - .checkentry = notrack_chk, .target = notrack_tg, .table = "raw", .me = THIS_MODULE, -- cgit v1.2.3 From 695176bfe5dec2051f950bdac0ae0b21e29e6de3 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 29 Jul 2021 16:12:14 -0700 Subject: net_sched: refactor TC action init API TC action ->init() API has 10 parameters, it becomes harder to read. Some of them are just boolean and can be replaced by flags. Similarly for the internal API tcf_action_init() and tcf_exts_validate(). This patch converts them to flags and fold them into the upper 16 bits of "flags", whose lower 16 bits are still reserved for user-space. More specifically, the following kernel flags are introduced: TCA_ACT_FLAGS_POLICE replace 'name' in a few contexts, to distinguish whether it is compatible with policer. TCA_ACT_FLAGS_BIND replaces 'bind', to indicate whether this action is bound to a filter. TCA_ACT_FLAGS_REPLACE replaces 'ovr' in most contexts, means we are replacing an existing action. TCA_ACT_FLAGS_NO_RTNL replaces 'rtnl_held' but has the opposite meaning, because we still hold RTNL in most cases. The only user-space flag TCA_ACT_FLAGS_NO_PERCPU_STATS is untouched and still stored as before. I have tested this patch with tdc and I do not see any failure related to this patch. Tested-by: Vlad Buslov Acked-by: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 22 ++++++++++------ include/net/pkt_cls.h | 2 +- include/net/sch_generic.h | 2 +- include/uapi/linux/pkt_cls.h | 1 + net/sched/act_api.c | 61 ++++++++++++++++++++++---------------------- net/sched/act_bpf.c | 4 +-- net/sched/act_connmark.c | 4 +-- net/sched/act_csum.c | 7 ++--- net/sched/act_ct.c | 4 +-- net/sched/act_ctinfo.c | 4 +-- net/sched/act_gact.c | 4 +-- net/sched/act_gate.c | 4 +-- net/sched/act_ife.c | 9 ++++--- net/sched/act_ipt.c | 21 +++++++-------- net/sched/act_mirred.c | 4 +-- net/sched/act_mpls.c | 4 +-- net/sched/act_nat.c | 6 ++--- net/sched/act_pedit.c | 4 +-- net/sched/act_police.c | 4 +-- net/sched/act_sample.c | 7 ++--- net/sched/act_simple.c | 4 +-- net/sched/act_skbedit.c | 4 +-- net/sched/act_skbmod.c | 3 ++- net/sched/act_tunnel_key.c | 4 +-- net/sched/act_vlan.c | 4 +-- net/sched/cls_api.c | 28 ++++++++++++-------- net/sched/cls_basic.c | 10 ++++---- net/sched/cls_bpf.c | 8 +++--- net/sched/cls_cgroup.c | 6 ++--- net/sched/cls_flow.c | 6 ++--- net/sched/cls_flower.c | 18 ++++++------- net/sched/cls_fw.c | 13 +++++----- net/sched/cls_matchall.c | 17 ++++++------ net/sched/cls_route.c | 10 ++++---- net/sched/cls_rsvp.h | 7 +++-- net/sched/cls_tcindex.c | 10 ++++---- net/sched/cls_u32.c | 24 ++++++++--------- 37 files changed, 185 insertions(+), 169 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 086b291e9530..f19f7f4a463c 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -58,6 +58,14 @@ struct tc_action { #define TCA_ACT_HW_STATS_ANY (TCA_ACT_HW_STATS_IMMEDIATE | \ TCA_ACT_HW_STATS_DELAYED) +/* Reserve 16 bits for user-space. See TCA_ACT_FLAGS_NO_PERCPU_STATS. */ +#define TCA_ACT_FLAGS_USER_BITS 16 +#define TCA_ACT_FLAGS_USER_MASK 0xffff +#define TCA_ACT_FLAGS_POLICE (1U << TCA_ACT_FLAGS_USER_BITS) +#define TCA_ACT_FLAGS_BIND (1U << (TCA_ACT_FLAGS_USER_BITS + 1)) +#define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2)) +#define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3)) + /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. */ @@ -99,8 +107,8 @@ struct tc_action_ops { void (*cleanup)(struct tc_action *); int (*lookup)(struct net *net, struct tc_action **a, u32 index); int (*init)(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **act, int ovr, - int bind, bool rtnl_held, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **act, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack); int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, @@ -179,18 +187,16 @@ int tcf_action_destroy(struct tc_action *actions[], int bind); int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, int bind, + struct nlattr *est, struct tc_action *actions[], int init_res[], size_t *attr_size, - bool rtnl_held, struct netlink_ext_ack *extack); -struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, + u32 flags, struct netlink_ext_ack *extack); +struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, bool rtnl_held, struct netlink_ext_ack *extack); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind, struct tc_action_ops *a_o, int *init_res, - bool rtnl_held, - struct netlink_ext_ack *extack); + u32 flags, struct netlink_ext_ack *extack); int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index dc28fcb6f0a2..64de26b7ad39 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -319,7 +319,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, - struct tcf_exts *exts, bool ovr, bool rtnl_held, + struct tcf_exts *exts, u32 flags, struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9ed33e6840bd..c0069ac00e62 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -357,7 +357,7 @@ struct tcf_proto_ops { int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - void **, bool, bool, + void **, u32, struct netlink_ext_ack *); int (*delete)(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 025c40fef93d..6836ccb9c45d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -22,6 +22,7 @@ enum { __TCA_ACT_MAX }; +/* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */ #define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for * actions stats. */ diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 998a2374f7ae..7dd3a2dc5fa4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -495,7 +495,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, p->tcfa_tm.install = jiffies; p->tcfa_tm.lastuse = jiffies; p->tcfa_tm.firstuse = 0; - p->tcfa_flags = flags; + p->tcfa_flags = flags & TCA_ACT_FLAGS_USER_MASK; if (est) { err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats, &p->tcfa_rate_est, @@ -941,7 +941,7 @@ void tcf_idr_insert_many(struct tc_action *actions[]) } } -struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, +struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, bool rtnl_held, struct netlink_ext_ack *extack) { @@ -951,7 +951,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, struct nlattr *kind; int err; - if (name == NULL) { + if (!police) { err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) @@ -967,7 +967,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, return ERR_PTR(err); } } else { - if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) { + if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) { NL_SET_ERR_MSG(extack, "TC action name too long"); return ERR_PTR(-EINVAL); } @@ -1004,12 +1004,11 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind, struct tc_action_ops *a_o, int *init_res, - bool rtnl_held, - struct netlink_ext_ack *extack) + u32 flags, struct netlink_ext_ack *extack) { - struct nla_bitfield32 flags = { 0, 0 }; + bool police = flags & TCA_ACT_FLAGS_POLICE; + struct nla_bitfield32 userflags = { 0, 0 }; u8 hw_stats = TCA_ACT_HW_STATS_ANY; struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_cookie *cookie = NULL; @@ -1017,7 +1016,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, int err; /* backward compatibility for policer */ - if (name == NULL) { + if (!police) { err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) @@ -1032,22 +1031,22 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, } hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); if (tb[TCA_ACT_FLAGS]) - flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); + userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); - err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind, - rtnl_held, tp, flags.value, extack); + err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp, + userflags.value | flags, extack); } else { - err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held, - tp, flags.value, extack); + err = a_o->init(net, nla, est, &a, tp, userflags.value | flags, + extack); } if (err < 0) goto err_out; *init_res = err; - if (!name && tb[TCA_ACT_COOKIE]) + if (!police && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->act_cookie, cookie); - if (!name) + if (!police) a->hw_stats = hw_stats; return a; @@ -1063,9 +1062,9 @@ err_out: /* Returns numbers of initialized actions or negative error. */ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, int bind, - struct tc_action *actions[], int init_res[], size_t *attr_size, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr *est, struct tc_action *actions[], + int init_res[], size_t *attr_size, u32 flags, + struct netlink_ext_ack *extack) { struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {}; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -1082,7 +1081,9 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { struct tc_action_ops *a_o; - a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack); + a_o = tc_action_load_ops(tb[i], flags & TCA_ACT_FLAGS_POLICE, + !(flags & TCA_ACT_FLAGS_NO_RTNL), + extack); if (IS_ERR(a_o)) { err = PTR_ERR(a_o); goto err_mod; @@ -1091,9 +1092,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind, - ops[i - 1], &init_res[i - 1], rtnl_held, - extack); + act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1], + &init_res[i - 1], flags, extack); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; @@ -1113,7 +1113,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, goto err_mod; err: - tcf_action_destroy(actions, bind); + tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND); err_mod: for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { if (ops[i]) @@ -1495,7 +1495,7 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], } static int tcf_action_add(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 portid, int ovr, + struct nlmsghdr *n, u32 portid, u32 flags, struct netlink_ext_ack *extack) { size_t attr_size = 0; @@ -1504,8 +1504,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, int init_res[TCA_ACT_MAX_PRIO] = {}; for (loop = 0; loop < 10; loop++) { - ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, - actions, init_res, &attr_size, true, extack); + ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res, + &attr_size, flags, extack); if (ret != -EAGAIN) break; } @@ -1535,7 +1535,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ROOT_MAX + 1]; u32 portid = NETLINK_CB(skb).portid; - int ret = 0, ovr = 0; + u32 flags = 0; + int ret = 0; if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN)) @@ -1561,8 +1562,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, * is zero) then just set this */ if (n->nlmsg_flags & NLM_F_REPLACE) - ovr = 1; - ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr, + flags = TCA_ACT_FLAGS_REPLACE; + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags, extack); break; case RTM_DELACTION: diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index e409a0005717..040807aa15b9 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -275,11 +275,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, - int replace, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tcf_bpf_cfg cfg, old; @@ -317,7 +317,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (bind) return 0; - if (!replace) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*act, bind); return -EEXIST; } diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index e19885d7fe2c..94e78ac7a748 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -96,12 +96,12 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_chain *goto_ch = NULL; struct tcf_connmark_info *ci; struct tc_connmark *parm; @@ -144,7 +144,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci = to_connmark(*a); if (bind) return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 4fa4fcb842ba..a15ec95e69c3 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -41,11 +41,12 @@ static unsigned int csum_net_id; static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **a, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -78,7 +79,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, } else if (err > 0) { if (bind)/* dont override defaults */ return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 1b4b3514c94f..ad9df0cb4b98 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1235,11 +1235,11 @@ static int tcf_ct_fill_params(struct net *net, static int tcf_ct_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int replace, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ct_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_ct_params *params = NULL; struct nlattr *tb[TCA_CT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -1279,7 +1279,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, if (bind) return 0; - if (!replace) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index b20c8ce59905..549374a2d008 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -154,11 +154,11 @@ static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = { static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ctinfo_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; u32 dscpmask = 0, dscpstatemask, index; struct nlattr *tb[TCA_CTINFO_MAX + 1]; struct tcf_ctinfo_params *cp_new; @@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, } else if (err > 0) { if (bind) /* don't override defaults */ return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 73c3926358a0..d8dce173df37 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -52,11 +52,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { static int tcf_gact_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GACT_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_gact *parm; @@ -109,7 +109,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } else if (err > 0) { if (bind)/* dont override defaults */ return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index a78cb7965718..7df72a4197a3 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -295,12 +295,12 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime, static int tcf_gate_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gate_net_id); enum tk_offsets tk_offset = TK_OFFS_TAI; + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GATE_MAX + 1]; struct tcf_chain *goto_ch = NULL; u64 cycletime = 0, basetime = 0; @@ -364,7 +364,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index a2ddea04183a..7064a365a1a9 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -479,11 +479,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, static int tcf_ife_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -532,7 +532,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, kfree(p); return err; } - err = load_metalist(tb2, rtnl_held); + err = load_metalist(tb2, !(flags & TCA_ACT_FLAGS_NO_RTNL)); if (err) { kfree(p); return err; @@ -560,7 +560,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, return ret; } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); kfree(p); return -EEXIST; @@ -600,7 +600,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } if (tb[TCA_IFE_METALST]) { - err = populate_metalist(ife, tb2, exists, rtnl_held); + err = populate_metalist(ife, tb2, exists, + !(flags & TCA_ACT_FLAGS_NO_RTNL)); if (err) goto metadata_parse_err; } else { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index ac7297f42355..265b1443e252 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -94,10 +94,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - const struct tc_action_ops *ops, int ovr, int bind, + const struct tc_action_ops *ops, struct tcf_proto *tp, u32 flags) { struct tc_action_net *tn = net_generic(net, id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_IPT_MAX + 1]; struct tcf_ipt *ipt; struct xt_entry_target *td, *t; @@ -154,7 +155,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, if (bind)/* dont override defaults */ return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } @@ -201,21 +202,21 @@ err1: } static int tcf_ipt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **a, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr, - bind, tp, flags); + return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, + tp, flags); } static int tcf_xt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool unlocked, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **a, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr, - bind, tp, flags); + return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, + tp, flags); } static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 7153c67f641e..77ee80e3effc 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -91,11 +91,11 @@ static struct tc_action_ops act_mirred_ops; static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tcf_chain *goto_ch = NULL; bool mac_header_xmit = false; @@ -155,7 +155,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return ret; } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index d1486ea496a2..e4529b428cf4 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -152,11 +152,11 @@ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = { static int tcf_mpls_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mpls_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MPLS_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tcf_mpls_params *p; @@ -255,7 +255,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 1ebd2a86d980..7dd6b586ba7f 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -34,11 +34,11 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { }; static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, - struct tc_action **a, int ovr, int bind, - bool rtnl_held, struct tcf_proto *tp, + struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, nat_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_NAT_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_nat *parm; @@ -70,7 +70,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, } else if (err > 0) { if (bind) return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b45304446e13..c6c862c459cc 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -136,11 +136,11 @@ nla_failure: static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_PEDIT_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_pedit_key *keys = NULL; @@ -198,7 +198,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } else if (err > 0) { if (bind) goto out_free; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { ret = -EEXIST; goto out_release; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0fab8de176d2..832157a840fc 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -48,11 +48,11 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { static int tcf_police_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { int ret = 0, tcfp_result = TC_ACT_OK, err, size; + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_POLICE_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_police *parm; @@ -97,7 +97,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; spin_lock_init(&(to_police(*a)->tcfp_lock)); - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 6a0c16e4351d..230501eb9e06 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -34,11 +34,12 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { }; static int tcf_sample_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **a, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; u32 psample_group_num, rate, index; @@ -75,7 +76,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, return ret; } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 726cc956d06f..cbbe1861d3a2 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -85,11 +85,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { static int tcf_simp_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_DEF_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_defact *parm; @@ -147,7 +147,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, tcf_action_set_ctrlact(*a, parm->action, goto_ch); ret = ACT_P_CREATED; } else { - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { err = -EEXIST; goto release_idr; } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index e5f3fb8b00e3..605418538347 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -96,11 +96,11 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); + bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct tcf_skbedit_params *params_new; struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -186,7 +186,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(*a); - if (!ovr) { + if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 762ceec3e6f6..ecb9ee666095 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -100,11 +100,12 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); + bool ovr = flags & TCA_ACT_FLAGS_REPLACE; + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SKBMOD_MAX + 1]; struct tcf_skbmod_params *p, *p_old; struct tcf_chain *goto_ch = NULL; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 85c0d0d5b9da..d9cd174eecb7 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -355,11 +355,11 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; struct metadata_dst *metadata = NULL; @@ -504,7 +504,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) { NL_SET_ERR_MSG(extack, "TC IDR already exists"); ret = -EEXIST; goto release_tun_meta; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 71f2015c70ca..e4dc5a555bd8 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -114,11 +114,11 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_VLAN_MAX + 1]; struct tcf_chain *goto_ch = NULL; bool push_prio_exists = false; @@ -223,7 +223,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7be5b9d2aead..69185e311422 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1949,6 +1949,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, int err; int tp_created; bool rtnl_held = false; + u32 flags = 0; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -2112,9 +2113,12 @@ replay: goto errout; } + if (!(n->nlmsg_flags & NLM_F_CREATE)) + flags |= TCA_ACT_FLAGS_REPLACE; + if (!rtnl_held) + flags |= TCA_ACT_FLAGS_NO_RTNL; err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, - n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, - rtnl_held, extack); + flags, extack); if (err == 0) { tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false, rtnl_held); @@ -3020,8 +3024,8 @@ void tcf_exts_destroy(struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr *rate_tlv, struct tcf_exts *exts, + u32 flags, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { @@ -3032,13 +3036,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { struct tc_action_ops *a_o; - a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack); + a_o = tc_action_load_ops(tb[exts->police], true, + !(flags & TCA_ACT_FLAGS_NO_RTNL), + extack); if (IS_ERR(a_o)) return PTR_ERR(a_o); + flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND; act = tcf_action_init_1(net, tp, tb[exts->police], - rate_tlv, "police", ovr, - TCA_ACT_BIND, a_o, init_res, - rtnl_held, extack); + rate_tlv, a_o, init_res, flags, + extack); module_put(a_o->owner); if (IS_ERR(act)) return PTR_ERR(act); @@ -3050,10 +3056,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, } else if (exts->action && tb[exts->action]) { int err; + flags |= TCA_ACT_FLAGS_BIND; err = tcf_action_init(net, tp, tb[exts->action], - rate_tlv, NULL, ovr, TCA_ACT_BIND, - exts->actions, init_res, - &attr_size, rtnl_held, extack); + rate_tlv, exts->actions, init_res, + &attr_size, flags, extack); if (err < 0) return err; exts->nr_actions = err; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index f256a7c69093..8158fc9ee1ab 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -145,12 +145,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct basic_filter *f, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); if (err < 0) return err; @@ -169,8 +169,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr **tca, void **arg, + u32 flags, struct netlink_ext_ack *extack) { int err; struct basic_head *head = rtnl_dereference(tp->root); @@ -216,7 +216,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, goto errout; } - err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr, + err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], flags, extack); if (err < 0) { if (!fold) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index fa739efa59f4..3b472bafdc9d 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -404,7 +404,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, struct cls_bpf_prog *prog, unsigned long base, - struct nlattr **tb, struct nlattr *est, bool ovr, + struct nlattr **tb, struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { bool is_bpf, is_ebpf, have_exts = false; @@ -416,7 +416,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true, + ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags, extack); if (ret < 0) return ret; @@ -455,7 +455,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -500,7 +500,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; prog->handle = handle; - ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr, + ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags, extack); if (ret < 0) goto errout_idr; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index fb881144fa01..ed00001b528a 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -76,7 +76,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work) static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; @@ -108,8 +108,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr, - true, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, flags, + extack); if (err < 0) goto errout; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 87398af2715a..972303aa8edd 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -387,7 +387,7 @@ static void flow_destroy_filter_work(struct work_struct *work) static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); @@ -442,8 +442,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto err2; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr, - true, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, flags, + extack); if (err < 0) goto err2; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index d7869a984881..23b21253b3c3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1915,23 +1915,22 @@ errout_cleanup: static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr, - struct fl_flow_tmplt *tmplt, bool rtnl_held, + struct nlattr *est, + struct fl_flow_tmplt *tmplt, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held, - extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); if (err < 0) return err; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); - if (!rtnl_held) + if (flags & TCA_ACT_FLAGS_NO_RTNL) rtnl_lock(); tcf_bind_filter(tp, &f->res, base); - if (!rtnl_held) + if (flags & TCA_ACT_FLAGS_NO_RTNL) rtnl_unlock(); } @@ -1975,10 +1974,11 @@ static int fl_ht_insert_unique(struct cls_fl_filter *fnew, static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct cls_fl_head *head = fl_head_dereference(tp); + bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL); struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; struct fl_flow_mask *mask; @@ -2034,8 +2034,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } } - err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, - tp->chain->tmplt_priv, rtnl_held, extack); + err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], + tp->chain->tmplt_priv, flags, extack); if (err) goto errout; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index ec945294626a..8654b0ce997c 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -198,15 +198,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_set_parms(struct net *net, struct tcf_proto *tp, struct fw_filter *f, struct nlattr **tb, - struct nlattr **tca, unsigned long base, bool ovr, + struct nlattr **tca, unsigned long base, u32 flags, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); u32 mask; int err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr, - true, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, flags, + extack); if (err < 0) return err; @@ -237,8 +237,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, - bool ovr, bool rtnl_held, - struct netlink_ext_ack *extack) + u32 flags, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = *arg; @@ -277,7 +276,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return err; } - err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack); + err = fw_set_parms(net, tp, fnew, tb, tca, base, flags, extack); if (err < 0) { tcf_exts_destroy(&fnew->exts); kfree(fnew); @@ -326,7 +325,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, f->id = handle; f->tp = tp; - err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack); + err = fw_set_parms(net, tp, f, tb, tca, base, flags, extack); if (err < 0) goto errout; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index cafb84480bab..24f0046ce0b3 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -163,13 +163,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { static int mall_set_parms(struct net *net, struct tcf_proto *tp, struct cls_mall_head *head, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true, - extack); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, flags, extack); if (err < 0) return err; @@ -183,13 +182,13 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, static int mall_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_MATCHALL_MAX + 1]; struct cls_mall_head *new; - u32 flags = 0; + u32 userflags = 0; int err; if (!tca[TCA_OPTIONS]) @@ -204,8 +203,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, return err; if (tb[TCA_MATCHALL_FLAGS]) { - flags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]); - if (!tc_flags_valid(flags)) + userflags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]); + if (!tc_flags_valid(userflags)) return -EINVAL; } @@ -220,14 +219,14 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (!handle) handle = 1; new->handle = handle; - new->flags = flags; + new->flags = userflags; new->pf = alloc_percpu(struct tc_matchall_pcnt); if (!new->pf) { err = -ENOMEM; goto err_alloc_percpu; } - err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr, + err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], flags, extack); if (err) goto err_set_parms; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 5efa3e7ace15..a35ab8c27866 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -382,7 +382,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct route4_filter *f, u32 handle, struct route4_head *head, struct nlattr **tb, struct nlattr *est, int new, - bool ovr, struct netlink_ext_ack *extack) + u32 flags, struct netlink_ext_ack *extack) { u32 id = 0, to = 0, nhandle = 0x8000; struct route4_filter *fp; @@ -390,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_bucket *b; int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); if (err < 0) return err; @@ -464,8 +464,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr **tca, void **arg, u32 flags, + struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -510,7 +510,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, } err = route4_set_parms(net, tp, base, f, handle, head, tb, - tca[TCA_RATE], new, ovr, extack); + tca[TCA_RATE], new, flags, extack); if (err < 0) goto errout; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 27a4b6dbcf57..5cd9d6b143c4 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -470,9 +470,8 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + u32 handle, struct nlattr **tca, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); @@ -499,7 +498,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true, + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags, extack); if (err < 0) goto errout2; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index e9a8a2c86bbd..742c7d49a958 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -330,7 +330,7 @@ static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, struct tcindex_filter_result *r, struct nlattr **tb, - struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_data *cp = NULL, *oldp; @@ -342,7 +342,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack); + err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack); if (err < 0) goto errout; @@ -529,8 +529,8 @@ errout: static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr **tca, void **arg, u32 flags, + struct netlink_ext_ack *extack) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; @@ -551,7 +551,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, return err; return tcindex_set_parms(net, tp, base, handle, p, r, tb, - tca[TCA_RATE], ovr, extack); + tca[TCA_RATE], flags, extack); } static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker, diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 6e1abe805448..4272814487f0 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -709,12 +709,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { static int u32_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tc_u_knode *n, struct nlattr **tb, - struct nlattr *est, bool ovr, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, flags, extack); if (err < 0) return err; @@ -840,7 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, bool rtnl_held, + struct nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; @@ -849,7 +849,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, struct tc_u32_sel *s; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_U32_MAX + 1]; - u32 htid, flags = 0; + u32 htid, userflags = 0; size_t sel_size; int err; @@ -868,8 +868,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; if (tb[TCA_U32_FLAGS]) { - flags = nla_get_u32(tb[TCA_U32_FLAGS]); - if (!tc_flags_valid(flags)) { + userflags = nla_get_u32(tb[TCA_U32_FLAGS]); + if (!tc_flags_valid(userflags)) { NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags"); return -EINVAL; } @@ -884,7 +884,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - if ((n->flags ^ flags) & + if ((n->flags ^ userflags) & ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) { NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags"); return -EINVAL; @@ -895,7 +895,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -ENOMEM; err = u32_set_parms(net, tp, base, new, tb, - tca[TCA_RATE], ovr, extack); + tca[TCA_RATE], flags, extack); if (err) { u32_destroy_key(new, false); @@ -955,9 +955,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht->handle = handle; ht->prio = tp->prio; idr_init(&ht->handle_idr); - ht->flags = flags; + ht->flags = userflags; - err = u32_replace_hw_hnode(tp, ht, flags, extack); + err = u32_replace_hw_hnode(tp, ht, userflags, extack); if (err) { idr_remove(&tp_c->handle_idr, handle); kfree(ht); @@ -1038,7 +1038,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(n->ht_up, ht); n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; - n->flags = flags; + n->flags = userflags; err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE); if (err < 0) @@ -1060,7 +1060,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr, + err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], flags, extack); if (err == 0) { struct tc_u_knode __rcu **ins; -- cgit v1.2.3 From 35d7a6f1fb53479965e9f99e8c87edc642336eba Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 31 Jul 2021 12:21:44 +0200 Subject: nfc: hci: pass callback data param as pointer in nci_request() The nci_request() receives a callback function and unsigned long data argument "opt" which is passed to the callback. Almost all of the nci_request() callers pass pointer to a stack variable as data argument. Only few pass scalar value (e.g. u8). All such callbacks do not modify passed data argument and in previous commit they were made as const. However passing pointers via unsigned long removes the const annotation. The callback could simply cast unsigned long to a pointer to writeable memory. Use "const void *" as type of this "opt" argument to solve this and prevent modifying the pointed contents. This is also consistent with generic pattern of passing data arguments - via "void *". In few places which pass scalar values, use casts via "unsigned long" to suppress any warnings. Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/net/nfc/nci_core.h | 4 +- net/nfc/nci/core.c | 119 ++++++++++++++++++++++----------------------- net/nfc/nci/hci.c | 16 +++--- 3 files changed, 67 insertions(+), 72 deletions(-) (limited to 'include/net') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 4770a81f4aa7..a964daedc17b 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -276,8 +276,8 @@ int nci_register_device(struct nci_dev *ndev); void nci_unregister_device(struct nci_dev *ndev); int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, - unsigned long opt), - unsigned long opt, __u32 timeout); + const void *opt), + const void *opt, __u32 timeout); int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload); int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 80a5c2a8e9fa..82ab39d80726 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -95,8 +95,8 @@ static void nci_req_cancel(struct nci_dev *ndev, int err) /* Execute request and wait for completion. */ static int __nci_request(struct nci_dev *ndev, - void (*req)(struct nci_dev *ndev, unsigned long opt), - unsigned long opt, __u32 timeout) + void (*req)(struct nci_dev *ndev, const void *opt), + const void *opt, __u32 timeout) { int rc = 0; long completion_rc; @@ -139,8 +139,8 @@ static int __nci_request(struct nci_dev *ndev, inline int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, - unsigned long opt), - unsigned long opt, __u32 timeout) + const void *opt), + const void *opt, __u32 timeout) { int rc; @@ -155,7 +155,7 @@ inline int nci_request(struct nci_dev *ndev, return rc; } -static void nci_reset_req(struct nci_dev *ndev, unsigned long opt) +static void nci_reset_req(struct nci_dev *ndev, const void *opt) { struct nci_core_reset_cmd cmd; @@ -163,17 +163,17 @@ static void nci_reset_req(struct nci_dev *ndev, unsigned long opt) nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd); } -static void nci_init_req(struct nci_dev *ndev, unsigned long opt) +static void nci_init_req(struct nci_dev *ndev, const void *opt) { u8 plen = 0; if (opt) plen = sizeof(struct nci_core_init_v2_cmd); - nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, (void *)opt); + nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, opt); } -static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt) +static void nci_init_complete_req(struct nci_dev *ndev, const void *opt) { struct nci_rf_disc_map_cmd cmd; struct disc_map_config *cfg = cmd.mapping_configs; @@ -215,10 +215,9 @@ struct nci_set_config_param { const __u8 *val; }; -static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt) +static void nci_set_config_req(struct nci_dev *ndev, const void *opt) { - const struct nci_set_config_param *param = - (struct nci_set_config_param *)opt; + const struct nci_set_config_param *param = opt; struct nci_core_set_config_cmd cmd; BUG_ON(param->len > NCI_MAX_PARAM_LEN); @@ -236,10 +235,9 @@ struct nci_rf_discover_param { __u32 tm_protocols; }; -static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt) +static void nci_rf_discover_req(struct nci_dev *ndev, const void *opt) { - const struct nci_rf_discover_param *param = - (struct nci_rf_discover_param *)opt; + const struct nci_rf_discover_param *param = opt; struct nci_rf_disc_cmd cmd; cmd.num_disc_configs = 0; @@ -302,10 +300,9 @@ struct nci_rf_discover_select_param { __u8 rf_protocol; }; -static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt) +static void nci_rf_discover_select_req(struct nci_dev *ndev, const void *opt) { - const struct nci_rf_discover_select_param *param = - (struct nci_rf_discover_select_param *)opt; + const struct nci_rf_discover_select_param *param = opt; struct nci_rf_discover_select_cmd cmd; cmd.rf_discovery_id = param->rf_discovery_id; @@ -329,11 +326,11 @@ static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt) sizeof(struct nci_rf_discover_select_cmd), &cmd); } -static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt) +static void nci_rf_deactivate_req(struct nci_dev *ndev, const void *opt) { struct nci_rf_deactivate_cmd cmd; - cmd.type = opt; + cmd.type = (unsigned long)opt; nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD, sizeof(struct nci_rf_deactivate_cmd), &cmd); @@ -345,10 +342,9 @@ struct nci_cmd_param { const __u8 *payload; }; -static void nci_generic_req(struct nci_dev *ndev, unsigned long opt) +static void nci_generic_req(struct nci_dev *ndev, const void *opt) { - const struct nci_cmd_param *param = - (struct nci_cmd_param *)opt; + const struct nci_cmd_param *param = opt; nci_send_cmd(ndev, param->opcode, param->len, param->payload); } @@ -361,7 +357,7 @@ int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload param.len = len; param.payload = payload; - return __nci_request(ndev, nci_generic_req, (unsigned long)¶m, + return __nci_request(ndev, nci_generic_req, ¶m, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_prop_cmd); @@ -375,21 +371,21 @@ int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, param.len = len; param.payload = payload; - return __nci_request(ndev, nci_generic_req, (unsigned long)¶m, + return __nci_request(ndev, nci_generic_req, ¶m, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_cmd); int nci_core_reset(struct nci_dev *ndev) { - return __nci_request(ndev, nci_reset_req, 0, + return __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); } EXPORT_SYMBOL(nci_core_reset); int nci_core_init(struct nci_dev *ndev) { - return __nci_request(ndev, nci_init_req, 0, + return __nci_request(ndev, nci_init_req, (void *)0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } EXPORT_SYMBOL(nci_core_init); @@ -399,9 +395,9 @@ struct nci_loopback_data { struct sk_buff *data; }; -static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt) +static void nci_send_data_req(struct nci_dev *ndev, const void *opt) { - const struct nci_loopback_data *data = (struct nci_loopback_data *)opt; + const struct nci_loopback_data *data = opt; nci_send_data(ndev, data->conn_id, data->data); } @@ -462,7 +458,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len, loopback_data.data = skb; ndev->cur_conn_id = conn_id; - r = nci_request(ndev, nci_send_data_req, (unsigned long)&loopback_data, + r = nci_request(ndev, nci_send_data_req, &loopback_data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK && resp) *resp = conn_info->rx_skb; @@ -495,7 +491,7 @@ static int nci_open_device(struct nci_dev *ndev) rc = ndev->ops->init(ndev); if (!rc) { - rc = __nci_request(ndev, nci_reset_req, 0, + rc = __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); } @@ -508,10 +504,10 @@ static int nci_open_device(struct nci_dev *ndev) .feature1 = NCI_FEATURE_DISABLE, .feature2 = NCI_FEATURE_DISABLE }; - unsigned long opt = 0; + const void *opt = NULL; if (ndev->nci_ver & NCI_VER_2_MASK) - opt = (unsigned long)&nci_init_v2_cmd; + opt = &nci_init_v2_cmd; rc = __nci_request(ndev, nci_init_req, opt, msecs_to_jiffies(NCI_INIT_TIMEOUT)); @@ -521,7 +517,7 @@ static int nci_open_device(struct nci_dev *ndev) rc = ndev->ops->post_setup(ndev); if (!rc) { - rc = __nci_request(ndev, nci_init_complete_req, 0, + rc = __nci_request(ndev, nci_init_complete_req, (void *)0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } @@ -571,7 +567,7 @@ static int nci_close_device(struct nci_dev *ndev) atomic_set(&ndev->cmd_cnt, 1); set_bit(NCI_INIT, &ndev->flags); - __nci_request(ndev, nci_reset_req, 0, + __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); /* After this point our queues are empty @@ -637,15 +633,15 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val) param.len = len; param.val = val; - return __nci_request(ndev, nci_set_config_req, (unsigned long)¶m, + return __nci_request(ndev, nci_set_config_req, ¶m, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); } EXPORT_SYMBOL(nci_set_config); -static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt) +static void nci_nfcee_discover_req(struct nci_dev *ndev, const void *opt) { struct nci_nfcee_discover_cmd cmd; - __u8 action = opt; + __u8 action = (unsigned long)opt; cmd.discovery_action = action; @@ -654,15 +650,16 @@ static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt) int nci_nfcee_discover(struct nci_dev *ndev, u8 action) { - return __nci_request(ndev, nci_nfcee_discover_req, action, + unsigned long opt = action; + + return __nci_request(ndev, nci_nfcee_discover_req, (void *)opt, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_discover); -static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt) +static void nci_nfcee_mode_set_req(struct nci_dev *ndev, const void *opt) { - const struct nci_nfcee_mode_set_cmd *cmd = - (struct nci_nfcee_mode_set_cmd *)opt; + const struct nci_nfcee_mode_set_cmd *cmd = opt; nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD, sizeof(struct nci_nfcee_mode_set_cmd), cmd); @@ -675,16 +672,14 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) cmd.nfcee_id = nfcee_id; cmd.nfcee_mode = nfcee_mode; - return __nci_request(ndev, nci_nfcee_mode_set_req, - (unsigned long)&cmd, + return __nci_request(ndev, nci_nfcee_mode_set_req, &cmd, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_mode_set); -static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt) +static void nci_core_conn_create_req(struct nci_dev *ndev, const void *opt) { - const struct core_conn_create_data *data = - (struct core_conn_create_data *)opt; + const struct core_conn_create_data *data = opt; nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd); } @@ -721,24 +716,26 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, } ndev->cur_dest_type = destination_type; - r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data, + r = __nci_request(ndev, nci_core_conn_create_req, &data, msecs_to_jiffies(NCI_CMD_TIMEOUT)); kfree(cmd); return r; } EXPORT_SYMBOL(nci_core_conn_create); -static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt) +static void nci_core_conn_close_req(struct nci_dev *ndev, const void *opt) { - __u8 conn_id = opt; + __u8 conn_id = (unsigned long)opt; nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id); } int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) { + unsigned long opt = conn_id; + ndev->cur_conn_id = conn_id; - return __nci_request(ndev, nci_core_conn_close_req, conn_id, + return __nci_request(ndev, nci_core_conn_close_req, (void *)opt, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_conn_close); @@ -758,14 +755,14 @@ static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) param.id = NCI_PN_ATR_REQ_GEN_BYTES; - rc = nci_request(ndev, nci_set_config_req, (unsigned long)¶m, + rc = nci_request(ndev, nci_set_config_req, ¶m, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); if (rc) return rc; param.id = NCI_LN_ATR_RES_GEN_BYTES; - return nci_request(ndev, nci_set_config_req, (unsigned long)¶m, + return nci_request(ndev, nci_set_config_req, ¶m, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); } @@ -815,7 +812,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev, pr_debug("target active or w4 select, implicitly deactivate\n"); rc = nci_request(ndev, nci_rf_deactivate_req, - NCI_DEACTIVATE_TYPE_IDLE_MODE, + (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); if (rc) return -EBUSY; @@ -837,7 +834,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev, param.im_protocols = im_protocols; param.tm_protocols = tm_protocols; - rc = nci_request(ndev, nci_rf_discover_req, (unsigned long)¶m, + rc = nci_request(ndev, nci_rf_discover_req, ¶m, msecs_to_jiffies(NCI_RF_DISC_TIMEOUT)); if (!rc) @@ -856,7 +853,8 @@ static void nci_stop_poll(struct nfc_dev *nfc_dev) return; } - nci_request(ndev, nci_rf_deactivate_req, NCI_DEACTIVATE_TYPE_IDLE_MODE, + nci_request(ndev, nci_rf_deactivate_req, + (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } @@ -915,8 +913,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev, else param.rf_protocol = NCI_RF_PROTOCOL_NFC_DEP; - rc = nci_request(ndev, nci_rf_discover_select_req, - (unsigned long)¶m, + rc = nci_request(ndev, nci_rf_discover_select_req, ¶m, msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT)); } @@ -931,7 +928,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, __u8 mode) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); - u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; + unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; pr_debug("entry\n"); @@ -949,7 +946,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, } if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) { - nci_request(ndev, nci_rf_deactivate_req, nci_mode, + nci_request(ndev, nci_rf_deactivate_req, (void *)nci_mode, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } } @@ -987,8 +984,8 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev) } else { if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE || atomic_read(&ndev->state) == NCI_DISCOVERY) { - nci_request(ndev, nci_rf_deactivate_req, 0, - msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); + nci_request(ndev, nci_rf_deactivate_req, (void *)0, + msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } rc = nfc_tm_deactivated(nfc_dev); diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index a8ff794a8084..e199912ee1e5 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -195,9 +195,9 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, return i; } -static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt) +static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt) { - const struct nci_data *data = (struct nci_data *)opt; + const struct nci_data *data = opt; nci_hci_send_data(ndev, data->pipe, data->cmd, data->data, data->data_len); @@ -240,7 +240,7 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, data.data = param; data.data_len = param_len; - r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; @@ -511,9 +511,8 @@ int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) data.data = NULL; data.data_len = 0; - return nci_request(ndev, nci_hci_send_data_req, - (unsigned long)&data, - msecs_to_jiffies(NCI_DATA_TIMEOUT)); + return nci_request(ndev, nci_hci_send_data_req, &data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); } EXPORT_SYMBOL(nci_hci_open_pipe); @@ -587,8 +586,7 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, data.data = tmp; data.data_len = param_len + 1; - r = nci_request(ndev, nci_hci_send_data_req, - (unsigned long)&data, + r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; @@ -627,7 +625,7 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, data.data = &idx; data.data_len = 1; - r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { -- cgit v1.2.3 From 29a097b7747725da003245412dab61093d4e5976 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 31 Jul 2021 17:14:32 +0300 Subject: net: dsa: remove the struct packet_type argument from dsa_device_ops::rcv() No tagging driver uses this. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 7 ++----- net/dsa/dsa.c | 2 +- net/dsa/tag_ar9331.c | 3 +-- net/dsa/tag_brcm.c | 14 +++++--------- net/dsa/tag_dsa.c | 6 ++---- net/dsa/tag_gswip.c | 3 +-- net/dsa/tag_hellcreek.c | 3 +-- net/dsa/tag_ksz.c | 6 ++---- net/dsa/tag_lan9303.c | 3 +-- net/dsa/tag_mtk.c | 3 +-- net/dsa/tag_ocelot.c | 3 +-- net/dsa/tag_ocelot_8021q.c | 3 +-- net/dsa/tag_qca.c | 3 +-- net/dsa/tag_rtl4_a.c | 3 +-- net/dsa/tag_sja1105.c | 6 ++---- net/dsa/tag_trailer.c | 3 +-- net/dsa/tag_xrs700x.c | 3 +-- 17 files changed, 25 insertions(+), 49 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 2af6ee2f2bfb..7cc9507282d3 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -79,13 +79,11 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE, }; -struct packet_type; struct dsa_switch; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); - struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); unsigned int needed_headroom; @@ -239,8 +237,7 @@ struct dsa_port { /* Copies for faster access in master receive hot path */ struct dsa_switch_tree *dst; - struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); enum { DSA_PORT_TYPE_UNUSED = 0, diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 84cad1be9ce4..1dc45e40f961 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -238,7 +238,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; - nskb = cpu_dp->rcv(skb, dev, pt); + nskb = cpu_dp->rcv(skb, dev); if (!nskb) { kfree_skb(skb); return 0; diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c index 0efae1a372b3..8a02ac44282f 100644 --- a/net/dsa/tag_ar9331.c +++ b/net/dsa/tag_ar9331.c @@ -44,8 +44,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb, } static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb, - struct net_device *ndev, - struct packet_type *pt) + struct net_device *ndev) { u8 ver, port; u16 hdr; diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index a27f5096777a..96e93b544a0d 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -136,7 +136,6 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, */ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, unsigned int offset) { int source_port; @@ -182,13 +181,12 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, } -static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev) { struct sk_buff *nskb; /* skb->data points to the EtherType, the tag is right before it */ - nskb = brcm_tag_rcv_ll(skb, dev, pt, 2); + nskb = brcm_tag_rcv_ll(skb, dev, 2); if (!nskb) return nskb; @@ -251,8 +249,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, } static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { int source_port; u8 *brcm_tag; @@ -302,11 +299,10 @@ static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb, } static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { /* tag is prepended to the packet */ - return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN); + return brcm_tag_rcv_ll(skb, dev, ETH_HLEN); } static const struct dsa_device_ops brcm_prepend_netdev_ops = { diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 3607499d0697..e32f8160e895 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -332,8 +332,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) return dsa_xmit_ll(skb, dev, 0); } -static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev) { if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) return NULL; @@ -373,8 +372,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev) { if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) return NULL; diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c index 5985dab06ab8..df7140984da3 100644 --- a/net/dsa/tag_gswip.c +++ b/net/dsa/tag_gswip.c @@ -75,8 +75,7 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb, } static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { int port; u8 *gswip_tag; diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index c41208cbd936..f64b805303cd 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -29,8 +29,7 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb, } static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { /* Tag decoding */ u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN; diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 1c2dfa80f9b0..fa1d60d13ad9 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -67,8 +67,7 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev) { u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; @@ -134,8 +133,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, return skb; } -static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev) { /* Tag decoding */ u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index cf7cf2fa1240..58d3a0e712d2 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -74,8 +74,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev) { __be16 *lan9303_tag; u16 lan9303_tag1; diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 3fb80e43f3a5..bbf37c031d44 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -61,8 +61,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, return skb; } -static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev) { u16 hdr; int port; diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 3252634a29b8..d37ab98e7fe1 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -55,8 +55,7 @@ static struct sk_buff *seville_xmit(struct sk_buff *skb, } static struct sk_buff *ocelot_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { u64 src_port, qos_class; u64 vlan_tci, tag_type; diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index c95de71d13b0..3038a257ba05 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -38,8 +38,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, } static struct sk_buff *ocelot_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { int src_port, switch_id; diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 693bda013065..6e3136990491 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -48,8 +48,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev) { u8 ver; u16 hdr; diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index f6b63aad6551..aaddca3c0245 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -64,8 +64,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, } static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { u16 protport; __be16 *p; diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 664cb802b71a..745c4560b4aa 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -391,8 +391,7 @@ static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port, } static struct sk_buff *sja1105_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { int source_port = -1, switch_id = -1; struct sja1105_meta meta = {0}; @@ -546,8 +545,7 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, } static struct sk_buff *sja1110_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { int source_port = -1, switch_id = -1; bool host_only = false; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index ba73804340a5..5749ba85c2b8 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -24,8 +24,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev) { u8 *trailer; int source_port; diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c index da231c16ac82..ff442b8af636 100644 --- a/net/dsa/tag_xrs700x.c +++ b/net/dsa/tag_xrs700x.c @@ -25,8 +25,7 @@ static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev) { int source_port; u8 *trailer; -- cgit v1.2.3 From 3a755cd8b7c601f756cbbf908b84f7cc8c04a02b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 2 Aug 2021 11:02:19 +0800 Subject: bonding: add new option lacp_active Add an option lacp_active, which is similar with team's runner.active. This option specifies whether to send LACPDU frames periodically. If set on, the LACPDU frames are sent along with the configured lacp_rate setting. If set off, the LACPDU frames acts as "speak when spoken to". Note, the LACPDU state frames still will be sent when init or unbind port. v2: remove module parameter Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- Documentation/networking/bonding.rst | 12 ++++++++++++ drivers/net/bonding/bond_3ad.c | 11 ++++++----- drivers/net/bonding/bond_main.c | 1 + drivers/net/bonding/bond_netlink.c | 16 ++++++++++++++++ drivers/net/bonding/bond_options.c | 27 +++++++++++++++++++++++++++ drivers/net/bonding/bond_procfs.c | 2 ++ drivers/net/bonding/bond_sysfs.c | 25 ++++++++++++++++++++----- include/net/bond_3ad.h | 1 + include/net/bond_options.h | 1 + include/net/bonding.h | 1 + include/uapi/linux/if_link.h | 1 + tools/include/uapi/linux/if_link.h | 1 + 12 files changed, 89 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 62f2aab8eaec..31cfd7d674a6 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -501,6 +501,18 @@ fail_over_mac This option was added in bonding version 3.2.0. The "follow" policy was added in bonding version 3.3.0. +lacp_active + Option specifying whether to send LACPDU frames periodically. + + off or 0 + LACPDU frames acts as "speak when spoken to". + + on or 1 + LACPDU frames are sent along the configured links + periodically. See lacp_rate for more details. + + The default is on. + lacp_rate Option specifying the rate in which we'll ask our link partner diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 6908822d9773..a4a202b9a0a2 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -96,7 +96,7 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker); static void ad_mux_machine(struct port *port, bool *update_slave_arr); static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); static void ad_tx_machine(struct port *port); -static void ad_periodic_machine(struct port *port); +static void ad_periodic_machine(struct port *port, struct bond_params bond_params); static void ad_port_selection_logic(struct port *port, bool *update_slave_arr); static void ad_agg_selection_logic(struct aggregator *aggregator, bool *update_slave_arr); @@ -1294,10 +1294,11 @@ static void ad_tx_machine(struct port *port) /** * ad_periodic_machine - handle a port's periodic state machine * @port: the port we're looking at + * @bond_params: bond parameters we will use * * Turn ntt flag on priodically to perform periodic transmission of lacpdu's. */ -static void ad_periodic_machine(struct port *port) +static void ad_periodic_machine(struct port *port, struct bond_params bond_params) { periodic_states_t last_state; @@ -1306,8 +1307,8 @@ static void ad_periodic_machine(struct port *port) /* check if port was reinitialized */ if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || - (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) - ) { + (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) || + !bond_params.lacp_active) { port->sm_periodic_state = AD_NO_PERIODIC; } /* check if state machine should change state */ @@ -2341,7 +2342,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) } ad_rx_machine(NULL, port); - ad_periodic_machine(port); + ad_periodic_machine(port, bond->params); ad_port_selection_logic(port, &update_slave_arr); ad_mux_machine(port, &update_slave_arr); ad_tx_machine(port); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 616ebbb08ca6..3ba5f4871162 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5478,6 +5478,7 @@ static int bond_check_params(struct bond_params *params) params->downdelay = downdelay; params->peer_notif_delay = 0; params->use_carrier = use_carrier; + params->lacp_active = 1; params->lacp_fast = lacp_fast; params->primary[0] = 0; params->primary_reselect = primary_reselect_value; diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 0561ece1ba45..5d54e11d18fa 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -100,6 +100,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_MIN_LINKS] = { .type = NLA_U32 }, [IFLA_BOND_LP_INTERVAL] = { .type = NLA_U32 }, [IFLA_BOND_PACKETS_PER_SLAVE] = { .type = NLA_U32 }, + [IFLA_BOND_AD_LACP_ACTIVE] = { .type = NLA_U8 }, [IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 }, [IFLA_BOND_AD_SELECT] = { .type = NLA_U8 }, [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, @@ -387,6 +388,16 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], if (err) return err; } + + if (data[IFLA_BOND_AD_LACP_ACTIVE]) { + int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]); + + bond_opt_initval(&newval, lacp_active); + err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval); + if (err) + return err; + } + if (data[IFLA_BOND_AD_LACP_RATE]) { int lacp_rate = nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]); @@ -490,6 +501,7 @@ static size_t bond_get_size(const struct net_device *bond_dev) nla_total_size(sizeof(u32)) + /* IFLA_BOND_MIN_LINKS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_LP_INTERVAL */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PACKETS_PER_SLAVE */ + nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_ACTIVE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_RATE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_SELECT */ nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */ @@ -622,6 +634,10 @@ static int bond_fill_info(struct sk_buff *skb, packets_per_slave)) goto nla_put_failure; + if (nla_put_u8(skb, IFLA_BOND_AD_LACP_ACTIVE, + bond->params.lacp_active)) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE, bond->params.lacp_fast)) goto nla_put_failure; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 0cf25de6f46d..a8fde3bc458f 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -58,6 +58,8 @@ static int bond_option_lp_interval_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_pps_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_lacp_active_set(struct bonding *bond, + const struct bond_opt_value *newval); static int bond_option_lacp_rate_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_select_set(struct bonding *bond, @@ -135,6 +137,12 @@ static const struct bond_opt_value bond_intmax_tbl[] = { { NULL, -1, 0} }; +static const struct bond_opt_value bond_lacp_active[] = { + { "off", 0, 0}, + { "on", 1, BOND_VALFLAG_DEFAULT}, + { NULL, -1, 0} +}; + static const struct bond_opt_value bond_lacp_rate_tbl[] = { { "slow", AD_LACP_SLOW, 0}, { "fast", AD_LACP_FAST, 0}, @@ -283,6 +291,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_intmax_tbl, .set = bond_option_updelay_set }, + [BOND_OPT_LACP_ACTIVE] = { + .id = BOND_OPT_LACP_ACTIVE, + .name = "lacp_active", + .desc = "Send LACPDU frames with configured lacp rate or acts as speak when spoken to", + .flags = BOND_OPTFLAG_IFDOWN, + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .values = bond_lacp_active, + .set = bond_option_lacp_active_set + }, [BOND_OPT_LACP_RATE] = { .id = BOND_OPT_LACP_RATE, .name = "lacp_rate", @@ -1333,6 +1350,16 @@ static int bond_option_pps_set(struct bonding *bond, return 0; } +static int bond_option_lacp_active_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n", + newval->string, newval->value); + bond->params.lacp_active = newval->value; + + return 0; +} + static int bond_option_lacp_rate_set(struct bonding *bond, const struct bond_opt_value *newval) { diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 0fb1da361bb1..f3e3bfd72556 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -133,6 +133,8 @@ static void bond_info_show_master(struct seq_file *seq) struct ad_info ad_info; seq_puts(seq, "\n802.3ad info\n"); + seq_printf(seq, "LACP active: %s\n", + (bond->params.lacp_active) ? "on" : "off"); seq_printf(seq, "LACP rate: %s\n", (bond->params.lacp_fast) ? "fast" : "slow"); seq_printf(seq, "Min links: %d\n", bond->params.min_links); diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 5f9e9a240226..b9e9842fed94 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -339,10 +339,24 @@ static ssize_t bonding_show_peer_notif_delay(struct device *d, static DEVICE_ATTR(peer_notif_delay, 0644, bonding_show_peer_notif_delay, bonding_sysfs_store_option); -/* Show the LACP interval. */ -static ssize_t bonding_show_lacp(struct device *d, - struct device_attribute *attr, - char *buf) +/* Show the LACP activity and interval. */ +static ssize_t bonding_show_lacp_active(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + const struct bond_opt_value *val; + + val = bond_opt_get_val(BOND_OPT_LACP_ACTIVE, bond->params.lacp_active); + + return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_active); +} +static DEVICE_ATTR(lacp_active, 0644, + bonding_show_lacp_active, bonding_sysfs_store_option); + +static ssize_t bonding_show_lacp_rate(struct device *d, + struct device_attribute *attr, + char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; @@ -352,7 +366,7 @@ static ssize_t bonding_show_lacp(struct device *d, return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast); } static DEVICE_ATTR(lacp_rate, 0644, - bonding_show_lacp, bonding_sysfs_store_option); + bonding_show_lacp_rate, bonding_sysfs_store_option); static ssize_t bonding_show_min_links(struct device *d, struct device_attribute *attr, @@ -738,6 +752,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_downdelay.attr, &dev_attr_updelay.attr, &dev_attr_peer_notif_delay.attr, + &dev_attr_lacp_active.attr, &dev_attr_lacp_rate.attr, &dev_attr_ad_select.attr, &dev_attr_xmit_hash_policy.attr, diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index c8696a230b7d..38785d48baff 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -303,6 +303,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond, int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); int bond_3ad_set_carrier(struct bonding *bond); +void bond_3ad_update_lacp_active(struct bonding *bond); void bond_3ad_update_lacp_rate(struct bonding *bond); void bond_3ad_update_ad_actor_settings(struct bonding *bond); int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats); diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 9d382f2f0bc5..e64833a674eb 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -64,6 +64,7 @@ enum { BOND_OPT_AD_USER_PORT_KEY, BOND_OPT_NUM_PEER_NOTIF_ALIAS, BOND_OPT_PEER_NOTIF_DELAY, + BOND_OPT_LACP_ACTIVE, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 625d9c72dee3..46df47004803 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -129,6 +129,7 @@ struct bond_params { int updelay; int downdelay; int peer_notif_delay; + int lacp_active; int lacp_fast; unsigned int min_links; int ad_select; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 49b22afab78f..5310003523ce 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -855,6 +855,7 @@ enum { IFLA_BOND_AD_ACTOR_SYSTEM, IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, __IFLA_BOND_MAX, }; diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index d208b2af697f..eb15f319aa57 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -653,6 +653,7 @@ enum { IFLA_BOND_AD_ACTOR_SYSTEM, IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From c87a4c542b5a796f795fec2b7a909c7d3067b11c Mon Sep 17 00:00:00 2001 From: Bijie Xu Date: Tue, 3 Aug 2021 11:40:18 +0200 Subject: net: flow_offload: correct comments mismatch with code Correct mismatch between the name of flow_offload_has_one_action() and its kdoc entry. Found using ./scripts/kernel-doc -Werror -none include/net/flow_offload.h Signed-off-by: Bijie Xu Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/flow_offload.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 69c9eabf8325..f3c2841566a0 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -293,7 +293,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action) } /** - * flow_action_has_one_action() - check if exactly one action is present + * flow_offload_has_one_action() - check if exactly one action is present * @action: tc filter flow offload action * * Returns true if exactly one action is present. -- cgit v1.2.3 From 0161d151f3e36306219f5aa6f5f6b3877038afd3 Mon Sep 17 00:00:00 2001 From: Bijie Xu Date: Tue, 3 Aug 2021 11:40:19 +0200 Subject: net: sched: provide missing kdoc for tcf_pkt_info and tcf_ematch_ops Provide missing kdoc of fields of struct tcf_pkt_info and tcf_ematch_ops. Found using ./scripts/kernel-doc -none -Werror include/net/pkt_cls.h Signed-off-by: Bijie Xu Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ec7823921bd2..298a8d10168b 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -337,6 +337,9 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); /** * struct tcf_pkt_info - packet information + * + * @ptr: start of the pkt data + * @nexthdr: offset of the next header */ struct tcf_pkt_info { unsigned char * ptr; @@ -355,6 +358,7 @@ struct tcf_ematch_ops; * @ops: the operations lookup table of the corresponding ematch module * @datalen: length of the ematch specific configuration data * @data: ematch specific data + * @net: the network namespace */ struct tcf_ematch { struct tcf_ematch_ops * ops; -- cgit v1.2.3 From 4039146777a91e1576da2bf38e0d8a1061a1ae47 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 3 Aug 2021 12:00:16 +0200 Subject: net: ipv6: fix returned variable type in ip6_skb_dst_mtu The patch fixing the returned value of ip6_skb_dst_mtu (int -> unsigned int) was rebased between its initial review and the version applied. In the meantime fade56410c22 was applied, which added a new variable (int) used as the returned value. This lead to a mismatch between the function prototype and the variable used as the return value. Fixes: 40fc3054b458 ("net: ipv6: fix return value of ip6_skb_dst_mtu") Cc: Vadim Fedorenko Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 625a38ccb5d9..0bf09a9bca4e 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -265,7 +265,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) { - int mtu; + unsigned int mtu; struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; -- cgit v1.2.3 From 81dd3ee5962d767b913d4c4efec3f50e888463c1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Aug 2021 13:40:40 +0200 Subject: appletalk: ltpc: remove static probing This driver never relies on the netdev_boot_setup_check() to get its configuration, so it can just as well do its own probing all the time. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/Space.c | 3 --- drivers/net/appletalk/ltpc.c | 7 ++----- include/net/Space.h | 1 - 3 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/drivers/net/Space.c b/drivers/net/Space.c index df79e7370bcc..9196a26615cc 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -142,9 +142,6 @@ static int __init net_olddevs_init(void) cops_probe(1); cops_probe(2); #endif -#ifdef CONFIG_LTPC - ltpc_probe(); -#endif return 0; } diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c index 69c270885ff0..1f8925e75b3f 100644 --- a/drivers/net/appletalk/ltpc.c +++ b/drivers/net/appletalk/ltpc.c @@ -1015,7 +1015,7 @@ static const struct net_device_ops ltpc_netdev = { .ndo_set_rx_mode = set_multicast_list, }; -struct net_device * __init ltpc_probe(void) +static struct net_device * __init ltpc_probe(void) { struct net_device *dev; int err = -ENOMEM; @@ -1221,12 +1221,10 @@ static int __init ltpc_setup(char *str) } __setup("ltpc=", ltpc_setup); -#endif /* MODULE */ +#endif static struct net_device *dev_ltpc; -#ifdef MODULE - MODULE_LICENSE("GPL"); module_param(debug, int, 0); module_param_hw(io, int, ioport, 0); @@ -1244,7 +1242,6 @@ static int __init ltpc_module_init(void) return PTR_ERR_OR_ZERO(dev_ltpc); } module_init(ltpc_module_init); -#endif static void __exit ltpc_cleanup(void) { diff --git a/include/net/Space.h b/include/net/Space.h index 9cce0d80d37a..e30e7a70ea99 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -21,7 +21,6 @@ struct net_device *mvme147lance_probe(int unit); struct net_device *tc515_probe(int unit); struct net_device *lance_probe(int unit); struct net_device *cops_probe(int unit); -struct net_device *ltpc_probe(void); /* Fibre Channel adapters */ int iph5526_probe(struct net_device *dev); -- cgit v1.2.3 From e179d78ee11a70e2675bc572f9f4e33d97233b23 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Aug 2021 13:40:43 +0200 Subject: m68k: remove legacy probing There are six m68k specific drivers that use the legacy probe method in drivers/net/Space.c. However, all of these only support a single device, and they completely ignore the command line settings from netdev_boot_setup_check, so there is really no point at all. Aside from sun3_82586, these already have a module_init function that can be used for built-in mode as well, simply by removing the #ifdef. Note that the 82596 driver was previously used on ISA as well, but that got dropped long ago. Reviewed-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/Space.c | 25 +------------------------ drivers/net/ethernet/8390/apne.c | 11 ++--------- drivers/net/ethernet/amd/atarilance.c | 11 ++--------- drivers/net/ethernet/amd/mvme147.c | 16 ++++++---------- drivers/net/ethernet/amd/sun3lance.c | 19 ++++++------------- drivers/net/ethernet/i825xx/82596.c | 24 ++++++------------------ drivers/net/ethernet/i825xx/sun3_82586.c | 17 +++++++---------- include/net/Space.h | 6 ------ 8 files changed, 30 insertions(+), 99 deletions(-) (limited to 'include/net') diff --git a/drivers/net/Space.c b/drivers/net/Space.c index 9f573f7ded3c..a03559f23295 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -80,34 +80,12 @@ static struct devprobe2 isa_probes[] __initdata = { #ifdef CONFIG_CS89x0_ISA {cs89x0_probe, 0}, #endif -#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_BVME6000_NET) /* Intel */ - {i82596_probe, 0}, /* I82596 */ -#endif #ifdef CONFIG_NI65 {ni65_probe, 0}, #endif {NULL, 0}, }; -static struct devprobe2 m68k_probes[] __initdata = { -#ifdef CONFIG_ATARILANCE /* Lance-based Atari ethernet boards */ - {atarilance_probe, 0}, -#endif -#ifdef CONFIG_SUN3LANCE /* sun3 onboard Lance chip */ - {sun3lance_probe, 0}, -#endif -#ifdef CONFIG_SUN3_82586 /* sun3 onboard Intel 82586 chip */ - {sun3_82586_probe, 0}, -#endif -#ifdef CONFIG_APNE /* A1200 PCMCIA NE2000 */ - {apne_probe, 0}, -#endif -#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */ - {mvme147lance_probe, 0}, -#endif - {NULL, 0}, -}; - /* Unified ethernet device probe, segmented per architecture and * per bus interface. This drives the legacy devices only for now. */ @@ -119,8 +97,7 @@ static void __init ethif_probe2(int unit) if (base_addr == 1) return; - (void)(probe_list2(unit, m68k_probes, base_addr == 0) && - probe_list2(unit, isa_probes, base_addr == 0)); + probe_list2(unit, isa_probes, base_addr == 0); } /* Statically configured drivers -- order matters here. */ diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c index fe6c834c422e..da1ae37a9d73 100644 --- a/drivers/net/ethernet/8390/apne.c +++ b/drivers/net/ethernet/8390/apne.c @@ -75,7 +75,6 @@ #define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */ -struct net_device * __init apne_probe(int unit); static int apne_probe1(struct net_device *dev, int ioaddr); static void apne_reset_8390(struct net_device *dev); @@ -120,7 +119,7 @@ static u32 apne_msg_enable; module_param_named(msg_enable, apne_msg_enable, uint, 0444); MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)"); -struct net_device * __init apne_probe(int unit) +static struct net_device * __init apne_probe(void) { struct net_device *dev; struct ei_device *ei_local; @@ -150,10 +149,6 @@ struct net_device * __init apne_probe(int unit) dev = alloc_ei_netdev(); if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } ei_local = netdev_priv(dev); ei_local->msg_enable = apne_msg_enable; @@ -554,12 +549,11 @@ static irqreturn_t apne_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -#ifdef MODULE static struct net_device *apne_dev; static int __init apne_module_init(void) { - apne_dev = apne_probe(-1); + apne_dev = apne_probe(); return PTR_ERR_OR_ZERO(apne_dev); } @@ -579,7 +573,6 @@ static void __exit apne_module_exit(void) } module_init(apne_module_init); module_exit(apne_module_exit); -#endif static int init_pcmcia(void) { diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c index 36f54d13a2eb..9d2f49fd945e 100644 --- a/drivers/net/ethernet/amd/atarilance.c +++ b/drivers/net/ethernet/amd/atarilance.c @@ -367,7 +367,7 @@ static void *slow_memcpy( void *dst, const void *src, size_t len ) } -struct net_device * __init atarilance_probe(int unit) +struct net_device * __init atarilance_probe(void) { int i; static int found; @@ -382,10 +382,6 @@ struct net_device * __init atarilance_probe(int unit) dev = alloc_etherdev(sizeof(struct lance_private)); if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } for( i = 0; i < N_LANCE_ADDR; ++i ) { if (lance_probe1( dev, &lance_addr_list[i] )) { @@ -1137,13 +1133,11 @@ static int lance_set_mac_address( struct net_device *dev, void *addr ) return 0; } - -#ifdef MODULE static struct net_device *atarilance_dev; static int __init atarilance_module_init(void) { - atarilance_dev = atarilance_probe(-1); + atarilance_dev = atarilance_probe(); return PTR_ERR_OR_ZERO(atarilance_dev); } @@ -1155,4 +1149,3 @@ static void __exit atarilance_module_exit(void) } module_init(atarilance_module_init); module_exit(atarilance_module_exit); -#endif /* MODULE */ diff --git a/drivers/net/ethernet/amd/mvme147.c b/drivers/net/ethernet/amd/mvme147.c index 3f2e4cdd0b83..da97fccea9ea 100644 --- a/drivers/net/ethernet/amd/mvme147.c +++ b/drivers/net/ethernet/amd/mvme147.c @@ -68,7 +68,7 @@ static const struct net_device_ops lance_netdev_ops = { }; /* Initialise the one and only on-board 7990 */ -struct net_device * __init mvme147lance_probe(int unit) +static struct net_device * __init mvme147lance_probe(void) { struct net_device *dev; static int called; @@ -86,9 +86,6 @@ struct net_device * __init mvme147lance_probe(int unit) if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) - sprintf(dev->name, "eth%d", unit); - /* Fill the dev fields */ dev->base_addr = (unsigned long)MVME147_LANCE_BASE; dev->netdev_ops = &lance_netdev_ops; @@ -179,22 +176,21 @@ static int m147lance_close(struct net_device *dev) return 0; } -#ifdef MODULE MODULE_LICENSE("GPL"); static struct net_device *dev_mvme147_lance; -int __init init_module(void) +static int __init m147lance_init(void) { - dev_mvme147_lance = mvme147lance_probe(-1); + dev_mvme147_lance = mvme147lance_probe(); return PTR_ERR_OR_ZERO(dev_mvme147_lance); } +module_init(m147lance_init); -void __exit cleanup_module(void) +static void __exit m147lance_exit(void) { struct m147lance_private *lp = netdev_priv(dev_mvme147_lance); unregister_netdev(dev_mvme147_lance); free_pages(lp->ram, 3); free_netdev(dev_mvme147_lance); } - -#endif /* MODULE */ +module_exit(m147lance_exit); diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c index f8d7a9387a56..4a845bc071b2 100644 --- a/drivers/net/ethernet/amd/sun3lance.c +++ b/drivers/net/ethernet/amd/sun3lance.c @@ -245,7 +245,7 @@ static void set_multicast_list( struct net_device *dev ); /************************* End of Prototypes **************************/ -struct net_device * __init sun3lance_probe(int unit) +static struct net_device * __init sun3lance_probe(void) { struct net_device *dev; static int found; @@ -272,10 +272,6 @@ struct net_device * __init sun3lance_probe(int unit) dev = alloc_etherdev(sizeof(struct lance_private)); if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } if (!lance_probe(dev)) goto out; @@ -924,17 +920,16 @@ static void set_multicast_list( struct net_device *dev ) } -#ifdef MODULE - static struct net_device *sun3lance_dev; -int __init init_module(void) +static int __init sun3lance_init(void) { - sun3lance_dev = sun3lance_probe(-1); + sun3lance_dev = sun3lance_probe(); return PTR_ERR_OR_ZERO(sun3lance_dev); } +module_init(sun3lance_init); -void __exit cleanup_module(void) +static void __exit sun3lance_cleanup(void) { unregister_netdev(sun3lance_dev); #ifdef CONFIG_SUN3 @@ -942,6 +937,4 @@ void __exit cleanup_module(void) #endif free_netdev(sun3lance_dev); } - -#endif /* MODULE */ - +module_exit(sun3lance_cleanup); diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index fc8c7cd67471..b8a40146b895 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -1110,9 +1110,6 @@ static void print_eth(unsigned char *add, char *str) add, add + 6, add, add[12], add[13], str); } -static int io = 0x300; -static int irq = 10; - static const struct net_device_ops i596_netdev_ops = { .ndo_open = i596_open, .ndo_stop = i596_close, @@ -1123,7 +1120,7 @@ static const struct net_device_ops i596_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -struct net_device * __init i82596_probe(int unit) +static struct net_device * __init i82596_probe(void) { struct net_device *dev; int i; @@ -1140,14 +1137,6 @@ struct net_device * __init i82596_probe(int unit) if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } else { - dev->base_addr = io; - dev->irq = irq; - } - #ifdef ENABLE_MVME16x_NET if (MACH_IS_MVME16x) { if (mvme16x_config & MVME16x_CONFIG_NO_ETHERNET) { @@ -1515,22 +1504,22 @@ static void set_multicast_list(struct net_device *dev) } } -#ifdef MODULE static struct net_device *dev_82596; static int debug = -1; module_param(debug, int, 0); MODULE_PARM_DESC(debug, "i82596 debug mask"); -int __init init_module(void) +static int __init i82596_init(void) { if (debug >= 0) i596_debug = debug; - dev_82596 = i82596_probe(-1); + dev_82596 = i82596_probe(); return PTR_ERR_OR_ZERO(dev_82596); } +module_init(i82596_init); -void __exit cleanup_module(void) +static void __exit i82596_cleanup(void) { unregister_netdev(dev_82596); #ifdef __mc68000__ @@ -1544,5 +1533,4 @@ void __exit cleanup_module(void) free_page ((u32)(dev_82596->mem_start)); free_netdev(dev_82596); } - -#endif /* MODULE */ +module_exit(i82596_cleanup); diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 4564ee02c95f..893e0ddcb611 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -29,6 +29,7 @@ static int rfdadd = 0; /* rfdadd=1 may be better for 8K MEM cards */ static int fifo=0x8; /* don't change */ #include +#include #include #include #include @@ -276,7 +277,7 @@ static void alloc586(struct net_device *dev) memset((char *)p->scb,0,sizeof(struct scb_struct)); } -struct net_device * __init sun3_82586_probe(int unit) +static int __init sun3_82586_probe(void) { struct net_device *dev; unsigned long ioaddr; @@ -291,25 +292,20 @@ struct net_device * __init sun3_82586_probe(int unit) break; default: - return ERR_PTR(-ENODEV); + return -ENODEV; } if (found) - return ERR_PTR(-ENODEV); + return -ENODEV; ioaddr = (unsigned long)ioremap(IE_OBIO, SUN3_82586_TOTAL_SIZE); if (!ioaddr) - return ERR_PTR(-ENOMEM); + return -ENOMEM; found = 1; dev = alloc_etherdev(sizeof(struct priv)); if (!dev) goto out; - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } - dev->irq = IE_IRQ; dev->base_addr = ioaddr; err = sun3_82586_probe1(dev, ioaddr); @@ -326,8 +322,9 @@ out1: free_netdev(dev); out: iounmap((void __iomem *)ioaddr); - return ERR_PTR(err); + return err; } +module_init(sun3_82586_probe); static const struct net_device_ops sun3_82586_netdev_ops = { .ndo_open = sun3_82586_open, diff --git a/include/net/Space.h b/include/net/Space.h index e30e7a70ea99..93fd6caa4bad 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -8,16 +8,10 @@ struct net_device *ultra_probe(int unit); struct net_device *wd_probe(int unit); struct net_device *ne_probe(int unit); struct net_device *fmv18x_probe(int unit); -struct net_device *i82596_probe(int unit); struct net_device *ni65_probe(int unit); struct net_device *sonic_probe(int unit); struct net_device *smc_init(int unit); -struct net_device *atarilance_probe(int unit); -struct net_device *sun3lance_probe(int unit); -struct net_device *sun3_82586_probe(int unit); -struct net_device *apne_probe(int unit); struct net_device *cs89x0_probe(int unit); -struct net_device *mvme147lance_probe(int unit); struct net_device *tc515_probe(int unit); struct net_device *lance_probe(int unit); struct net_device *cops_probe(int unit); -- cgit v1.2.3 From 375df5f8c1812c59930cfed14ff4cc15929c8f2f Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Tue, 3 Aug 2021 13:40:44 +0200 Subject: ax88796: export ax_NS8390_init() hook The block I/O code for the new X-Surf 100 ax88796 driver needs ax_NS8390_init() for error fixup in its block_output function. Export this static function through the ax_NS8390_reinit() wrapper so we can lose the lib8380.c include in the X-Surf 100 driver. [arnd: add the declaration in the header to avoid a -Wmissing-prototypes warning] Fixes: 861928f4e60e826c ("net-next: New ax88796 platform driver for Amiga X-Surf 100 Zorro board (m68k)") Signed-off-by: Michael Schmitz Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/ax88796.c | 7 +++++++ include/net/ax88796.h | 3 +++ 2 files changed, 10 insertions(+) (limited to 'include/net') diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index 9595dd1f32ca..6c6bdd5913ec 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -101,6 +101,13 @@ static inline struct ax_device *to_ax_dev(struct net_device *dev) return (struct ax_device *)(ei_local + 1); } +void ax_NS8390_reinit(struct net_device *dev) +{ + ax_NS8390_init(dev, 1); +} + +EXPORT_SYMBOL_GPL(ax_NS8390_reinit); + /* * ax_initial_check * diff --git a/include/net/ax88796.h b/include/net/ax88796.h index aa52b2e8ff7b..2ed23a368602 100644 --- a/include/net/ax88796.h +++ b/include/net/ax88796.h @@ -38,4 +38,7 @@ struct ax_plat_data { int (*check_irq)(struct platform_device *pdev); }; +/* exported from ax88796.c for xsurf100.c */ +extern void ax_NS8390_reinit(struct net_device *dev); + #endif /* __NET_AX88796_PLAT_H */ -- cgit v1.2.3 From 72bcad5393a7079706fcfe02d84ed1599716d6a2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Aug 2021 13:40:49 +0200 Subject: wan: remove sbni/granch driver The driver was merged in 1999 and has only ever seen treewide cleanups since then, with no indication whatsoever that anyone has actually had access to hardware for testing the patches. >From the information in the link below, it appears that the hardware is for some leased line system in Russia that has since been discontinued, and useless without any remote end to connect to. As the driver still feels like a Linux-2.2 era artifact today, it appears that the best way forward is to just delete it. Link: https://www.tms.ru/%D0%90%D0%B4%D0%B0%D0%BF%D1%82%D0%B5%D1%80_%D0%B4%D0%BB%D1%8F_%D0%B2%D1%8B%D0%B4%D0%B5%D0%BB%D0%B5%D0%BD%D0%BD%D1%8B%D1%85_%D0%BB%D0%B8%D0%BD%D0%B8%D0%B9_Granch_SBNI12-10 Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- Documentation/admin-guide/kernel-parameters.txt | 2 - drivers/net/Space.c | 4 - drivers/net/wan/Kconfig | 27 - drivers/net/wan/Makefile | 1 - drivers/net/wan/sbni.c | 1639 ----------------------- drivers/net/wan/sbni.h | 147 -- include/net/Space.h | 3 - 7 files changed, 1823 deletions(-) delete mode 100644 drivers/net/wan/sbni.c delete mode 100644 drivers/net/wan/sbni.h (limited to 'include/net') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bdb22006f713..ee0569a040c6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4945,8 +4945,6 @@ sa1100ir [NET] See drivers/net/irda/sa1100_ir.c. - sbni= [NET] Granch SBNI12 leased line adapter - sched_verbose [KNL] Enables verbose scheduler debug messages. schedstats= [KNL,X86] Enable or disable scheduled statistics. diff --git a/drivers/net/Space.c b/drivers/net/Space.c index f40f2e38682f..49e67c9fb5a4 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -247,10 +247,6 @@ static int __init net_olddevs_init(void) { int num; -#ifdef CONFIG_SBNI - for (num = 0; num < 8; ++num) - sbni_probe(num); -#endif for (num = 0; num < 8; ++num) ethif_probe2(num); diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index d31791535ccf..592a8389fc5a 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -313,33 +313,6 @@ config LAPBETHER To compile this driver as a module, choose M here: the module will be called lapbether. - If unsure, say N. - -config SBNI - tristate "Granch SBNI12 Leased Line adapter support" - depends on X86 - help - Driver for ISA SBNI12-xx cards which are low cost alternatives to - leased line modems. - - You can find more information and last versions of drivers and - utilities at . If you have any question you - can send email to . - - To compile this driver as a module, choose M here: the - module will be called sbni. - - If unsure, say N. - -config SBNI_MULTILINE - bool "Multiple line feature support" - depends on SBNI - help - Schedule traffic for some parallel lines, via SBNI12 adapters. - - If you have two computers connected with two parallel lines it's - possible to increase transfer rate nearly twice. You should have - a program named 'sbniconfig' to configure adapters. If unsure, say N. diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index 081666c36ca2..f6b92efffc94 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_FARSYNC) += farsync.o obj-$(CONFIG_LANMEDIA) += lmc/ obj-$(CONFIG_LAPBETHER) += lapbether.o -obj-$(CONFIG_SBNI) += sbni.o obj-$(CONFIG_N2) += n2.o obj-$(CONFIG_C101) += c101.o obj-$(CONFIG_WANXL) += wanxl.o diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c deleted file mode 100644 index 469fe979d664..000000000000 --- a/drivers/net/wan/sbni.c +++ /dev/null @@ -1,1639 +0,0 @@ -/* sbni.c: Granch SBNI12 leased line adapters driver for linux - * - * Written 2001 by Denis I.Timofeev (timofeev@granch.ru) - * - * Previous versions were written by Yaroslav Polyakov, - * Alexey Zverev and Max Khon. - * - * Driver supports SBNI12-02,-04,-05,-10,-11 cards, single and - * double-channel, PCI and ISA modifications. - * More info and useful utilities to work with SBNI12 cards you can find - * at http://www.granch.com (English) or http://www.granch.ru (Russian) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License. - * - * - * 5.0.1 Jun 22 2001 - * - Fixed bug in probe - * 5.0.0 Jun 06 2001 - * - Driver was completely redesigned by Denis I.Timofeev, - * - now PCI/Dual, ISA/Dual (with single interrupt line) models are - * - supported - * 3.3.0 Thu Feb 24 21:30:28 NOVT 2000 - * - PCI cards support - * 3.2.0 Mon Dec 13 22:26:53 NOVT 1999 - * - Completely rebuilt all the packet storage system - * - to work in Ethernet-like style. - * 3.1.1 just fixed some bugs (5 aug 1999) - * 3.1.0 added balancing feature (26 apr 1999) - * 3.0.1 just fixed some bugs (14 apr 1999). - * 3.0.0 Initial Revision, Yaroslav Polyakov (24 Feb 1999) - * - added pre-calculation for CRC, fixed bug with "len-2" frames, - * - removed outbound fragmentation (MTU=1000), written CRC-calculation - * - on asm, added work with hard_headers and now we have our own cache - * - for them, optionally supported word-interchange on some chipsets, - * - * Known problem: this driver wasn't tested on multiprocessor machine. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "sbni.h" - -/* device private data */ - -struct net_local { - struct timer_list watchdog; - struct net_device *watchdog_dev; - - spinlock_t lock; - struct sk_buff *rx_buf_p; /* receive buffer ptr */ - struct sk_buff *tx_buf_p; /* transmit buffer ptr */ - - unsigned int framelen; /* current frame length */ - unsigned int maxframe; /* maximum valid frame length */ - unsigned int state; - unsigned int inppos, outpos; /* positions in rx/tx buffers */ - - /* transmitting frame number - from frames qty to 1 */ - unsigned int tx_frameno; - - /* expected number of next receiving frame */ - unsigned int wait_frameno; - - /* count of failed attempts to frame send - 32 attempts do before - error - while receiver tunes on opposite side of wire */ - unsigned int trans_errors; - - /* idle time; send pong when limit exceeded */ - unsigned int timer_ticks; - - /* fields used for receive level autoselection */ - int delta_rxl; - unsigned int cur_rxl_index, timeout_rxl; - unsigned long cur_rxl_rcvd, prev_rxl_rcvd; - - struct sbni_csr1 csr1; /* current value of CSR1 */ - struct sbni_in_stats in_stats; /* internal statistics */ - - struct net_device *second; /* for ISA/dual cards */ - -#ifdef CONFIG_SBNI_MULTILINE - struct net_device *master; - struct net_device *link; -#endif -}; - - -static int sbni_card_probe( unsigned long ); -static int sbni_pci_probe( struct net_device * ); -static struct net_device *sbni_probe1(struct net_device *, unsigned long, int); -static int sbni_open( struct net_device * ); -static int sbni_close( struct net_device * ); -static netdev_tx_t sbni_start_xmit(struct sk_buff *, - struct net_device * ); -static int sbni_siocdevprivate(struct net_device *, struct ifreq *, - void __user *, int); -static void set_multicast_list( struct net_device * ); - -static irqreturn_t sbni_interrupt( int, void * ); -static void handle_channel( struct net_device * ); -static int recv_frame( struct net_device * ); -static void send_frame( struct net_device * ); -static int upload_data( struct net_device *, - unsigned, unsigned, unsigned, u32 ); -static void download_data( struct net_device *, u32 * ); -static void sbni_watchdog(struct timer_list *); -static void interpret_ack( struct net_device *, unsigned ); -static int append_frame_to_pkt( struct net_device *, unsigned, u32 ); -static void indicate_pkt( struct net_device * ); -static void card_start( struct net_device * ); -static void prepare_to_send( struct sk_buff *, struct net_device * ); -static void drop_xmit_queue( struct net_device * ); -static void send_frame_header( struct net_device *, u32 * ); -static int skip_tail( unsigned int, unsigned int, u32 ); -static int check_fhdr( u32, u32 *, u32 *, u32 *, u32 *, u32 * ); -static void change_level( struct net_device * ); -static void timeout_change_level( struct net_device * ); -static u32 calc_crc32( u32, u8 *, u32 ); -static struct sk_buff * get_rx_buf( struct net_device * ); -static int sbni_init( struct net_device * ); - -#ifdef CONFIG_SBNI_MULTILINE -static int enslave( struct net_device *, struct net_device * ); -static int emancipate( struct net_device * ); -#endif - -static const char version[] = - "Granch SBNI12 driver ver 5.0.1 Jun 22 2001 Denis I.Timofeev.\n"; - -static bool skip_pci_probe __initdata = false; -static int scandone __initdata = 0; -static int num __initdata = 0; - -static unsigned char rxl_tab[]; -static u32 crc32tab[]; - -/* A list of all installed devices, for removing the driver module. */ -static struct net_device *sbni_cards[ SBNI_MAX_NUM_CARDS ]; - -/* Lists of device's parameters */ -static u32 io[ SBNI_MAX_NUM_CARDS ] __initdata = - { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 }; -static u32 irq[ SBNI_MAX_NUM_CARDS ] __initdata; -static u32 baud[ SBNI_MAX_NUM_CARDS ] __initdata; -static u32 rxl[ SBNI_MAX_NUM_CARDS ] __initdata = - { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 }; -static u32 mac[ SBNI_MAX_NUM_CARDS ] __initdata; - -#ifndef MODULE -typedef u32 iarr[]; -static iarr *dest[5] __initdata = { &io, &irq, &baud, &rxl, &mac }; -#endif - -/* A zero-terminated list of I/O addresses to be probed on ISA bus */ -static unsigned int netcard_portlist[ ] __initdata = { - 0x210, 0x214, 0x220, 0x224, 0x230, 0x234, 0x240, 0x244, 0x250, 0x254, - 0x260, 0x264, 0x270, 0x274, 0x280, 0x284, 0x290, 0x294, 0x2a0, 0x2a4, - 0x2b0, 0x2b4, 0x2c0, 0x2c4, 0x2d0, 0x2d4, 0x2e0, 0x2e4, 0x2f0, 0x2f4, - 0 }; - -#define NET_LOCAL_LOCK(dev) (((struct net_local *)netdev_priv(dev))->lock) - -/* - * Look for SBNI card which addr stored in dev->base_addr, if nonzero. - * Otherwise, look through PCI bus. If none PCI-card was found, scan ISA. - */ - -static inline int __init -sbni_isa_probe( struct net_device *dev ) -{ - if( dev->base_addr > 0x1ff && - request_region( dev->base_addr, SBNI_IO_EXTENT, dev->name ) && - sbni_probe1( dev, dev->base_addr, dev->irq ) ) - - return 0; - else { - pr_err("base address 0x%lx is busy, or adapter is malfunctional!\n", - dev->base_addr); - return -ENODEV; - } -} - -static const struct net_device_ops sbni_netdev_ops = { - .ndo_open = sbni_open, - .ndo_stop = sbni_close, - .ndo_start_xmit = sbni_start_xmit, - .ndo_set_rx_mode = set_multicast_list, - .ndo_siocdevprivate = sbni_siocdevprivate, - .ndo_set_mac_address = eth_mac_addr, - .ndo_validate_addr = eth_validate_addr, -}; - -static void __init sbni_devsetup(struct net_device *dev) -{ - ether_setup( dev ); - dev->netdev_ops = &sbni_netdev_ops; -} - -int __init sbni_probe(int unit) -{ - struct net_device *dev; - int err; - - dev = alloc_netdev(sizeof(struct net_local), "sbni", - NET_NAME_UNKNOWN, sbni_devsetup); - if (!dev) - return -ENOMEM; - - dev->netdev_ops = &sbni_netdev_ops; - - sprintf(dev->name, "sbni%d", unit); - netdev_boot_setup_check(dev); - - err = sbni_init(dev); - if (err) { - free_netdev(dev); - return err; - } - - err = register_netdev(dev); - if (err) { - release_region( dev->base_addr, SBNI_IO_EXTENT ); - free_netdev(dev); - return err; - } - pr_info_once("%s", version); - return 0; -} - -static int __init sbni_init(struct net_device *dev) -{ - int i; - if( dev->base_addr ) - return sbni_isa_probe( dev ); - /* otherwise we have to perform search our adapter */ - - if( io[ num ] != -1 ) { - dev->base_addr = io[ num ]; - dev->irq = irq[ num ]; - } else if( scandone || io[ 0 ] != -1 ) { - return -ENODEV; - } - - /* if io[ num ] contains non-zero address, then that is on ISA bus */ - if( dev->base_addr ) - return sbni_isa_probe( dev ); - - /* ...otherwise - scan PCI first */ - if( !skip_pci_probe && !sbni_pci_probe( dev ) ) - return 0; - - if( io[ num ] == -1 ) { - /* Auto-scan will be stopped when first ISA card were found */ - scandone = 1; - if( num > 0 ) - return -ENODEV; - } - - for( i = 0; netcard_portlist[ i ]; ++i ) { - int ioaddr = netcard_portlist[ i ]; - if( request_region( ioaddr, SBNI_IO_EXTENT, dev->name ) && - sbni_probe1( dev, ioaddr, 0 )) - return 0; - } - - return -ENODEV; -} - - -static int __init -sbni_pci_probe( struct net_device *dev ) -{ - struct pci_dev *pdev = NULL; - - while( (pdev = pci_get_class( PCI_CLASS_NETWORK_OTHER << 8, pdev )) - != NULL ) { - int pci_irq_line; - unsigned long pci_ioaddr; - - if( pdev->vendor != SBNI_PCI_VENDOR && - pdev->device != SBNI_PCI_DEVICE ) - continue; - - pci_ioaddr = pci_resource_start( pdev, 0 ); - pci_irq_line = pdev->irq; - - /* Avoid already found cards from previous calls */ - if( !request_region( pci_ioaddr, SBNI_IO_EXTENT, dev->name ) ) { - if (pdev->subsystem_device != 2) - continue; - - /* Dual adapter is present */ - if (!request_region(pci_ioaddr += 4, SBNI_IO_EXTENT, - dev->name ) ) - continue; - } - - if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs) - pr_warn( -"WARNING: The PCI BIOS assigned this PCI card to IRQ %d, which is unlikely to work!.\n" -"You should use the PCI BIOS setup to assign a valid IRQ line.\n", - pci_irq_line ); - - /* avoiding re-enable dual adapters */ - if( (pci_ioaddr & 7) == 0 && pci_enable_device( pdev ) ) { - release_region( pci_ioaddr, SBNI_IO_EXTENT ); - pci_dev_put( pdev ); - return -EIO; - } - if( sbni_probe1( dev, pci_ioaddr, pci_irq_line ) ) { - SET_NETDEV_DEV(dev, &pdev->dev); - /* not the best thing to do, but this is all messed up - for hotplug systems anyway... */ - pci_dev_put( pdev ); - return 0; - } - } - return -ENODEV; -} - - -static struct net_device * __init -sbni_probe1( struct net_device *dev, unsigned long ioaddr, int irq ) -{ - struct net_local *nl; - - if( sbni_card_probe( ioaddr ) ) { - release_region( ioaddr, SBNI_IO_EXTENT ); - return NULL; - } - - outb( 0, ioaddr + CSR0 ); - - if( irq < 2 ) { - unsigned long irq_mask; - - irq_mask = probe_irq_on(); - outb( EN_INT | TR_REQ, ioaddr + CSR0 ); - outb( PR_RES, ioaddr + CSR1 ); - mdelay(50); - irq = probe_irq_off(irq_mask); - outb( 0, ioaddr + CSR0 ); - - if( !irq ) { - pr_err("%s: can't detect device irq!\n", dev->name); - release_region( ioaddr, SBNI_IO_EXTENT ); - return NULL; - } - } else if( irq == 2 ) - irq = 9; - - dev->irq = irq; - dev->base_addr = ioaddr; - - /* Fill in sbni-specific dev fields. */ - nl = netdev_priv(dev); - if( !nl ) { - pr_err("%s: unable to get memory!\n", dev->name); - release_region( ioaddr, SBNI_IO_EXTENT ); - return NULL; - } - - memset( nl, 0, sizeof(struct net_local) ); - spin_lock_init( &nl->lock ); - - /* store MAC address (generate if that isn't known) */ - *(__be16 *)dev->dev_addr = htons( 0x00ff ); - *(__be32 *)(dev->dev_addr + 2) = htonl( 0x01000000 | - ((mac[num] ? - mac[num] : - (u32)((long)netdev_priv(dev))) & 0x00ffffff)); - - /* store link settings (speed, receive level ) */ - nl->maxframe = DEFAULT_FRAME_LEN; - nl->csr1.rate = baud[ num ]; - - if( (nl->cur_rxl_index = rxl[ num ]) == -1 ) { - /* autotune rxl */ - nl->cur_rxl_index = DEF_RXL; - nl->delta_rxl = DEF_RXL_DELTA; - } else { - nl->delta_rxl = 0; - } - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ]; - if( inb( ioaddr + CSR0 ) & 0x01 ) - nl->state |= FL_SLOW_MODE; - - pr_notice("%s: ioaddr %#lx, irq %d, MAC: 00:ff:01:%02x:%02x:%02x\n", - dev->name, dev->base_addr, dev->irq, - ((u8 *)dev->dev_addr)[3], - ((u8 *)dev->dev_addr)[4], - ((u8 *)dev->dev_addr)[5]); - - pr_notice("%s: speed %d", - dev->name, - ((nl->state & FL_SLOW_MODE) ? 500000 : 2000000) - / (1 << nl->csr1.rate)); - - if( nl->delta_rxl == 0 ) - pr_cont(", receive level 0x%x (fixed)\n", nl->cur_rxl_index); - else - pr_cont(", receive level (auto)\n"); - -#ifdef CONFIG_SBNI_MULTILINE - nl->master = dev; - nl->link = NULL; -#endif - - sbni_cards[ num++ ] = dev; - return dev; -} - -/* -------------------------------------------------------------------------- */ - -#ifdef CONFIG_SBNI_MULTILINE - -static netdev_tx_t -sbni_start_xmit( struct sk_buff *skb, struct net_device *dev ) -{ - struct net_device *p; - - netif_stop_queue( dev ); - - /* Looking for idle device in the list */ - for( p = dev; p; ) { - struct net_local *nl = netdev_priv(p); - spin_lock( &nl->lock ); - if( nl->tx_buf_p || (nl->state & FL_LINE_DOWN) ) { - p = nl->link; - spin_unlock( &nl->lock ); - } else { - /* Idle dev is found */ - prepare_to_send( skb, p ); - spin_unlock( &nl->lock ); - netif_start_queue( dev ); - return NETDEV_TX_OK; - } - } - - return NETDEV_TX_BUSY; -} - -#else /* CONFIG_SBNI_MULTILINE */ - -static netdev_tx_t -sbni_start_xmit( struct sk_buff *skb, struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - netif_stop_queue( dev ); - spin_lock( &nl->lock ); - - prepare_to_send( skb, dev ); - - spin_unlock( &nl->lock ); - return NETDEV_TX_OK; -} - -#endif /* CONFIG_SBNI_MULTILINE */ - -/* -------------------------------------------------------------------------- */ - -/* interrupt handler */ - -/* - * SBNI12D-10, -11/ISA boards within "common interrupt" mode could not - * be looked as two independent single-channel devices. Every channel seems - * as Ethernet interface but interrupt handler must be common. Really, first - * channel ("master") driver only registers the handler. In its struct net_local - * it has got pointer to "slave" channel's struct net_local and handles that's - * interrupts too. - * dev of successfully attached ISA SBNI boards is linked to list. - * While next board driver is initialized, it scans this list. If one - * has found dev with same irq and ioaddr different by 4 then it assumes - * this board to be "master". - */ - -static irqreturn_t -sbni_interrupt( int irq, void *dev_id ) -{ - struct net_device *dev = dev_id; - struct net_local *nl = netdev_priv(dev); - int repeat; - - spin_lock( &nl->lock ); - if( nl->second ) - spin_lock(&NET_LOCAL_LOCK(nl->second)); - - do { - repeat = 0; - if( inb( dev->base_addr + CSR0 ) & (RC_RDY | TR_RDY) ) { - handle_channel( dev ); - repeat = 1; - } - if( nl->second && /* second channel present */ - (inb( nl->second->base_addr+CSR0 ) & (RC_RDY | TR_RDY)) ) { - handle_channel( nl->second ); - repeat = 1; - } - } while( repeat ); - - if( nl->second ) - spin_unlock(&NET_LOCAL_LOCK(nl->second)); - spin_unlock( &nl->lock ); - return IRQ_HANDLED; -} - - -static void -handle_channel( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; - - int req_ans; - unsigned char csr0; - -#ifdef CONFIG_SBNI_MULTILINE - /* Lock the master device because we going to change its local data */ - if( nl->state & FL_SLAVE ) - spin_lock(&NET_LOCAL_LOCK(nl->master)); -#endif - - outb( (inb( ioaddr + CSR0 ) & ~EN_INT) | TR_REQ, ioaddr + CSR0 ); - - nl->timer_ticks = CHANGE_LEVEL_START_TICKS; - for(;;) { - csr0 = inb( ioaddr + CSR0 ); - if( ( csr0 & (RC_RDY | TR_RDY) ) == 0 ) - break; - - req_ans = !(nl->state & FL_PREV_OK); - - if( csr0 & RC_RDY ) - req_ans = recv_frame( dev ); - - /* - * TR_RDY always equals 1 here because we have owned the marker, - * and we set TR_REQ when disabled interrupts - */ - csr0 = inb( ioaddr + CSR0 ); - if( !(csr0 & TR_RDY) || (csr0 & RC_RDY) ) - netdev_err(dev, "internal error!\n"); - - /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */ - if( req_ans || nl->tx_frameno != 0 ) - send_frame( dev ); - else - /* send marker without any data */ - outb( inb( ioaddr + CSR0 ) & ~TR_REQ, ioaddr + CSR0 ); - } - - outb( inb( ioaddr + CSR0 ) | EN_INT, ioaddr + CSR0 ); - -#ifdef CONFIG_SBNI_MULTILINE - if( nl->state & FL_SLAVE ) - spin_unlock(&NET_LOCAL_LOCK(nl->master)); -#endif -} - - -/* - * Routine returns 1 if it needs to acknowledge received frame. - * Empty frame received without errors won't be acknowledged. - */ - -static int -recv_frame( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; - - u32 crc = CRC32_INITIAL; - - unsigned framelen = 0, frameno, ack; - unsigned is_first, frame_ok = 0; - - if( check_fhdr( ioaddr, &framelen, &frameno, &ack, &is_first, &crc ) ) { - frame_ok = framelen > 4 - ? upload_data( dev, framelen, frameno, is_first, crc ) - : skip_tail( ioaddr, framelen, crc ); - if( frame_ok ) - interpret_ack( dev, ack ); - } - - outb( inb( ioaddr + CSR0 ) ^ CT_ZER, ioaddr + CSR0 ); - if( frame_ok ) { - nl->state |= FL_PREV_OK; - if( framelen > 4 ) - nl->in_stats.all_rx_number++; - } else { - nl->state &= ~FL_PREV_OK; - change_level( dev ); - nl->in_stats.all_rx_number++; - nl->in_stats.bad_rx_number++; - } - - return !frame_ok || framelen > 4; -} - - -static void -send_frame( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - u32 crc = CRC32_INITIAL; - - if( nl->state & FL_NEED_RESEND ) { - - /* if frame was sended but not ACK'ed - resend it */ - if( nl->trans_errors ) { - --nl->trans_errors; - if( nl->framelen != 0 ) - nl->in_stats.resend_tx_number++; - } else { - /* cannot xmit with many attempts */ -#ifdef CONFIG_SBNI_MULTILINE - if( (nl->state & FL_SLAVE) || nl->link ) -#endif - nl->state |= FL_LINE_DOWN; - drop_xmit_queue( dev ); - goto do_send; - } - } else - nl->trans_errors = TR_ERROR_COUNT; - - send_frame_header( dev, &crc ); - nl->state |= FL_NEED_RESEND; - /* - * FL_NEED_RESEND will be cleared after ACK, but if empty - * frame sended then in prepare_to_send next frame - */ - - - if( nl->framelen ) { - download_data( dev, &crc ); - nl->in_stats.all_tx_number++; - nl->state |= FL_WAIT_ACK; - } - - outsb( dev->base_addr + DAT, (u8 *)&crc, sizeof crc ); - -do_send: - outb( inb( dev->base_addr + CSR0 ) & ~TR_REQ, dev->base_addr + CSR0 ); - - if( nl->tx_frameno ) - /* next frame exists - we request card to send it */ - outb( inb( dev->base_addr + CSR0 ) | TR_REQ, - dev->base_addr + CSR0 ); -} - - -/* - * Write the frame data into adapter's buffer memory, and calculate CRC. - * Do padding if necessary. - */ - -static void -download_data( struct net_device *dev, u32 *crc_p ) -{ - struct net_local *nl = netdev_priv(dev); - struct sk_buff *skb = nl->tx_buf_p; - - unsigned len = min_t(unsigned int, skb->len - nl->outpos, nl->framelen); - - outsb( dev->base_addr + DAT, skb->data + nl->outpos, len ); - *crc_p = calc_crc32( *crc_p, skb->data + nl->outpos, len ); - - /* if packet too short we should write some more bytes to pad */ - for( len = nl->framelen - len; len--; ) { - outb( 0, dev->base_addr + DAT ); - *crc_p = CRC32( 0, *crc_p ); - } -} - - -static int -upload_data( struct net_device *dev, unsigned framelen, unsigned frameno, - unsigned is_first, u32 crc ) -{ - struct net_local *nl = netdev_priv(dev); - - int frame_ok; - - if( is_first ) { - nl->wait_frameno = frameno; - nl->inppos = 0; - } - - if( nl->wait_frameno == frameno ) { - - if( nl->inppos + framelen <= ETHER_MAX_LEN ) - frame_ok = append_frame_to_pkt( dev, framelen, crc ); - - /* - * if CRC is right but framelen incorrect then transmitter - * error was occurred... drop entire packet - */ - else if( (frame_ok = skip_tail( dev->base_addr, framelen, crc )) - != 0 ) { - nl->wait_frameno = 0; - nl->inppos = 0; -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.rx_errors++; - nl->master->stats.rx_missed_errors++; -#else - dev->stats.rx_errors++; - dev->stats.rx_missed_errors++; -#endif - } - /* now skip all frames until is_first != 0 */ - } else - frame_ok = skip_tail( dev->base_addr, framelen, crc ); - - if( is_first && !frame_ok ) { - /* - * Frame has been broken, but we had already stored - * is_first... Drop entire packet. - */ - nl->wait_frameno = 0; -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.rx_errors++; - nl->master->stats.rx_crc_errors++; -#else - dev->stats.rx_errors++; - dev->stats.rx_crc_errors++; -#endif - } - - return frame_ok; -} - - -static inline void -send_complete( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.tx_packets++; - nl->master->stats.tx_bytes += nl->tx_buf_p->len; -#else - dev->stats.tx_packets++; - dev->stats.tx_bytes += nl->tx_buf_p->len; -#endif - dev_consume_skb_irq(nl->tx_buf_p); - - nl->tx_buf_p = NULL; - - nl->outpos = 0; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); - nl->framelen = 0; -} - - -static void -interpret_ack( struct net_device *dev, unsigned ack ) -{ - struct net_local *nl = netdev_priv(dev); - - if( ack == FRAME_SENT_OK ) { - nl->state &= ~FL_NEED_RESEND; - - if( nl->state & FL_WAIT_ACK ) { - nl->outpos += nl->framelen; - - if( --nl->tx_frameno ) { - nl->framelen = min_t(unsigned int, - nl->maxframe, - nl->tx_buf_p->len - nl->outpos); - } else { - send_complete( dev ); -#ifdef CONFIG_SBNI_MULTILINE - netif_wake_queue( nl->master ); -#else - netif_wake_queue( dev ); -#endif - } - } - } - - nl->state &= ~FL_WAIT_ACK; -} - - -/* - * Glue received frame with previous fragments of packet. - * Indicate packet when last frame would be accepted. - */ - -static int -append_frame_to_pkt( struct net_device *dev, unsigned framelen, u32 crc ) -{ - struct net_local *nl = netdev_priv(dev); - - u8 *p; - - if( nl->inppos + framelen > ETHER_MAX_LEN ) - return 0; - - if( !nl->rx_buf_p && !(nl->rx_buf_p = get_rx_buf( dev )) ) - return 0; - - p = nl->rx_buf_p->data + nl->inppos; - insb( dev->base_addr + DAT, p, framelen ); - if( calc_crc32( crc, p, framelen ) != CRC32_REMAINDER ) - return 0; - - nl->inppos += framelen - 4; - if( --nl->wait_frameno == 0 ) /* last frame received */ - indicate_pkt( dev ); - - return 1; -} - - -/* - * Prepare to start output on adapter. - * Transmitter will be actually activated when marker is accepted. - */ - -static void -prepare_to_send( struct sk_buff *skb, struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - unsigned int len; - - /* nl->tx_buf_p == NULL here! */ - if( nl->tx_buf_p ) - netdev_err(dev, "memory leak!\n"); - - nl->outpos = 0; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); - - len = skb->len; - if( len < SBNI_MIN_LEN ) - len = SBNI_MIN_LEN; - - nl->tx_buf_p = skb; - nl->tx_frameno = DIV_ROUND_UP(len, nl->maxframe); - nl->framelen = len < nl->maxframe ? len : nl->maxframe; - - outb( inb( dev->base_addr + CSR0 ) | TR_REQ, dev->base_addr + CSR0 ); -#ifdef CONFIG_SBNI_MULTILINE - netif_trans_update(nl->master); -#else - netif_trans_update(dev); -#endif -} - - -static void -drop_xmit_queue( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - if( nl->tx_buf_p ) { - dev_kfree_skb_any( nl->tx_buf_p ); - nl->tx_buf_p = NULL; -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.tx_errors++; - nl->master->stats.tx_carrier_errors++; -#else - dev->stats.tx_errors++; - dev->stats.tx_carrier_errors++; -#endif - } - - nl->tx_frameno = 0; - nl->framelen = 0; - nl->outpos = 0; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); -#ifdef CONFIG_SBNI_MULTILINE - netif_start_queue( nl->master ); - netif_trans_update(nl->master); -#else - netif_start_queue( dev ); - netif_trans_update(dev); -#endif -} - - -static void -send_frame_header( struct net_device *dev, u32 *crc_p ) -{ - struct net_local *nl = netdev_priv(dev); - - u32 crc = *crc_p; - u32 len_field = nl->framelen + 6; /* CRC + frameno + reserved */ - u8 value; - - if( nl->state & FL_NEED_RESEND ) - len_field |= FRAME_RETRY; /* non-first attempt... */ - - if( nl->outpos == 0 ) - len_field |= FRAME_FIRST; - - len_field |= (nl->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD; - outb( SBNI_SIG, dev->base_addr + DAT ); - - value = (u8) len_field; - outb( value, dev->base_addr + DAT ); - crc = CRC32( value, crc ); - value = (u8) (len_field >> 8); - outb( value, dev->base_addr + DAT ); - crc = CRC32( value, crc ); - - outb( nl->tx_frameno, dev->base_addr + DAT ); - crc = CRC32( nl->tx_frameno, crc ); - outb( 0, dev->base_addr + DAT ); - crc = CRC32( 0, crc ); - *crc_p = crc; -} - - -/* - * if frame tail not needed (incorrect number or received twice), - * it won't store, but CRC will be calculated - */ - -static int -skip_tail( unsigned int ioaddr, unsigned int tail_len, u32 crc ) -{ - while( tail_len-- ) - crc = CRC32( inb( ioaddr + DAT ), crc ); - - return crc == CRC32_REMAINDER; -} - - -/* - * Preliminary checks if frame header is correct, calculates its CRC - * and split it to simple fields - */ - -static int -check_fhdr( u32 ioaddr, u32 *framelen, u32 *frameno, u32 *ack, - u32 *is_first, u32 *crc_p ) -{ - u32 crc = *crc_p; - u8 value; - - if( inb( ioaddr + DAT ) != SBNI_SIG ) - return 0; - - value = inb( ioaddr + DAT ); - *framelen = (u32)value; - crc = CRC32( value, crc ); - value = inb( ioaddr + DAT ); - *framelen |= ((u32)value) << 8; - crc = CRC32( value, crc ); - - *ack = *framelen & FRAME_ACK_MASK; - *is_first = (*framelen & FRAME_FIRST) != 0; - - if( (*framelen &= FRAME_LEN_MASK) < 6 || - *framelen > SBNI_MAX_FRAME - 3 ) - return 0; - - value = inb( ioaddr + DAT ); - *frameno = (u32)value; - crc = CRC32( value, crc ); - - crc = CRC32( inb( ioaddr + DAT ), crc ); /* reserved byte */ - *framelen -= 2; - - *crc_p = crc; - return 1; -} - - -static struct sk_buff * -get_rx_buf( struct net_device *dev ) -{ - /* +2 is to compensate for the alignment fixup below */ - struct sk_buff *skb = dev_alloc_skb( ETHER_MAX_LEN + 2 ); - if( !skb ) - return NULL; - - skb_reserve( skb, 2 ); /* Align IP on longword boundaries */ - return skb; -} - - -static void -indicate_pkt( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - struct sk_buff *skb = nl->rx_buf_p; - - skb_put( skb, nl->inppos ); - -#ifdef CONFIG_SBNI_MULTILINE - skb->protocol = eth_type_trans( skb, nl->master ); - netif_rx( skb ); - ++nl->master->stats.rx_packets; - nl->master->stats.rx_bytes += nl->inppos; -#else - skb->protocol = eth_type_trans( skb, dev ); - netif_rx( skb ); - ++dev->stats.rx_packets; - dev->stats.rx_bytes += nl->inppos; -#endif - nl->rx_buf_p = NULL; /* protocol driver will clear this sk_buff */ -} - - -/* -------------------------------------------------------------------------- */ - -/* - * Routine checks periodically wire activity and regenerates marker if - * connect was inactive for a long time. - */ - -static void -sbni_watchdog(struct timer_list *t) -{ - struct net_local *nl = from_timer(nl, t, watchdog); - struct net_device *dev = nl->watchdog_dev; - unsigned long flags; - unsigned char csr0; - - spin_lock_irqsave( &nl->lock, flags ); - - csr0 = inb( dev->base_addr + CSR0 ); - if( csr0 & RC_CHK ) { - - if( nl->timer_ticks ) { - if( csr0 & (RC_RDY | BU_EMP) ) - /* receiving not active */ - nl->timer_ticks--; - } else { - nl->in_stats.timeout_number++; - if( nl->delta_rxl ) - timeout_change_level( dev ); - - outb( *(u_char *)&nl->csr1 | PR_RES, - dev->base_addr + CSR1 ); - csr0 = inb( dev->base_addr + CSR0 ); - } - } else - nl->state &= ~FL_LINE_DOWN; - - outb( csr0 | RC_CHK, dev->base_addr + CSR0 ); - - mod_timer(t, jiffies + SBNI_TIMEOUT); - - spin_unlock_irqrestore( &nl->lock, flags ); -} - - -static unsigned char rxl_tab[] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, - 0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f -}; - -#define SIZE_OF_TIMEOUT_RXL_TAB 4 -static unsigned char timeout_rxl_tab[] = { - 0x03, 0x05, 0x08, 0x0b -}; - -/* -------------------------------------------------------------------------- */ - -static void -card_start( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - nl->timer_ticks = CHANGE_LEVEL_START_TICKS; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); - nl->state |= FL_PREV_OK; - - nl->inppos = nl->outpos = 0; - nl->wait_frameno = 0; - nl->tx_frameno = 0; - nl->framelen = 0; - - outb( *(u_char *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 ); - outb( EN_INT, dev->base_addr + CSR0 ); -} - -/* -------------------------------------------------------------------------- */ - -/* Receive level auto-selection */ - -static void -change_level( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - if( nl->delta_rxl == 0 ) /* do not auto-negotiate RxL */ - return; - - if( nl->cur_rxl_index == 0 ) - nl->delta_rxl = 1; - else if( nl->cur_rxl_index == 15 ) - nl->delta_rxl = -1; - else if( nl->cur_rxl_rcvd < nl->prev_rxl_rcvd ) - nl->delta_rxl = -nl->delta_rxl; - - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index += nl->delta_rxl ]; - inb( dev->base_addr + CSR0 ); /* needs for PCI cards */ - outb( *(u8 *)&nl->csr1, dev->base_addr + CSR1 ); - - nl->prev_rxl_rcvd = nl->cur_rxl_rcvd; - nl->cur_rxl_rcvd = 0; -} - - -static void -timeout_change_level( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - nl->cur_rxl_index = timeout_rxl_tab[ nl->timeout_rxl ]; - if( ++nl->timeout_rxl >= 4 ) - nl->timeout_rxl = 0; - - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ]; - inb( dev->base_addr + CSR0 ); - outb( *(unsigned char *)&nl->csr1, dev->base_addr + CSR1 ); - - nl->prev_rxl_rcvd = nl->cur_rxl_rcvd; - nl->cur_rxl_rcvd = 0; -} - -/* -------------------------------------------------------------------------- */ - -/* - * Open/initialize the board. - */ - -static int -sbni_open( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - struct timer_list *w = &nl->watchdog; - - /* - * For double ISA adapters within "common irq" mode, we have to - * determine whether primary or secondary channel is initialized, - * and set the irq handler only in first case. - */ - if( dev->base_addr < 0x400 ) { /* ISA only */ - struct net_device **p = sbni_cards; - for( ; *p && p < sbni_cards + SBNI_MAX_NUM_CARDS; ++p ) - if( (*p)->irq == dev->irq && - ((*p)->base_addr == dev->base_addr + 4 || - (*p)->base_addr == dev->base_addr - 4) && - (*p)->flags & IFF_UP ) { - - ((struct net_local *) (netdev_priv(*p))) - ->second = dev; - netdev_notice(dev, "using shared irq with %s\n", - (*p)->name); - nl->state |= FL_SECONDARY; - goto handler_attached; - } - } - - if( request_irq(dev->irq, sbni_interrupt, IRQF_SHARED, dev->name, dev) ) { - netdev_err(dev, "unable to get IRQ %d\n", dev->irq); - return -EAGAIN; - } - -handler_attached: - - spin_lock( &nl->lock ); - memset( &dev->stats, 0, sizeof(struct net_device_stats) ); - memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) ); - - card_start( dev ); - - netif_start_queue( dev ); - - /* set timer watchdog */ - nl->watchdog_dev = dev; - timer_setup(w, sbni_watchdog, 0); - w->expires = jiffies + SBNI_TIMEOUT; - add_timer( w ); - - spin_unlock( &nl->lock ); - return 0; -} - - -static int -sbni_close( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - if( nl->second && nl->second->flags & IFF_UP ) { - netdev_notice(dev, "Secondary channel (%s) is active!\n", - nl->second->name); - return -EBUSY; - } - -#ifdef CONFIG_SBNI_MULTILINE - if( nl->state & FL_SLAVE ) - emancipate( dev ); - else - while( nl->link ) /* it's master device! */ - emancipate( nl->link ); -#endif - - spin_lock( &nl->lock ); - - nl->second = NULL; - drop_xmit_queue( dev ); - netif_stop_queue( dev ); - - del_timer( &nl->watchdog ); - - outb( 0, dev->base_addr + CSR0 ); - - if( !(nl->state & FL_SECONDARY) ) - free_irq( dev->irq, dev ); - nl->state &= FL_SECONDARY; - - spin_unlock( &nl->lock ); - return 0; -} - - -/* - Valid combinations in CSR0 (for probing): - - VALID_DECODER 0000,0011,1011,1010 - - ; 0 ; - - TR_REQ ; 1 ; + - TR_RDY ; 2 ; - - TR_RDY TR_REQ ; 3 ; + - BU_EMP ; 4 ; + - BU_EMP TR_REQ ; 5 ; + - BU_EMP TR_RDY ; 6 ; - - BU_EMP TR_RDY TR_REQ ; 7 ; + - RC_RDY ; 8 ; + - RC_RDY TR_REQ ; 9 ; + - RC_RDY TR_RDY ; 10 ; - - RC_RDY TR_RDY TR_REQ ; 11 ; - - RC_RDY BU_EMP ; 12 ; - - RC_RDY BU_EMP TR_REQ ; 13 ; - - RC_RDY BU_EMP TR_RDY ; 14 ; - - RC_RDY BU_EMP TR_RDY TR_REQ ; 15 ; - -*/ - -#define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200) - - -static int -sbni_card_probe( unsigned long ioaddr ) -{ - unsigned char csr0; - - csr0 = inb( ioaddr + CSR0 ); - if( csr0 != 0xff && csr0 != 0x00 ) { - csr0 &= ~EN_INT; - if( csr0 & BU_EMP ) - csr0 |= EN_INT; - - if( VALID_DECODER & (1 << (csr0 >> 4)) ) - return 0; - } - - return -ENODEV; -} - -/* -------------------------------------------------------------------------- */ - -static int -sbni_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) -{ - struct net_local *nl = netdev_priv(dev); - struct sbni_flags flags; - int error = 0; - -#ifdef CONFIG_SBNI_MULTILINE - struct net_device *slave_dev; - char slave_name[ 8 ]; -#endif - - switch( cmd ) { - case SIOCDEVGETINSTATS : - if (copy_to_user(data, &nl->in_stats, - sizeof(struct sbni_in_stats))) - error = -EFAULT; - break; - - case SIOCDEVRESINSTATS : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) ); - break; - - case SIOCDEVGHWSTATE : - flags.mac_addr = *(u32 *)(dev->dev_addr + 3); - flags.rate = nl->csr1.rate; - flags.slow_mode = (nl->state & FL_SLOW_MODE) != 0; - flags.rxl = nl->cur_rxl_index; - flags.fixed_rxl = nl->delta_rxl == 0; - - if (copy_to_user(data, &flags, sizeof(flags))) - error = -EFAULT; - break; - - case SIOCDEVSHWSTATE : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - spin_lock( &nl->lock ); - flags = *(struct sbni_flags*) &ifr->ifr_ifru; - if( flags.fixed_rxl ) { - nl->delta_rxl = 0; - nl->cur_rxl_index = flags.rxl; - } else { - nl->delta_rxl = DEF_RXL_DELTA; - nl->cur_rxl_index = DEF_RXL; - } - - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ]; - nl->csr1.rate = flags.rate; - outb( *(u8 *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 ); - spin_unlock( &nl->lock ); - break; - -#ifdef CONFIG_SBNI_MULTILINE - - case SIOCDEVENSLAVE : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (copy_from_user(slave_name, data, sizeof(slave_name))) - return -EFAULT; - slave_dev = dev_get_by_name(&init_net, slave_name ); - if( !slave_dev || !(slave_dev->flags & IFF_UP) ) { - netdev_err(dev, "trying to enslave non-active device %s\n", - slave_name); - if (slave_dev) - dev_put(slave_dev); - return -EPERM; - } - - return enslave( dev, slave_dev ); - - case SIOCDEVEMANSIPATE : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - return emancipate( dev ); - -#endif /* CONFIG_SBNI_MULTILINE */ - - default : - return -EOPNOTSUPP; - } - - return error; -} - - -#ifdef CONFIG_SBNI_MULTILINE - -static int -enslave( struct net_device *dev, struct net_device *slave_dev ) -{ - struct net_local *nl = netdev_priv(dev); - struct net_local *snl = netdev_priv(slave_dev); - - if( nl->state & FL_SLAVE ) /* This isn't master or free device */ - return -EBUSY; - - if( snl->state & FL_SLAVE ) /* That was already enslaved */ - return -EBUSY; - - spin_lock( &nl->lock ); - spin_lock( &snl->lock ); - - /* append to list */ - snl->link = nl->link; - nl->link = slave_dev; - snl->master = dev; - snl->state |= FL_SLAVE; - - /* Summary statistics of MultiLine operation will be stored - in master's counters */ - memset( &slave_dev->stats, 0, sizeof(struct net_device_stats) ); - netif_stop_queue( slave_dev ); - netif_wake_queue( dev ); /* Now we are able to transmit */ - - spin_unlock( &snl->lock ); - spin_unlock( &nl->lock ); - netdev_notice(dev, "slave device (%s) attached\n", slave_dev->name); - return 0; -} - - -static int -emancipate( struct net_device *dev ) -{ - struct net_local *snl = netdev_priv(dev); - struct net_device *p = snl->master; - struct net_local *nl = netdev_priv(p); - - if( !(snl->state & FL_SLAVE) ) - return -EINVAL; - - spin_lock( &nl->lock ); - spin_lock( &snl->lock ); - drop_xmit_queue( dev ); - - /* exclude from list */ - for(;;) { /* must be in list */ - struct net_local *t = netdev_priv(p); - if( t->link == dev ) { - t->link = snl->link; - break; - } - p = t->link; - } - - snl->link = NULL; - snl->master = dev; - snl->state &= ~FL_SLAVE; - - netif_start_queue( dev ); - - spin_unlock( &snl->lock ); - spin_unlock( &nl->lock ); - - dev_put( dev ); - return 0; -} - -#endif - -static void -set_multicast_list( struct net_device *dev ) -{ - return; /* sbni always operate in promiscuos mode */ -} - - -#ifdef MODULE -module_param_hw_array(io, int, ioport, NULL, 0); -module_param_hw_array(irq, int, irq, NULL, 0); -module_param_array(baud, int, NULL, 0); -module_param_array(rxl, int, NULL, 0); -module_param_array(mac, int, NULL, 0); -module_param(skip_pci_probe, bool, 0); - -MODULE_LICENSE("GPL"); - - -int __init init_module( void ) -{ - struct net_device *dev; - int err; - - while( num < SBNI_MAX_NUM_CARDS ) { - dev = alloc_netdev(sizeof(struct net_local), "sbni%d", - NET_NAME_UNKNOWN, sbni_devsetup); - if( !dev) - break; - - sprintf( dev->name, "sbni%d", num ); - - err = sbni_init(dev); - if (err) { - free_netdev(dev); - break; - } - - if( register_netdev( dev ) ) { - release_region( dev->base_addr, SBNI_IO_EXTENT ); - free_netdev( dev ); - break; - } - } - - return *sbni_cards ? 0 : -ENODEV; -} - -void -cleanup_module(void) -{ - int i; - - for (i = 0; i < SBNI_MAX_NUM_CARDS; ++i) { - struct net_device *dev = sbni_cards[i]; - if (dev != NULL) { - unregister_netdev(dev); - release_region(dev->base_addr, SBNI_IO_EXTENT); - free_netdev(dev); - } - } -} - -#else /* MODULE */ - -static int __init -sbni_setup( char *p ) -{ - int n, parm; - - if( *p++ != '(' ) - goto bad_param; - - for( n = 0, parm = 0; *p && n < 8; ) { - (*dest[ parm ])[ n ] = simple_strtoul( p, &p, 0 ); - if( !*p || *p == ')' ) - return 1; - if( *p == ';' ) { - ++p; - ++n; - parm = 0; - } else if( *p++ != ',' ) { - break; - } else { - if( ++parm >= 5 ) - break; - } - } -bad_param: - pr_err("Error in sbni kernel parameter!\n"); - return 0; -} - -__setup( "sbni=", sbni_setup ); - -#endif /* MODULE */ - -/* -------------------------------------------------------------------------- */ - -static u32 -calc_crc32( u32 crc, u8 *p, u32 len ) -{ - while( len-- ) - crc = CRC32( *p++, crc ); - - return crc; -} - -static u32 crc32tab[] __attribute__ ((aligned(8))) = { - 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37, - 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E, - 0xDCD967BF, 0xABDE5729, 0x32D70693, 0x45D03605, - 0xDBB4A3A6, 0xACB39330, 0x35BAC28A, 0x42BDF21C, - 0xCFB5FFE9, 0xB8B2CF7F, 0x21BB9EC5, 0x56BCAE53, - 0xC8D83BF0, 0xBFDF0B66, 0x26D65ADC, 0x51D16A4A, - 0xC16E77DB, 0xB669474D, 0x2F6016F7, 0x58672661, - 0xC603B3C2, 0xB1048354, 0x280DD2EE, 0x5F0AE278, - 0xE96CCF45, 0x9E6BFFD3, 0x0762AE69, 0x70659EFF, - 0xEE010B5C, 0x99063BCA, 0x000F6A70, 0x77085AE6, - 0xE7B74777, 0x90B077E1, 0x09B9265B, 0x7EBE16CD, - 0xE0DA836E, 0x97DDB3F8, 0x0ED4E242, 0x79D3D2D4, - 0xF4DBDF21, 0x83DCEFB7, 0x1AD5BE0D, 0x6DD28E9B, - 0xF3B61B38, 0x84B12BAE, 0x1DB87A14, 0x6ABF4A82, - 0xFA005713, 0x8D076785, 0x140E363F, 0x630906A9, - 0xFD6D930A, 0x8A6AA39C, 0x1363F226, 0x6464C2B0, - 0xA4DEAE1D, 0xD3D99E8B, 0x4AD0CF31, 0x3DD7FFA7, - 0xA3B36A04, 0xD4B45A92, 0x4DBD0B28, 0x3ABA3BBE, - 0xAA05262F, 0xDD0216B9, 0x440B4703, 0x330C7795, - 0xAD68E236, 0xDA6FD2A0, 0x4366831A, 0x3461B38C, - 0xB969BE79, 0xCE6E8EEF, 0x5767DF55, 0x2060EFC3, - 0xBE047A60, 0xC9034AF6, 0x500A1B4C, 0x270D2BDA, - 0xB7B2364B, 0xC0B506DD, 0x59BC5767, 0x2EBB67F1, - 0xB0DFF252, 0xC7D8C2C4, 0x5ED1937E, 0x29D6A3E8, - 0x9FB08ED5, 0xE8B7BE43, 0x71BEEFF9, 0x06B9DF6F, - 0x98DD4ACC, 0xEFDA7A5A, 0x76D32BE0, 0x01D41B76, - 0x916B06E7, 0xE66C3671, 0x7F6567CB, 0x0862575D, - 0x9606C2FE, 0xE101F268, 0x7808A3D2, 0x0F0F9344, - 0x82079EB1, 0xF500AE27, 0x6C09FF9D, 0x1B0ECF0B, - 0x856A5AA8, 0xF26D6A3E, 0x6B643B84, 0x1C630B12, - 0x8CDC1683, 0xFBDB2615, 0x62D277AF, 0x15D54739, - 0x8BB1D29A, 0xFCB6E20C, 0x65BFB3B6, 0x12B88320, - 0x3FBA6CAD, 0x48BD5C3B, 0xD1B40D81, 0xA6B33D17, - 0x38D7A8B4, 0x4FD09822, 0xD6D9C998, 0xA1DEF90E, - 0x3161E49F, 0x4666D409, 0xDF6F85B3, 0xA868B525, - 0x360C2086, 0x410B1010, 0xD80241AA, 0xAF05713C, - 0x220D7CC9, 0x550A4C5F, 0xCC031DE5, 0xBB042D73, - 0x2560B8D0, 0x52678846, 0xCB6ED9FC, 0xBC69E96A, - 0x2CD6F4FB, 0x5BD1C46D, 0xC2D895D7, 0xB5DFA541, - 0x2BBB30E2, 0x5CBC0074, 0xC5B551CE, 0xB2B26158, - 0x04D44C65, 0x73D37CF3, 0xEADA2D49, 0x9DDD1DDF, - 0x03B9887C, 0x74BEB8EA, 0xEDB7E950, 0x9AB0D9C6, - 0x0A0FC457, 0x7D08F4C1, 0xE401A57B, 0x930695ED, - 0x0D62004E, 0x7A6530D8, 0xE36C6162, 0x946B51F4, - 0x19635C01, 0x6E646C97, 0xF76D3D2D, 0x806A0DBB, - 0x1E0E9818, 0x6909A88E, 0xF000F934, 0x8707C9A2, - 0x17B8D433, 0x60BFE4A5, 0xF9B6B51F, 0x8EB18589, - 0x10D5102A, 0x67D220BC, 0xFEDB7106, 0x89DC4190, - 0x49662D3D, 0x3E611DAB, 0xA7684C11, 0xD06F7C87, - 0x4E0BE924, 0x390CD9B2, 0xA0058808, 0xD702B89E, - 0x47BDA50F, 0x30BA9599, 0xA9B3C423, 0xDEB4F4B5, - 0x40D06116, 0x37D75180, 0xAEDE003A, 0xD9D930AC, - 0x54D13D59, 0x23D60DCF, 0xBADF5C75, 0xCDD86CE3, - 0x53BCF940, 0x24BBC9D6, 0xBDB2986C, 0xCAB5A8FA, - 0x5A0AB56B, 0x2D0D85FD, 0xB404D447, 0xC303E4D1, - 0x5D677172, 0x2A6041E4, 0xB369105E, 0xC46E20C8, - 0x72080DF5, 0x050F3D63, 0x9C066CD9, 0xEB015C4F, - 0x7565C9EC, 0x0262F97A, 0x9B6BA8C0, 0xEC6C9856, - 0x7CD385C7, 0x0BD4B551, 0x92DDE4EB, 0xE5DAD47D, - 0x7BBE41DE, 0x0CB97148, 0x95B020F2, 0xE2B71064, - 0x6FBF1D91, 0x18B82D07, 0x81B17CBD, 0xF6B64C2B, - 0x68D2D988, 0x1FD5E91E, 0x86DCB8A4, 0xF1DB8832, - 0x616495A3, 0x1663A535, 0x8F6AF48F, 0xF86DC419, - 0x660951BA, 0x110E612C, 0x88073096, 0xFF000000 -}; - diff --git a/drivers/net/wan/sbni.h b/drivers/net/wan/sbni.h deleted file mode 100644 index 84264510a8ed..000000000000 --- a/drivers/net/wan/sbni.h +++ /dev/null @@ -1,147 +0,0 @@ -/* sbni.h: definitions for a Granch SBNI12 driver, version 5.0.0 - * Written 2001 Denis I.Timofeev (timofeev@granch.ru) - * This file is distributed under the GNU GPL - */ - -#ifndef SBNI_H -#define SBNI_H - -#ifdef SBNI_DEBUG -#define DP( A ) A -#else -#define DP( A ) -#endif - - -/* We don't have official vendor id yet... */ -#define SBNI_PCI_VENDOR 0x55 -#define SBNI_PCI_DEVICE 0x9f - -#define ISA_MODE 0x00 -#define PCI_MODE 0x01 - -#define SBNI_IO_EXTENT 4 - -enum sbni_reg { - CSR0 = 0, - CSR1 = 1, - DAT = 2 -}; - -/* CSR0 mapping */ -enum { - BU_EMP = 0x02, - RC_CHK = 0x04, - CT_ZER = 0x08, - TR_REQ = 0x10, - TR_RDY = 0x20, - EN_INT = 0x40, - RC_RDY = 0x80 -}; - - -/* CSR1 mapping */ -#define PR_RES 0x80 - -struct sbni_csr1 { -#ifdef __LITTLE_ENDIAN_BITFIELD - u8 rxl : 5; - u8 rate : 2; - u8 : 1; -#else - u8 : 1; - u8 rate : 2; - u8 rxl : 5; -#endif -}; - -/* fields in frame header */ -#define FRAME_ACK_MASK (unsigned short)0x7000 -#define FRAME_LEN_MASK (unsigned short)0x03FF -#define FRAME_FIRST (unsigned short)0x8000 -#define FRAME_RETRY (unsigned short)0x0800 - -#define FRAME_SENT_BAD (unsigned short)0x4000 -#define FRAME_SENT_OK (unsigned short)0x3000 - - -/* state flags */ -enum { - FL_WAIT_ACK = 0x01, - FL_NEED_RESEND = 0x02, - FL_PREV_OK = 0x04, - FL_SLOW_MODE = 0x08, - FL_SECONDARY = 0x10, -#ifdef CONFIG_SBNI_MULTILINE - FL_SLAVE = 0x20, -#endif - FL_LINE_DOWN = 0x40 -}; - - -enum { - DEFAULT_IOBASEADDR = 0x210, - DEFAULT_INTERRUPTNUMBER = 5, - DEFAULT_RATE = 0, - DEFAULT_FRAME_LEN = 1012 -}; - -#define DEF_RXL_DELTA -1 -#define DEF_RXL 0xf - -#define SBNI_SIG 0x5a - -#define SBNI_MIN_LEN 60 /* Shortest Ethernet frame without FCS */ -#define SBNI_MAX_FRAME 1023 -#define ETHER_MAX_LEN 1518 - -#define SBNI_TIMEOUT (HZ/10) - -#define TR_ERROR_COUNT 32 -#define CHANGE_LEVEL_START_TICKS 4 - -#define SBNI_MAX_NUM_CARDS 16 - -/* internal SBNI-specific statistics */ -struct sbni_in_stats { - u32 all_rx_number; - u32 bad_rx_number; - u32 timeout_number; - u32 all_tx_number; - u32 resend_tx_number; -}; - -/* SBNI ioctl params */ -#define SIOCDEVGETINSTATS SIOCDEVPRIVATE -#define SIOCDEVRESINSTATS SIOCDEVPRIVATE+1 -#define SIOCDEVGHWSTATE SIOCDEVPRIVATE+2 -#define SIOCDEVSHWSTATE SIOCDEVPRIVATE+3 -#define SIOCDEVENSLAVE SIOCDEVPRIVATE+4 -#define SIOCDEVEMANSIPATE SIOCDEVPRIVATE+5 - - -/* data packet for SIOCDEVGHWSTATE/SIOCDEVSHWSTATE ioctl requests */ -struct sbni_flags { - u32 rxl : 4; - u32 rate : 2; - u32 fixed_rxl : 1; - u32 slow_mode : 1; - u32 mac_addr : 24; -}; - -/* - * CRC-32 stuff - */ -#define CRC32(c,crc) (crc32tab[((size_t)(crc) ^ (c)) & 0xff] ^ (((crc) >> 8) & 0x00FFFFFF)) - /* CRC generator 0xEDB88320 */ - /* CRC remainder 0x2144DF1C */ - /* CRC initial value 0x00000000 */ -#define CRC32_REMAINDER 0x2144DF1C -#define CRC32_INITIAL 0x00000000 - -#ifndef __initdata -#define __initdata -#endif - -#endif - diff --git a/include/net/Space.h b/include/net/Space.h index 93fd6caa4bad..08ca9cef0213 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -18,6 +18,3 @@ struct net_device *cops_probe(int unit); /* Fibre Channel adapters */ int iph5526_probe(struct net_device *dev); - -/* SBNI adapters */ -int sbni_probe(int unit); -- cgit v1.2.3 From 102793136ce9dacae77c2d1f8dd6878b480f4b95 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 2 Aug 2021 16:56:18 -0700 Subject: Bluetooth: HCI: Add proper tracking for enable status of adv instances This adds a field to track if advertising instances are enabled or not and only clear HCI_LE_ADV flag if there is no instance left advertising. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_event.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4abe3c494002..b79b31359bf8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -221,6 +221,7 @@ struct oob_data { struct adv_info { struct list_head list; + bool enabled; bool pending; __u8 instance; __u32 flags; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ea7fc09478be..35c5cc9f91b0 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1277,7 +1277,9 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_cp_le_set_ext_adv_enable *cp; + struct hci_cp_ext_adv_set *set; __u8 status = *((__u8 *) skb->data); + struct adv_info *adv = NULL, *n; BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -1288,22 +1290,48 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, if (!cp) return; + set = (void *)cp->data; + hci_dev_lock(hdev); + if (cp->num_of_sets) + adv = hci_find_adv_instance(hdev, set->handle); + if (cp->enable) { struct hci_conn *conn; hci_dev_set_flag(hdev, HCI_LE_ADV); + if (adv) + adv->enabled = true; + conn = hci_lookup_le_connect(hdev); if (conn) queue_delayed_work(hdev->workqueue, &conn->le_conn_timeout, conn->conn_timeout); } else { + if (adv) { + adv->enabled = false; + /* If just one instance was disabled check if there are + * any other instance enabled before clearing HCI_LE_ADV + */ + list_for_each_entry_safe(adv, n, &hdev->adv_instances, + list) { + if (adv->enabled) + goto unlock; + } + } else { + /* All instances shall be considered disabled */ + list_for_each_entry_safe(adv, n, &hdev->adv_instances, + list) + adv->enabled = false; + } + hci_dev_clear_flag(hdev, HCI_LE_ADV); } +unlock: hci_dev_unlock(hdev); } -- cgit v1.2.3 From c45074d68a9b1e893d86520af71fab37693c3d7e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 2 Aug 2021 16:56:19 -0700 Subject: Bluetooth: Fix not generating RPA when required Code was checking if random_addr and hdev->rpa match without first checking if the RPA has not been set (BDADDR_ANY), furthermore it was clearing HCI_RPA_EXPIRED before the command completes and the RPA is actually programmed which in case of failure would leave the expired RPA still set. Since advertising instance have a similar problem the clearing of HCI_RPA_EXPIRED has been moved to hci_event.c after checking the random address is in fact the hdev->rap and then proceed to set the expire timeout. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 4 ++ net/bluetooth/hci_event.c | 32 +++++++++++----- net/bluetooth/hci_request.c | 81 +++++++++++++++++----------------------- 3 files changed, 61 insertions(+), 56 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b79b31359bf8..b011eeea28c3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1413,6 +1413,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn); !hci_dev_test_flag(dev, HCI_AUTO_OFF)) #define bredr_sc_enabled(dev) (lmp_sc_capable(dev) && \ hci_dev_test_flag(dev, HCI_SC_ENABLED)) +#define rpa_valid(dev) (bacmp(&dev->rpa, BDADDR_ANY) && \ + !hci_dev_test_flag(dev, HCI_RPA_EXPIRED)) +#define adv_rpa_valid(adv) (bacmp(&adv->random_addr, BDADDR_ANY) && \ + !adv->rpa_expired) #define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M)) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 35c5cc9f91b0..38decf474f31 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -40,6 +40,8 @@ #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ "\x00\x00\x00\x00\x00\x00\x00\x00" +#define secs_to_jiffies(_secs) msecs_to_jiffies((_secs) * 1000) + /* Handle HCI Event packets */ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb, @@ -1171,6 +1173,12 @@ static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb) bacpy(&hdev->random_addr, sent); + if (!bacmp(&hdev->rpa, sent)) { + hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED); + queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, + secs_to_jiffies(hdev->rpa_timeout)); + } + hci_dev_unlock(hdev); } @@ -1201,24 +1209,30 @@ static void hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev, { __u8 status = *((__u8 *) skb->data); struct hci_cp_le_set_adv_set_rand_addr *cp; - struct adv_info *adv_instance; + struct adv_info *adv; if (status) return; cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_SET_RAND_ADDR); - if (!cp) + /* Update only in case the adv instance since handle 0x00 shall be using + * HCI_OP_LE_SET_RANDOM_ADDR since that allows both extended and + * non-extended adverting. + */ + if (!cp || !cp->handle) return; hci_dev_lock(hdev); - if (!cp->handle) { - /* Store in hdev for instance 0 (Set adv and Directed advs) */ - bacpy(&hdev->random_addr, &cp->bdaddr); - } else { - adv_instance = hci_find_adv_instance(hdev, cp->handle); - if (adv_instance) - bacpy(&adv_instance->random_addr, &cp->bdaddr); + adv = hci_find_adv_instance(hdev, cp->handle); + if (adv) { + bacpy(&adv->random_addr, &cp->bdaddr); + if (!bacmp(&hdev->rpa, &cp->bdaddr)) { + adv->rpa_expired = false; + queue_delayed_work(hdev->workqueue, + &adv->rpa_expired_cb, + secs_to_jiffies(hdev->rpa_timeout)); + } } hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 1d14adc023e9..f15626607b2d 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -2072,8 +2072,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, * current RPA has expired then generate a new one. */ if (use_rpa) { - int to; - /* If Controller supports LL Privacy use own address type is * 0x03 */ @@ -2084,14 +2082,10 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, *own_addr_type = ADDR_LE_DEV_RANDOM; if (adv_instance) { - if (!adv_instance->rpa_expired && - !bacmp(&adv_instance->random_addr, &hdev->rpa)) + if (adv_rpa_valid(adv_instance)) return 0; - - adv_instance->rpa_expired = false; } else { - if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) && - !bacmp(&hdev->random_addr, &hdev->rpa)) + if (rpa_valid(hdev)) return 0; } @@ -2103,14 +2097,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, bacpy(rand_addr, &hdev->rpa); - to = msecs_to_jiffies(hdev->rpa_timeout * 1000); - if (adv_instance) - queue_delayed_work(hdev->workqueue, - &adv_instance->rpa_expired_cb, to); - else - queue_delayed_work(hdev->workqueue, - &hdev->rpa_expired, to); - return 0; } @@ -2153,6 +2139,30 @@ void __hci_req_clear_ext_adv_sets(struct hci_request *req) hci_req_add(req, HCI_OP_LE_CLEAR_ADV_SETS, 0, NULL); } +static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) +{ + struct hci_dev *hdev = req->hdev; + + /* If we're advertising or initiating an LE connection we can't + * go ahead and change the random address at this time. This is + * because the eventual initiator address used for the + * subsequently created connection will be undefined (some + * controllers use the new address and others the one we had + * when the operation started). + * + * In this kind of scenario skip the update and let the random + * address be updated at the next cycle. + */ + if (hci_dev_test_flag(hdev, HCI_LE_ADV) || + hci_lookup_le_connect(hdev)) { + bt_dev_dbg(hdev, "Deferring random address update"); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); + return; + } + + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); +} + int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) { struct hci_cp_le_set_ext_adv_params cp; @@ -2255,6 +2265,13 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) } else { if (!bacmp(&random_addr, &hdev->random_addr)) return 0; + /* Instance 0x00 doesn't have an adv_info, instead it + * uses hdev->random_addr to track its address so + * whenever it needs to be updated this also set the + * random address since hdev->random_addr is shared with + * scan state machine. + */ + set_random_addr(req, &random_addr); } memset(&cp, 0, sizeof(cp)); @@ -2512,30 +2529,6 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, false); } -static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) -{ - struct hci_dev *hdev = req->hdev; - - /* If we're advertising or initiating an LE connection we can't - * go ahead and change the random address at this time. This is - * because the eventual initiator address used for the - * subsequently created connection will be undefined (some - * controllers use the new address and others the one we had - * when the operation started). - * - * In this kind of scenario skip the update and let the random - * address be updated at the next cycle. - */ - if (hci_dev_test_flag(hdev, HCI_LE_ADV) || - hci_lookup_le_connect(hdev)) { - bt_dev_dbg(hdev, "Deferring random address update"); - hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); - return; - } - - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); -} - int hci_update_random_address(struct hci_request *req, bool require_privacy, bool use_rpa, u8 *own_addr_type) { @@ -2547,8 +2540,6 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, * the current RPA in use, then generate a new one. */ if (use_rpa) { - int to; - /* If Controller supports LL Privacy use own address type is * 0x03 */ @@ -2558,8 +2549,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, else *own_addr_type = ADDR_LE_DEV_RANDOM; - if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) && - !bacmp(&hdev->random_addr, &hdev->rpa)) + if (rpa_valid(hdev)) return 0; err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); @@ -2570,9 +2560,6 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, set_random_addr(req, &hdev->rpa); - to = msecs_to_jiffies(hdev->rpa_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); - return 0; } -- cgit v1.2.3 From 314001f0bf927015e459c9d387d62a231fe93af3 Mon Sep 17 00:00:00 2001 From: Rao Shoaib Date: Sun, 1 Aug 2021 00:57:07 -0700 Subject: af_unix: Add OOB support This patch adds OOB support for AF_UNIX sockets. The semantics is same as TCP. The last byte of a message with the OOB flag is treated as the OOB byte. The byte is separated into a skb and a pointer to the skb is stored in unix_sock. The pointer is used to enforce OOB semantics. Signed-off-by: Rao Shoaib Signed-off-by: David S. Miller --- include/net/af_unix.h | 3 + net/unix/Kconfig | 5 + net/unix/af_unix.c | 153 +++++++- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/net/af_unix/Makefile | 5 + .../testing/selftests/net/af_unix/test_unix_oob.c | 437 +++++++++++++++++++++ 6 files changed, 602 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/net/af_unix/Makefile create mode 100644 tools/testing/selftests/net/af_unix/test_unix_oob.c (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 435a2c3d5a6f..4757d7f53f13 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -70,6 +70,9 @@ struct unix_sock { struct socket_wq peer_wq; wait_queue_entry_t peer_wake; struct scm_stat scm_stat; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + struct sk_buff *oob_skb; +#endif }; static inline struct unix_sock *unix_sk(const struct sock *sk) diff --git a/net/unix/Kconfig b/net/unix/Kconfig index b6c4282899ec..b7f811216820 100644 --- a/net/unix/Kconfig +++ b/net/unix/Kconfig @@ -25,6 +25,11 @@ config UNIX_SCM depends on UNIX default y +config AF_UNIX_OOB + bool + depends on UNIX + default y + config UNIX_DIAG tristate "UNIX: socket monitoring interface" depends on UNIX diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 256c4e31132e..ec02e70a549b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -503,6 +503,12 @@ static void unix_sock_destructor(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (u->oob_skb) { + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } +#endif WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); @@ -1889,6 +1895,46 @@ out: */ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) +#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) +static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) +{ + struct unix_sock *ousk = unix_sk(other); + struct sk_buff *skb; + int err = 0; + + skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); + + if (!skb) + return err; + + skb_put(skb, 1); + skb->len = 1; + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); + + if (err) { + kfree_skb(skb); + return err; + } + + unix_state_lock(other); + maybe_add_creds(skb, sock, other); + skb_get(skb); + + if (ousk->oob_skb) + kfree_skb(ousk->oob_skb); + + ousk->oob_skb = skb; + + scm_stat_add(other, skb); + skb_queue_tail(&other->sk_receive_queue, skb); + sk_send_sigurg(other); + unix_state_unlock(other); + other->sk_data_ready(other); + + return err; +} +#endif + static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { @@ -1907,8 +1953,14 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, return err; err = -EOPNOTSUPP; - if (msg->msg_flags&MSG_OOB) - goto out_err; + if (msg->msg_flags & MSG_OOB) { +#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) + if (len) + len--; + else +#endif + goto out_err; + } if (msg->msg_namelen) { err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; @@ -1973,6 +2025,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, sent += size; } +#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) + if (msg->msg_flags & MSG_OOB) { + err = queue_oob(sock, msg, other); + if (err) + goto out_err; + sent++; + } +#endif + scm_destroy(&scm); return sent; @@ -2358,6 +2419,59 @@ struct unix_stream_read_state { unsigned int splice_flags; }; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) +static int unix_stream_recv_urg(struct unix_stream_read_state *state) +{ + struct socket *sock = state->socket; + struct sock *sk = sock->sk; + struct unix_sock *u = unix_sk(sk); + int chunk = 1; + + if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) + return -EINVAL; + + chunk = state->recv_actor(u->oob_skb, 0, chunk, state); + if (chunk < 0) + return -EFAULT; + + if (!(state->flags & MSG_PEEK)) { + UNIXCB(u->oob_skb).consumed += 1; + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } + state->msg->msg_flags |= MSG_OOB; + return 1; +} + +static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, + int flags, int copied) +{ + struct unix_sock *u = unix_sk(sk); + + if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { + skb_unlink(skb, &sk->sk_receive_queue); + consume_skb(skb); + skb = NULL; + } else { + if (skb == u->oob_skb) { + if (copied) { + skb = NULL; + } else if (sock_flag(sk, SOCK_URGINLINE)) { + if (!(flags & MSG_PEEK)) { + u->oob_skb = NULL; + consume_skb(skb); + } + } else if (!(flags & MSG_PEEK)) { + skb_unlink(skb, &sk->sk_receive_queue); + consume_skb(skb); + skb = skb_peek(&sk->sk_receive_queue); + } + } + } + return skb; +} +#endif + static int unix_stream_read_generic(struct unix_stream_read_state *state, bool freezable) { @@ -2383,6 +2497,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, if (unlikely(flags & MSG_OOB)) { err = -EOPNOTSUPP; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + mutex_lock(&u->iolock); + unix_state_lock(sk); + + err = unix_stream_recv_urg(state); + + unix_state_unlock(sk); + mutex_unlock(&u->iolock); +#endif goto out; } @@ -2411,6 +2534,18 @@ redo: } last = skb = skb_peek(&sk->sk_receive_queue); last_len = last ? last->len : 0; + +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb) { + skb = manage_oob(skb, sk, flags, copied); + if (!skb) { + unix_state_unlock(sk); + if (copied) + break; + goto redo; + } + } +#endif again: if (skb == NULL) { if (copied >= target) @@ -2746,6 +2881,20 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCUNIXFILE: err = unix_open_file(sk); break; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + case SIOCATMARK: + { + struct sk_buff *skb; + struct unix_sock *u = unix_sk(sk); + int answ = 0; + + skb = skb_peek(&sk->sk_receive_queue); + if (skb && skb == u->oob_skb) + answ = 1; + err = put_user(answ, (int __user *)arg); + } + break; +#endif default: err = -ENOIOCTLCMD; break; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index fb010a35d61a..da9e8b699e42 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -38,6 +38,7 @@ TARGETS += mount_setattr TARGETS += mqueue TARGETS += nci TARGETS += net +TARGETS += net/af_unix TARGETS += net/forwarding TARGETS += net/mptcp TARGETS += netfilter diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile new file mode 100644 index 000000000000..cfc7f4f97fd1 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -0,0 +1,5 @@ +##TEST_GEN_FILES := test_unix_oob +TEST_PROGS := test_unix_oob +include ../../lib.mk + +all: $(TEST_PROGS) diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c new file mode 100644 index 000000000000..0f3e3763f4f8 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int pipefd[2]; +static int signal_recvd; +static pid_t producer_id; +static char sock_name[32]; + +static void sig_hand(int sn, siginfo_t *si, void *p) +{ + signal_recvd = sn; +} + +static int set_sig_handler(int signal) +{ + struct sigaction sa; + + sa.sa_sigaction = sig_hand; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO | SA_RESTART; + + return sigaction(signal, &sa, NULL); +} + +static void set_filemode(int fd, int set) +{ + int flags = fcntl(fd, F_GETFL, 0); + + if (set) + flags &= ~O_NONBLOCK; + else + flags |= O_NONBLOCK; + fcntl(fd, F_SETFL, flags); +} + +static void signal_producer(int fd) +{ + char cmd; + + cmd = 'S'; + write(fd, &cmd, sizeof(cmd)); +} + +static void wait_for_signal(int fd) +{ + char buf[5]; + + read(fd, buf, 5); +} + +static void die(int status) +{ + fflush(NULL); + unlink(sock_name); + kill(producer_id, SIGTERM); + exit(status); +} + +int is_sioctatmark(int fd) +{ + int ans = -1; + + if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) { +#ifdef DEBUG + perror("SIOCATMARK Failed"); +#endif + } + return ans; +} + +void read_oob(int fd, char *c) +{ + + *c = ' '; + if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) { +#ifdef DEBUG + perror("Reading MSG_OOB Failed"); +#endif + } +} + +int read_data(int pfd, char *buf, int size) +{ + int len = 0; + + memset(buf, size, '0'); + len = read(pfd, buf, size); +#ifdef DEBUG + if (len < 0) + perror("read failed"); +#endif + return len; +} + +static void wait_for_data(int pfd, int event) +{ + struct pollfd pfds[1]; + + pfds[0].fd = pfd; + pfds[0].events = event; + poll(pfds, 1, -1); +} + +void producer(struct sockaddr_un *consumer_addr) +{ + int cfd; + char buf[64]; + int i; + + memset(buf, 'x', sizeof(buf)); + cfd = socket(AF_UNIX, SOCK_STREAM, 0); + + wait_for_signal(pipefd[0]); + if (connect(cfd, (struct sockaddr *)consumer_addr, + sizeof(struct sockaddr)) != 0) { + perror("Connect failed"); + kill(0, SIGTERM); + exit(1); + } + + for (i = 0; i < 2; i++) { + /* Test 1: Test for SIGURG and OOB */ + wait_for_signal(pipefd[0]); + memset(buf, 'x', sizeof(buf)); + buf[63] = '@'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + wait_for_signal(pipefd[0]); + + /* Test 2: Test for OOB being overwitten */ + memset(buf, 'x', sizeof(buf)); + buf[63] = '%'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + memset(buf, 'x', sizeof(buf)); + buf[63] = '#'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + wait_for_signal(pipefd[0]); + + /* Test 3: Test for SIOCATMARK */ + memset(buf, 'x', sizeof(buf)); + buf[63] = '@'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + memset(buf, 'x', sizeof(buf)); + buf[63] = '%'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + memset(buf, 'x', sizeof(buf)); + send(cfd, buf, sizeof(buf), 0); + + wait_for_signal(pipefd[0]); + + /* Test 4: Test for 1byte OOB msg */ + memset(buf, 'x', sizeof(buf)); + buf[0] = '@'; + send(cfd, buf, 1, MSG_OOB); + } +} + +int +main(int argc, char **argv) +{ + int lfd, pfd; + struct sockaddr_un consumer_addr, paddr; + socklen_t len = sizeof(consumer_addr); + char buf[1024]; + int on = 0; + char oob; + int flags; + int atmark; + char *tmp_file; + + lfd = socket(AF_UNIX, SOCK_STREAM, 0); + memset(&consumer_addr, 0, sizeof(consumer_addr)); + consumer_addr.sun_family = AF_UNIX; + sprintf(sock_name, "unix_oob_%d", getpid()); + unlink(sock_name); + strcpy(consumer_addr.sun_path, sock_name); + + if ((bind(lfd, (struct sockaddr *)&consumer_addr, + sizeof(consumer_addr))) != 0) { + perror("socket bind failed"); + exit(1); + } + + pipe(pipefd); + + listen(lfd, 1); + + producer_id = fork(); + if (producer_id == 0) { + producer(&consumer_addr); + exit(0); + } + + set_sig_handler(SIGURG); + signal_producer(pipefd[1]); + + pfd = accept(lfd, (struct sockaddr *) &paddr, &len); + fcntl(pfd, F_SETOWN, getpid()); + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 1: + * veriyf that SIGURG is + * delivered and 63 bytes are + * read and oob is '@' + */ + wait_for_data(pfd, POLLIN | POLLPRI); + read_oob(pfd, &oob); + len = read_data(pfd, buf, 1024); + if (!signal_recvd || len != 63 || oob != '@') { + fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 2: + * Verify that the first OOB is over written by + * the 2nd one and the first OOB is returned as + * part of the read, and sigurg is received. + */ + wait_for_data(pfd, POLLIN | POLLPRI); + len = 0; + while (len < 70) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + read_oob(pfd, &oob); + if (!signal_recvd || len != 127 || oob != '#') { + fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 3: + * verify that 2nd oob over writes + * the first one and read breaks at + * oob boundary returning 127 bytes + * and sigurg is received and atmark + * is set. + * oob is '%' and second read returns + * 64 bytes. + */ + len = 0; + wait_for_data(pfd, POLLIN | POLLPRI); + while (len < 150) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + read_oob(pfd, &oob); + + if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) { + fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ", + "atmark %d\n", signal_recvd, len, oob, atmark); + die(1); + } + + signal_recvd = 0; + + len = read_data(pfd, buf, 1024); + if (len != 64) { + fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 4: + * verify that a single byte + * oob message is delivered. + * set non blocking mode and + * check proper error is + * returned and sigurg is + * received and correct + * oob is read. + */ + + set_filemode(pfd, 0); + + wait_for_data(pfd, POLLIN | POLLPRI); + len = read_data(pfd, buf, 1024); + if ((len == -1) && (errno == 11)) + len = 0; + + read_oob(pfd, &oob); + + if (!signal_recvd || len != 0 || oob != '@') { + fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n", + signal_recvd, len, oob); + die(1); + } + + set_filemode(pfd, 1); + + /* Inline Testing */ + + on = 1; + if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) { + perror("SO_OOBINLINE"); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 1 -- Inline: + * Check that SIGURG is + * delivered and 63 bytes are + * read and oob is '@' + */ + + wait_for_data(pfd, POLLIN | POLLPRI); + len = read_data(pfd, buf, 1024); + + if (!signal_recvd || len != 63) { + fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n", + signal_recvd, len); + die(1); + } + + len = read_data(pfd, buf, 1024); + + if (len != 1) { + fprintf(stderr, + "Test 1.1 Inline failed, sigurg %d len %d oob %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 2 -- Inline: + * Verify that the first OOB is over written by + * the 2nd one and read breaks correctly on + * 2nd OOB boundary with the first OOB returned as + * part of the read, and sigurg is delivered and + * siocatmark returns true. + * next read returns one byte, the oob byte + * and siocatmark returns false. + */ + len = 0; + wait_for_data(pfd, POLLIN | POLLPRI); + while (len < 70) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (len != 127 || atmark != 1 || !signal_recvd) { + fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n", + len, atmark); + die(1); + } + + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (len != 1 || buf[0] != '#' || atmark == 1) { + fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n", + len, buf[0], atmark); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 3 -- Inline: + * verify that 2nd oob over writes + * the first one and read breaks at + * oob boundary returning 127 bytes + * and sigurg is received and siocatmark + * is true after the read. + * subsequent read returns 65 bytes + * because of oob which should be '%'. + */ + len = 0; + wait_for_data(pfd, POLLIN | POLLPRI); + while (len < 126) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (!signal_recvd || len != 127 || !atmark) { + fprintf(stderr, + "Test 3 Inline failed, sigurg %d len %d data %c\n", + signal_recvd, len, buf[0]); + die(1); + } + + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (len != 65 || buf[0] != '%' || atmark != 0) { + fprintf(stderr, + "Test 3.1 Inline failed, len %d oob %c atmark %d\n", + len, buf[0], atmark); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 4 -- Inline: + * verify that a single + * byte oob message is delivered + * and read returns one byte, the oob + * byte and sigurg is received + */ + wait_for_data(pfd, POLLIN | POLLPRI); + len = read_data(pfd, buf, 1024); + if (!signal_recvd || len != 1 || buf[0] != '@') { + fprintf(stderr, + "Test 4 Inline failed, signal %d len %d data %c\n", + signal_recvd, len, buf[0]); + die(1); + } + die(0); +} -- cgit v1.2.3 From 8679c31e0284aa3aaba038035e443180b5bacb99 Mon Sep 17 00:00:00 2001 From: Rocco Yue Date: Tue, 3 Aug 2021 20:02:50 +0800 Subject: net: add extack arg for link ops Pass extack arg to validate_linkmsg and validate_link_af callbacks. If a netlink attribute has a reject_message, use the extended ack mechanism to carry the message back to user space. Signed-off-by: Rocco Yue Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 3 ++- net/core/rtnetlink.c | 9 +++++---- net/ipv4/devinet.c | 5 +++-- net/ipv6/addrconf.c | 5 +++-- 4 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 384e800665f2..9f48733bfd21 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -153,7 +153,8 @@ struct rtnl_af_ops { u32 ext_filter_mask); int (*validate_link_af)(const struct net_device *dev, - const struct nlattr *attr); + const struct nlattr *attr, + struct netlink_ext_ack *extack); int (*set_link_af)(struct net_device *dev, const struct nlattr *attr, struct netlink_ext_ack *extack); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e79aaf1f7139..7c9d32cfe607 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2257,7 +2257,8 @@ invalid_attr: return -EINVAL; } -static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) +static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) { if (dev) { if (tb[IFLA_ADDRESS] && @@ -2284,7 +2285,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return -EOPNOTSUPP; if (af_ops->validate_link_af) { - err = af_ops->validate_link_af(dev, af); + err = af_ops->validate_link_af(dev, af, extack); if (err < 0) return err; } @@ -2592,7 +2593,7 @@ static int do_setlink(const struct sk_buff *skb, const struct net_device_ops *ops = dev->netdev_ops; int err; - err = validate_linkmsg(dev, tb); + err = validate_linkmsg(dev, tb, extack); if (err < 0) return err; @@ -3290,7 +3291,7 @@ replay: m_ops = master_dev->rtnl_link_ops; } - err = validate_linkmsg(dev, tb); + err = validate_linkmsg(dev, tb, extack); if (err < 0) return err; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c82aded8da7d..f4468980b675 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1950,7 +1950,8 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { }; static int inet_validate_link_af(const struct net_device *dev, - const struct nlattr *nla) + const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct nlattr *a, *tb[IFLA_INET_MAX+1]; int err, rem; @@ -1959,7 +1960,7 @@ static int inet_validate_link_af(const struct net_device *dev, return -EAFNOSUPPORT; err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, - inet_af_policy, NULL); + inet_af_policy, extack); if (err < 0) return err; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index db0a89810f28..0b786fc7b7d4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5792,7 +5792,8 @@ static int check_stable_privacy(struct inet6_dev *idev, struct net *net, } static int inet6_validate_link_af(const struct net_device *dev, - const struct nlattr *nla) + const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_INET6_MAX + 1]; struct inet6_dev *idev = NULL; @@ -5805,7 +5806,7 @@ static int inet6_validate_link_af(const struct net_device *dev, } err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, - inet6_af_policy, NULL); + inet6_af_policy, extack); if (err) return err; -- cgit v1.2.3 From 957e2235e5264c97cd6be8e2e17f2e11b41f2239 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 3 Aug 2021 23:34:08 +0300 Subject: net: make switchdev_bridge_port_{,unoffload} loosely coupled with the bridge With the introduction of explicit offloading API in switchdev in commit 2f5dc00f7a3e ("net: bridge: switchdev: let drivers inform which bridge ports are offloaded"), we started having Ethernet switch drivers calling directly into a function exported by net/bridge/br_switchdev.c, which is a function exported by the bridge driver. This means that drivers that did not have an explicit dependency on the bridge before, like cpsw and am65-cpsw, now do - otherwise it is not possible to call a symbol exported by a driver that can be built as module unless you are a module too. There was an attempt to solve the dependency issue in the form of commit b0e81817629a ("net: build all switchdev drivers as modules when the bridge is a module"). Grygorii Strashko, however, says about it: | In my opinion, the problem is a bit bigger here than just fixing the | build :( | | In case, of ^cpsw the switchdev mode is kinda optional and in many | cases (especially for testing purposes, NFS) the multi-mac mode is | still preferable mode. | | There were no such tight dependency between switchdev drivers and | bridge core before and switchdev serviced as independent, notification | based layer between them, so ^cpsw still can be "Y" and bridge can be | "M". Now for mostly every kernel build configuration the CONFIG_BRIDGE | will need to be set as "Y", or we will have to update drivers to | support build with BRIDGE=n and maintain separate builds for | networking vs non-networking testing. But is this enough? Wouldn't | it cause 'chain reaction' required to add more and more "Y" options | (like CONFIG_VLAN_8021Q)? | | PS. Just to be sure we on the same page - ARM builds will be forced | (with this patch) to have CONFIG_TI_CPSW_SWITCHDEV=m and so all our | automation testing will just fail with omap2plus_defconfig. In the light of this, it would be desirable for some configurations to avoid dependencies between switchdev drivers and the bridge, and have the switchdev mode as completely optional within the driver. Arnd Bergmann also tried to write a patch which better expressed the build time dependency for Ethernet switch drivers where the switchdev support is optional, like cpsw/am65-cpsw, and this made the drivers follow the bridge (compile as module if the bridge is a module) only if the optional switchdev support in the driver was enabled in the first place: https://patchwork.kernel.org/project/netdevbpf/patch/20210802144813.1152762-1-arnd@kernel.org/ but this still did not solve the fact that cpsw and am65-cpsw now must be built as modules when the bridge is a module - it just expressed correctly that optional dependency. But the new behavior is an apparent regression from Grygorii's perspective. So to support the use case where the Ethernet driver is built-in, NET_SWITCHDEV (a bool option) is enabled, and the bridge is a module, we need a framework that can handle the possible absence of the bridge from the running system, i.e. runtime bloatware as opposed to build-time bloatware. Luckily we already have this framework, since switchdev has been using it extensively. Events from the bridge side are transmitted to the driver side using notifier chains - this was originally done so that unrelated drivers could snoop for events emitted by the bridge towards ports that are implemented by other drivers (think of a switch driver with LAG offload that listens for switchdev events on a bonding/team interface that it offloads). There are also events which are transmitted from the driver side to the bridge side, which again are modeled using notifiers. SWITCHDEV_FDB_ADD_TO_BRIDGE is an example of this, and deals with notifying the bridge that a MAC address has been dynamically learned. So there is a precedent we can use for modeling the new framework. The difference compared to SWITCHDEV_FDB_ADD_TO_BRIDGE is that the work that the bridge needs to do when a port becomes offloaded is blocking in its nature: replay VLANs, MDBs etc. The calling context is indeed blocking (we are under rtnl_mutex), but the existing switchdev notification chain that the bridge is subscribed to is only the atomic one. So we need to subscribe the bridge to the blocking switchdev notification chain too. This patch: - keeps the driver-side perception of the switchdev_bridge_port_{,un}offload unchanged - moves the implementation of switchdev_bridge_port_{,un}offload from the bridge module into the switchdev module. - makes everybody that is subscribed to the switchdev blocking notifier chain "hear" offload & unoffload events - makes the bridge driver subscribe and handle those events - moves the bridge driver's handling of those events into 2 new functions called br_switchdev_port_{,un}offload. These functions contain in fact the core of the logic that was previously in switchdev_bridge_port_{,un}offload, just that now we go through an extra indirection layer to reach them. Unlike all the other switchdev notification structures, the structure used to carry the bridge port information, struct switchdev_notifier_brport_info, does not contain a "bool handled". This is because in the current usage pattern, we always know that a switchdev bridge port offloading event will be handled by the bridge, because the switchdev_bridge_port_offload() call was initiated by a NETDEV_CHANGEUPPER event in the first place, where info->upper_dev is a bridge. So if the bridge wasn't loaded, then the CHANGEUPPER event couldn't have happened. Signed-off-by: Vladimir Oltean Tested-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- drivers/net/ethernet/ti/cpsw_new.c | 2 +- include/linux/if_bridge.h | 35 ---------------------- include/net/switchdev.h | 46 ++++++++++++++++++++++++++++ net/bridge/br.c | 51 +++++++++++++++++++++++++++++++- net/bridge/br_private.h | 29 ++++++++++++++++++ net/bridge/br_switchdev.c | 36 ++++++---------------- net/switchdev/switchdev.c | 48 ++++++++++++++++++++++++++++++ 8 files changed, 184 insertions(+), 65 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 4f67d1a98c0d..fb5d2ac3f0d2 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -7,7 +7,6 @@ #include #include -#include #include #include #include @@ -28,6 +27,7 @@ #include #include #include +#include #include "cpsw_ale.h" #include "cpsw_sl.h" diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index b4f55ff4e84f..ae167223e87f 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -29,6 +28,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 21daed10322e..509e18c7e740 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -190,39 +190,4 @@ static inline clock_t br_get_ageing_time(const struct net_device *br_dev) } #endif -#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_NET_SWITCHDEV) - -int switchdev_bridge_port_offload(struct net_device *brport_dev, - struct net_device *dev, const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb, - bool tx_fwd_offload, - struct netlink_ext_ack *extack); -void switchdev_bridge_port_unoffload(struct net_device *brport_dev, - const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb); - -#else - -static inline int -switchdev_bridge_port_offload(struct net_device *brport_dev, - struct net_device *dev, const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb, - bool tx_fwd_offload, - struct netlink_ext_ack *extack) -{ - return -EINVAL; -} - -static inline void -switchdev_bridge_port_unoffload(struct net_device *brport_dev, - const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb) -{ -} -#endif - #endif diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 66468ff8cc0a..60d806b6a5ae 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -180,6 +180,14 @@ struct switchdev_obj_in_state_mrp { typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj); +struct switchdev_brport { + struct net_device *dev; + const void *ctx; + struct notifier_block *atomic_nb; + struct notifier_block *blocking_nb; + bool tx_fwd_offload; +}; + enum switchdev_notifier_type { SWITCHDEV_FDB_ADD_TO_BRIDGE = 1, SWITCHDEV_FDB_DEL_TO_BRIDGE, @@ -197,6 +205,9 @@ enum switchdev_notifier_type { SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE, SWITCHDEV_VXLAN_FDB_OFFLOADED, + + SWITCHDEV_BRPORT_OFFLOADED, + SWITCHDEV_BRPORT_UNOFFLOADED, }; struct switchdev_notifier_info { @@ -226,6 +237,11 @@ struct switchdev_notifier_port_attr_info { bool handled; }; +struct switchdev_notifier_brport_info { + struct switchdev_notifier_info info; /* must be first */ + const struct switchdev_brport brport; +}; + static inline struct net_device * switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) { @@ -246,6 +262,17 @@ switchdev_fdb_is_dynamically_learned(const struct switchdev_notifier_fdb_info *f #ifdef CONFIG_NET_SWITCHDEV +int switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack); +void switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb); + void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, @@ -316,6 +343,25 @@ int switchdev_handle_port_attr_set(struct net_device *dev, struct netlink_ext_ack *extack)); #else +static inline int +switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline void +switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ +} + static inline void switchdev_deferred_process(void) { } diff --git a/net/bridge/br.c b/net/bridge/br.c index 8fb5dca5f8e0..d3a32c6813e0 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -201,6 +201,48 @@ static struct notifier_block br_switchdev_notifier = { .notifier_call = br_switchdev_event, }; +/* called under rtnl_mutex */ +static int br_switchdev_blocking_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct switchdev_notifier_brport_info *brport_info; + const struct switchdev_brport *b; + struct net_bridge_port *p; + int err = NOTIFY_DONE; + + p = br_port_get_rtnl(dev); + if (!p) + goto out; + + switch (event) { + case SWITCHDEV_BRPORT_OFFLOADED: + brport_info = ptr; + b = &brport_info->brport; + + err = br_switchdev_port_offload(p, b->dev, b->ctx, + b->atomic_nb, b->blocking_nb, + b->tx_fwd_offload, extack); + err = notifier_from_errno(err); + break; + case SWITCHDEV_BRPORT_UNOFFLOADED: + brport_info = ptr; + b = &brport_info->brport; + + br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb, + b->blocking_nb); + break; + } + +out: + return err; +} + +static struct notifier_block br_switchdev_blocking_notifier = { + .notifier_call = br_switchdev_blocking_event, +}; + /* br_boolopt_toggle - change user-controlled boolean option * * @br: bridge device @@ -355,10 +397,14 @@ static int __init br_init(void) if (err) goto err_out4; - err = br_netlink_init(); + err = register_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); if (err) goto err_out5; + err = br_netlink_init(); + if (err) + goto err_out6; + brioctl_set(br_ioctl_stub); #if IS_ENABLED(CONFIG_ATM_LANE) @@ -373,6 +419,8 @@ static int __init br_init(void) return 0; +err_out6: + unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); err_out5: unregister_switchdev_notifier(&br_switchdev_notifier); err_out4: @@ -392,6 +440,7 @@ static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); + unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); unregister_switchdev_notifier(&br_switchdev_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c939631428b9..10d43bf4bb80 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1880,6 +1880,17 @@ static inline void br_sysfs_delbr(struct net_device *dev) { return; } /* br_switchdev.c */ #ifdef CONFIG_NET_SWITCHDEV +int br_switchdev_port_offload(struct net_bridge_port *p, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack); + +void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb); + bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb); void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb); @@ -1908,6 +1919,24 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb) skb->offload_fwd_mark = 0; } #else +static inline int +br_switchdev_port_offload(struct net_bridge_port *p, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline void +br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ +} + static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb) { return false; diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 36d75fd4a80c..6bf518d78f02 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -312,23 +312,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, /* Let the bridge know that this port is offloaded, so that it can assign a * switchdev hardware domain to it. */ -int switchdev_bridge_port_offload(struct net_device *brport_dev, - struct net_device *dev, const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb, - bool tx_fwd_offload, - struct netlink_ext_ack *extack) +int br_switchdev_port_offload(struct net_bridge_port *p, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) { struct netdev_phys_item_id ppid; - struct net_bridge_port *p; int err; - ASSERT_RTNL(); - - p = br_port_get_rtnl(brport_dev); - if (!p) - return -ENODEV; - err = dev_get_port_parent_id(dev, &ppid, false); if (err) return err; @@ -348,23 +341,12 @@ out_switchdev_del: return err; } -EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload); -void switchdev_bridge_port_unoffload(struct net_device *brport_dev, - const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb) +void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) { - struct net_bridge_port *p; - - ASSERT_RTNL(); - - p = br_port_get_rtnl(brport_dev); - if (!p) - return; - nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb); nbp_switchdev_del(p); } -EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 0ae3478561f4..0b2c18efc079 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -809,3 +809,51 @@ int switchdev_handle_port_attr_set(struct net_device *dev, return err; } EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set); + +int switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + struct switchdev_notifier_brport_info brport_info = { + .brport = { + .dev = dev, + .ctx = ctx, + .atomic_nb = atomic_nb, + .blocking_nb = blocking_nb, + .tx_fwd_offload = tx_fwd_offload, + }, + }; + int err; + + ASSERT_RTNL(); + + err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_OFFLOADED, + brport_dev, &brport_info.info, + extack); + return notifier_to_errno(err); +} +EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload); + +void switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ + struct switchdev_notifier_brport_info brport_info = { + .brport = { + .ctx = ctx, + .atomic_nb = atomic_nb, + .blocking_nb = blocking_nb, + }, + }; + + ASSERT_RTNL(); + + call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_UNOFFLOADED, + brport_dev, &brport_info.info, + NULL); +} +EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload); -- cgit v1.2.3 From 04190bf8944deb7e3ac165a1a494db23aa0160a9 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 4 Aug 2021 10:55:56 +0300 Subject: sock: allow reading and changing sk_userlocks with setsockopt SOCK_SNDBUF_LOCK and SOCK_RCVBUF_LOCK flags disable automatic socket buffers adjustment done by kernel (see tcp_fixup_rcvbuf() and tcp_sndbuf_expand()). If we've just created a new socket this adjustment is enabled on it, but if one changes the socket buffer size by setsockopt(SO_{SND,RCV}BUF*) it becomes disabled. CRIU needs to call setsockopt(SO_{SND,RCV}BUF*) on each socket on restore as it first needs to increase buffer sizes for packet queues restore and second it needs to restore back original buffer sizes. So after CRIU restore all sockets become non-auto-adjustable, which can decrease network performance of restored applications significantly. CRIU need to be able to restore sockets with enabled/disabled adjustment to the same state it was before dump, so let's add special setsockopt for it. Let's also export SOCK_SNDBUF_LOCK and SOCK_RCVBUF_LOCK flags to uAPI so that using these interface one can reenable automatic socket buffer adjustment on their sockets. Signed-off-by: Pavel Tikhomirov Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ include/net/sock.h | 3 +-- include/uapi/asm-generic/socket.h | 2 ++ include/uapi/linux/socket.h | 5 +++++ net/core/sock.c | 13 +++++++++++++ 8 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 6b3daba60987..1dd9baf4a6c2 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -129,6 +129,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index cdf404a831b2..1eaf6a1ca561 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -140,6 +140,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 5b5351cdcb33..8baaad52d799 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -121,6 +121,8 @@ #define SO_NETNS_COOKIE 0x4045 +#define SO_BUF_LOCK 0x4046 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 92675dc380fa..e80ee8641ac3 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -122,6 +122,8 @@ #define SO_NETNS_COOKIE 0x0050 +#define SO_BUF_LOCK 0x0051 + #if !defined(__KERNEL__) diff --git a/include/net/sock.h b/include/net/sock.h index ff1be7e7e90b..6e761451c927 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -68,6 +68,7 @@ #include #include #include +#include /* * This structure really needs to be cleaned up. @@ -1438,8 +1439,6 @@ static inline int __sk_prot_rehash(struct sock *sk) #define RCV_SHUTDOWN 1 #define SEND_SHUTDOWN 2 -#define SOCK_SNDBUF_LOCK 1 -#define SOCK_RCVBUF_LOCK 2 #define SOCK_BINDADDR_LOCK 4 #define SOCK_BINDPORT_LOCK 8 diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index d588c244ec2f..1f0a2b4864e4 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -124,6 +124,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index c3409c8ec0dd..eb0a9a5b6e71 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -26,4 +26,9 @@ struct __kernel_sockaddr_storage { }; }; +#define SOCK_SNDBUF_LOCK 1 +#define SOCK_RCVBUF_LOCK 2 + +#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) + #endif /* _UAPI_LINUX_SOCKET_H */ diff --git a/net/core/sock.c b/net/core/sock.c index 9671c32e6ef5..aada649e07e8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1358,6 +1358,15 @@ set_sndbuf: ret = sock_bindtoindex_locked(sk, val); break; + case SO_BUF_LOCK: + if (val & ~SOCK_BUF_LOCK_MASK) { + ret = -EINVAL; + break; + } + sk->sk_userlocks = val | (sk->sk_userlocks & + ~SOCK_BUF_LOCK_MASK); + break; + default: ret = -ENOPROTOOPT; break; @@ -1720,6 +1729,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val64 = sock_net(sk)->net_cookie; break; + case SO_BUF_LOCK: + v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK; + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). -- cgit v1.2.3 From db243b796439c0caba47865564d8acd18a301d18 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 4 Aug 2021 15:45:36 -0500 Subject: net/ipv4/ipv6: Replace one-element arraya with flexible-array members MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. Use an anonymous union with a couple of anonymous structs in order to keep userspace unchanged and refactor the related code accordingly: $ pahole -C group_filter net/ipv4/ip_sockglue.o struct group_filter { union { struct { __u32 gf_interface_aux; /* 0 4 */ /* XXX 4 bytes hole, try to pack */ struct __kernel_sockaddr_storage gf_group_aux; /* 8 128 */ /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */ __u32 gf_fmode_aux; /* 136 4 */ __u32 gf_numsrc_aux; /* 140 4 */ struct __kernel_sockaddr_storage gf_slist[1]; /* 144 128 */ }; /* 0 272 */ struct { __u32 gf_interface; /* 0 4 */ /* XXX 4 bytes hole, try to pack */ struct __kernel_sockaddr_storage gf_group; /* 8 128 */ /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */ __u32 gf_fmode; /* 136 4 */ __u32 gf_numsrc; /* 140 4 */ struct __kernel_sockaddr_storage gf_slist_flex[0]; /* 144 0 */ }; /* 0 144 */ }; /* 0 272 */ /* size: 272, cachelines: 5, members: 1 */ /* last cacheline: 16 bytes */ }; $ pahole -C compat_group_filter net/ipv4/ip_sockglue.o struct compat_group_filter { union { struct { __u32 gf_interface_aux; /* 0 4 */ struct __kernel_sockaddr_storage gf_group_aux __attribute__((__aligned__(4))); /* 4 128 */ /* --- cacheline 2 boundary (128 bytes) was 4 bytes ago --- */ __u32 gf_fmode_aux; /* 132 4 */ __u32 gf_numsrc_aux; /* 136 4 */ struct __kernel_sockaddr_storage gf_slist[1] __attribute__((__aligned__(4))); /* 140 128 */ } __attribute__((__packed__)) __attribute__((__aligned__(4))); /* 0 268 */ struct { __u32 gf_interface; /* 0 4 */ struct __kernel_sockaddr_storage gf_group __attribute__((__aligned__(4))); /* 4 128 */ /* --- cacheline 2 boundary (128 bytes) was 4 bytes ago --- */ __u32 gf_fmode; /* 132 4 */ __u32 gf_numsrc; /* 136 4 */ struct __kernel_sockaddr_storage gf_slist_flex[0] __attribute__((__aligned__(4))); /* 140 0 */ } __attribute__((__packed__)) __attribute__((__aligned__(4))); /* 0 140 */ } __attribute__((__aligned__(1))); /* 0 268 */ /* size: 268, cachelines: 5, members: 1 */ /* forced alignments: 1 */ /* last cacheline: 12 bytes */ } __attribute__((__packed__)); This helps with the ongoing efforts to globally enable -Warray-bounds and get us closer to being able to tighten the FORTIFY_SOURCE routines on memcpy(). [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.10/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/79 Link: https://github.com/KSPP/linux/issues/109 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/net/compat.h | 27 ++++++++++++++++++++------- include/uapi/linux/in.h | 21 ++++++++++++++++----- net/ipv4/ip_sockglue.c | 19 ++++++++++--------- net/ipv6/ipv6_sockglue.c | 18 +++++++++--------- 4 files changed, 55 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/net/compat.h b/include/net/compat.h index 84805bdc4435..595fee069b82 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -71,13 +71,26 @@ struct compat_group_source_req { } __packed; struct compat_group_filter { - __u32 gf_interface; - struct __kernel_sockaddr_storage gf_group - __aligned(4); - __u32 gf_fmode; - __u32 gf_numsrc; - struct __kernel_sockaddr_storage gf_slist[1] - __aligned(4); + union { + struct { + __u32 gf_interface_aux; + struct __kernel_sockaddr_storage gf_group_aux + __aligned(4); + __u32 gf_fmode_aux; + __u32 gf_numsrc_aux; + struct __kernel_sockaddr_storage gf_slist[1] + __aligned(4); + } __packed; + struct { + __u32 gf_interface; + struct __kernel_sockaddr_storage gf_group + __aligned(4); + __u32 gf_fmode; + __u32 gf_numsrc; + struct __kernel_sockaddr_storage gf_slist_flex[] + __aligned(4); + } __packed; + }; } __packed; #endif /* NET_COMPAT_H */ diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 193b7cf1f0ac..14168225cecd 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -222,11 +222,22 @@ struct group_source_req { }; struct group_filter { - __u32 gf_interface; /* interface index */ - struct __kernel_sockaddr_storage gf_group; /* multicast address */ - __u32 gf_fmode; /* filter mode */ - __u32 gf_numsrc; /* number of sources */ - struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + union { + struct { + __u32 gf_interface_aux; /* interface index */ + struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */ + __u32 gf_fmode_aux; /* filter mode */ + __u32 gf_numsrc_aux; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + }; + struct { + __u32 gf_interface; /* interface index */ + struct __kernel_sockaddr_storage gf_group; /* multicast address */ + __u32 gf_fmode; /* filter mode */ + __u32 gf_numsrc; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */ + }; + }; }; #define GROUP_FILTER_SIZE(numsrc) \ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 468969c75708..b297bb28556e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -790,7 +790,8 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) goto out_free_gsf; err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc, - gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist); + gsf->gf_fmode, &gsf->gf_group, + gsf->gf_slist_flex); out_free_gsf: kfree(gsf); return err; @@ -799,7 +800,7 @@ out_free_gsf: static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter *gf32; unsigned int n; void *p; @@ -813,7 +814,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, p = kmalloc(optlen + 4, GFP_KERNEL); if (!p) return -ENOMEM; - gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ err = -EFAULT; if (copy_from_sockptr(gf32, optval, optlen)) @@ -826,7 +827,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, goto out_free_gsf; err = -EINVAL; - if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) + if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) goto out_free_gsf; /* numsrc >= (4G-140)/128 overflow in 32 bits */ @@ -834,7 +835,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) goto out_free_gsf; err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, - &gf32->gf_group, gf32->gf_slist); + &gf32->gf_group, gf32->gf_slist_flex); out_free_gsf: kfree(p); return err; @@ -1455,7 +1456,7 @@ static bool getsockopt_needs_rtnl(int optname) static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct group_filter, gf_slist); + const int size0 = offsetof(struct group_filter, gf_slist_flex); struct group_filter __user *p = optval; struct group_filter gsf; int num; @@ -1467,7 +1468,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, return -EFAULT; num = gsf.gf_numsrc; - err = ip_mc_gsfget(sk, &gsf, p->gf_slist); + err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex); if (err) return err; if (gsf.gf_numsrc < num) @@ -1481,7 +1482,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter __user *p = optval; struct compat_group_filter gf32; struct group_filter gf; @@ -1498,7 +1499,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, num = gf.gf_numsrc = gf32.gf_numsrc; gf.gf_group = gf32.gf_group; - err = ip_mc_gsfget(sk, &gf, p->gf_slist); + err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex); if (err) return err; if (gf.gf_numsrc < num) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a6804a7e34c1..e4bdb09c5586 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -225,7 +225,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) goto out_free_gsf; - ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist); + ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex); out_free_gsf: kfree(gsf); return ret; @@ -234,7 +234,7 @@ out_free_gsf: static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter *gf32; void *p; int ret; @@ -249,7 +249,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (!p) return -ENOMEM; - gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ ret = -EFAULT; if (copy_from_sockptr(gf32, optval, optlen)) goto out_free_p; @@ -261,14 +261,14 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, goto out_free_p; ret = -EINVAL; - if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) + if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) goto out_free_p; ret = ip6_mc_msfilter(sk, &(struct group_filter){ .gf_interface = gf32->gf_interface, .gf_group = gf32->gf_group, .gf_fmode = gf32->gf_fmode, - .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist); + .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex); out_free_p: kfree(p); @@ -1048,7 +1048,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, static int ipv6_get_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct group_filter, gf_slist); + const int size0 = offsetof(struct group_filter, gf_slist_flex); struct group_filter __user *p = optval; struct group_filter gsf; int num; @@ -1062,7 +1062,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval, return -EADDRNOTAVAIL; num = gsf.gf_numsrc; lock_sock(sk); - err = ip6_mc_msfget(sk, &gsf, p->gf_slist); + err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex); if (!err) { if (num > gsf.gf_numsrc) num = gsf.gf_numsrc; @@ -1077,7 +1077,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval, static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, int __user *optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter __user *p = optval; struct compat_group_filter gf32; struct group_filter gf; @@ -1100,7 +1100,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, return -EADDRNOTAVAIL; lock_sock(sk); - err = ip6_mc_msfget(sk, &gf, p->gf_slist); + err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex); release_sock(sk); if (err) return err; -- cgit v1.2.3 From e11c0e258c1a87e478a7a44e7c3d9e8ea4b8438c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 4 Aug 2021 16:43:52 -0500 Subject: net/ipv6/mcast: Use struct_size() helper Replace IP6_SFLSIZE() with struct_size() helper in order to avoid any potential type mistakes or integer overflows that, in the worst scenario, could lead to heap overflows. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/net/if_inet6.h | 3 --- net/ipv6/mcast.c | 20 +++++++++++++------- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 71bb4cc4d05d..42235c178b06 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -82,9 +82,6 @@ struct ip6_sf_socklist { struct in6_addr sl_addr[]; }; -#define IP6_SFLSIZE(count) (sizeof(struct ip6_sf_socklist) + \ - (count) * sizeof(struct in6_addr)) - #define IP6_SFBLOCK 10 /* allocate this many at once */ struct ipv6_mc_socklist { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 54ec163fbafa..cd951faa2fac 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -447,7 +447,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) count += psl->sl_max; - newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -457,7 +458,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) { for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } psl = newpsl; @@ -525,8 +527,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, goto done; } if (gsf->gf_numsrc) { - newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc), - GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, + gsf->gf_numsrc), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -543,7 +546,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { mutex_unlock(&idev->mc_lock); - sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max)); + sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr, + newpsl->sl_max)); goto done; } mutex_unlock(&idev->mc_lock); @@ -559,7 +563,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, if (psl) { ip6_mc_del_src(idev, group, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } else { ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); @@ -2607,7 +2612,8 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, psl->sl_count, psl->sl_addr, 0); RCU_INIT_POINTER(iml->sflist, NULL); - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } -- cgit v1.2.3 From 6ec566131de0e330fffd33753b325fb4d6b2d00e Mon Sep 17 00:00:00 2001 From: Tedd Ho-Jeong An Date: Wed, 4 Aug 2021 17:32:08 -0700 Subject: Bluetooth: Add support hdev to allocate private data This patch adds support hdev to allocate extra size for private data. The size of private data is specified in the hdev_alloc_size(priv_size) and the allocated buffer can be accessed with hci_get_priv(hdev). Signed-off-by: Tedd Ho-Jeong An Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 13 ++++++++++++- net/bluetooth/hci_core.c | 13 ++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b011eeea28c3..a7d06d7da602 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1224,10 +1224,21 @@ static inline void hci_set_drvdata(struct hci_dev *hdev, void *data) dev_set_drvdata(&hdev->dev, data); } +static inline void *hci_get_priv(struct hci_dev *hdev) +{ + return (char *)hdev + sizeof(*hdev); +} + struct hci_dev *hci_dev_get(int index); struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type); -struct hci_dev *hci_alloc_dev(void); +struct hci_dev *hci_alloc_dev_priv(int sizeof_priv); + +static inline struct hci_dev *hci_alloc_dev(void) +{ + return hci_alloc_dev_priv(0); +} + void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2b78e1336c53..cb2e9e513907 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3751,11 +3751,18 @@ done: } /* Alloc HCI device */ -struct hci_dev *hci_alloc_dev(void) +struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) { struct hci_dev *hdev; + unsigned int alloc_size; - hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); + alloc_size = sizeof(*hdev); + if (sizeof_priv) { + /* Fixme: May need ALIGN-ment? */ + alloc_size += sizeof_priv; + } + + hdev = kzalloc(alloc_size, GFP_KERNEL); if (!hdev) return NULL; @@ -3869,7 +3876,7 @@ struct hci_dev *hci_alloc_dev(void) return hdev; } -EXPORT_SYMBOL(hci_alloc_dev); +EXPORT_SYMBOL(hci_alloc_dev_priv); /* Free HCI device */ void hci_free_dev(struct hci_dev *hdev) -- cgit v1.2.3 From e04480920d1eec9c061841399aa6f35b6f987d8b Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 4 Aug 2021 19:26:56 +0900 Subject: Bluetooth: defer cleanup of resources in hci_unregister_dev() syzbot is hitting might_sleep() warning at hci_sock_dev_event() due to calling lock_sock() with rw spinlock held [1]. It seems that history of this locking problem is a trial and error. Commit b40df5743ee8 ("[PATCH] bluetooth: fix socket locking in hci_sock_dev_event()") in 2.6.21-rc4 changed bh_lock_sock() to lock_sock() as an attempt to fix lockdep warning. Then, commit 4ce61d1c7a8e ("[BLUETOOTH]: Fix locking in hci_sock_dev_event().") in 2.6.22-rc2 changed lock_sock() to local_bh_disable() + bh_lock_sock_nested() as an attempt to fix the sleep in atomic context warning. Then, commit 4b5dd696f81b ("Bluetooth: Remove local_bh_disable() from hci_sock.c") in 3.3-rc1 removed local_bh_disable(). Then, commit e305509e678b ("Bluetooth: use correct lock to prevent UAF of hdev object") in 5.13-rc5 again changed bh_lock_sock_nested() to lock_sock() as an attempt to fix CVE-2021-3573. This difficulty comes from current implementation that hci_sock_dev_event(HCI_DEV_UNREG) is responsible for dropping all references from sockets because hci_unregister_dev() immediately reclaims resources as soon as returning from hci_sock_dev_event(HCI_DEV_UNREG). But the history suggests that hci_sock_dev_event(HCI_DEV_UNREG) was not doing what it should do. Therefore, instead of trying to detach sockets from device, let's accept not detaching sockets from device at hci_sock_dev_event(HCI_DEV_UNREG), by moving actual cleanup of resources from hci_unregister_dev() to hci_cleanup_dev() which is called by bt_host_release() when all references to this unregistered device (which is a kobject) are gone. Since hci_sock_dev_event(HCI_DEV_UNREG) no longer resets hci_pi(sk)->hdev, we need to check whether this device was unregistered and return an error based on HCI_UNREGISTER flag. There might be subtle behavioral difference in "monitor the hdev" functionality; please report if you found something went wrong due to this patch. Link: https://syzkaller.appspot.com/bug?extid=a5df189917e79d5e59c9 [1] Reported-by: syzbot Suggested-by: Linus Torvalds Signed-off-by: Tetsuo Handa Fixes: e305509e678b ("Bluetooth: use correct lock to prevent UAF of hdev object") Acked-by: Luiz Augusto von Dentz Signed-off-by: Linus Torvalds --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 16 ++++++------- net/bluetooth/hci_sock.c | 49 +++++++++++++++++++++++++++------------- net/bluetooth/hci_sysfs.c | 3 +++ 4 files changed, 45 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a53e94459ecd..db4312e44d47 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1230,6 +1230,7 @@ struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); +void hci_cleanup_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2560ed2f144d..e1a545c8a69f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3996,14 +3996,10 @@ EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ void hci_unregister_dev(struct hci_dev *hdev) { - int id; - BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); hci_dev_set_flag(hdev, HCI_UNREGISTER); - id = hdev->id; - write_lock(&hci_dev_list_lock); list_del(&hdev->list); write_unlock(&hci_dev_list_lock); @@ -4038,7 +4034,14 @@ void hci_unregister_dev(struct hci_dev *hdev) } device_del(&hdev->dev); + /* Actual cleanup is deferred until hci_cleanup_dev(). */ + hci_dev_put(hdev); +} +EXPORT_SYMBOL(hci_unregister_dev); +/* Cleanup HCI device */ +void hci_cleanup_dev(struct hci_dev *hdev) +{ debugfs_remove_recursive(hdev->debugfs); kfree_const(hdev->hw_info); kfree_const(hdev->fw_info); @@ -4063,11 +4066,8 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_blocked_keys_clear(hdev); hci_dev_unlock(hdev); - hci_dev_put(hdev); - - ida_simple_remove(&hci_index_ida, id); + ida_simple_remove(&hci_index_ida, hdev->id); } -EXPORT_SYMBOL(hci_unregister_dev); /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b04a5a02ecf3..f1128c2134f0 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -59,6 +59,17 @@ struct hci_pinfo { char comm[TASK_COMM_LEN]; }; +static struct hci_dev *hci_hdev_from_sock(struct sock *sk) +{ + struct hci_dev *hdev = hci_pi(sk)->hdev; + + if (!hdev) + return ERR_PTR(-EBADFD); + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return ERR_PTR(-EPIPE); + return hdev; +} + void hci_sock_set_flag(struct sock *sk, int nr) { set_bit(nr, &hci_pi(sk)->flags); @@ -759,19 +770,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) if (event == HCI_DEV_UNREG) { struct sock *sk; - /* Detach sockets from device */ + /* Wake up sockets using this dead device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { - lock_sock(sk); if (hci_pi(sk)->hdev == hdev) { - hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; - sk->sk_state = BT_OPEN; sk->sk_state_change(sk); - - hci_dev_put(hdev); } - release_sock(sk); } read_unlock(&hci_sk_list.lock); } @@ -930,10 +935,10 @@ static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg) static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { - struct hci_dev *hdev = hci_pi(sk)->hdev; + struct hci_dev *hdev = hci_hdev_from_sock(sk); - if (!hdev) - return -EBADFD; + if (IS_ERR(hdev)) + return PTR_ERR(hdev); if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; @@ -1103,6 +1108,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, lock_sock(sk); + /* Allow detaching from dead device and attaching to alive device, if + * the caller wants to re-bind (instead of close) this socket in + * response to hci_sock_dev_event(HCI_DEV_UNREG) notification. + */ + hdev = hci_pi(sk)->hdev; + if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + hci_pi(sk)->hdev = NULL; + sk->sk_state = BT_OPEN; + hci_dev_put(hdev); + } + hdev = NULL; + if (sk->sk_state == BT_BOUND) { err = -EALREADY; goto done; @@ -1379,9 +1396,9 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, lock_sock(sk); - hdev = hci_pi(sk)->hdev; - if (!hdev) { - err = -EBADFD; + hdev = hci_hdev_from_sock(sk); + if (IS_ERR(hdev)) { + err = PTR_ERR(hdev); goto done; } @@ -1743,9 +1760,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto done; } - hdev = hci_pi(sk)->hdev; - if (!hdev) { - err = -EBADFD; + hdev = hci_hdev_from_sock(sk); + if (IS_ERR(hdev)) { + err = PTR_ERR(hdev); goto done; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 9874844a95a9..b69d88b88d2e 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -83,6 +83,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn) static void bt_host_release(struct device *dev) { struct hci_dev *hdev = to_hci_dev(dev); + + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + hci_cleanup_dev(hdev); kfree(hdev); module_put(THIS_MODULE); } -- cgit v1.2.3 From c73c57081b3d59aa99093fbedced32ea02620cd3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 6 Aug 2021 03:20:08 +0300 Subject: net: dsa: don't disable multicast flooding to the CPU even without an IGMP querier Commit 08cc83cc7fd8 ("net: dsa: add support for BRIDGE_MROUTER attribute") added an option for users to turn off multicast flooding towards the CPU if they turn off the IGMP querier on a bridge which already has enslaved ports (echo 0 > /sys/class/net/br0/bridge/multicast_router). And commit a8b659e7ff75 ("net: dsa: act as passthrough for bridge port flags") simply papered over that issue, because it moved the decision to flood the CPU with multicast (or not) from the DSA core down to individual drivers, instead of taking a more radical position then. The truth is that disabling multicast flooding to the CPU is simply something we are not prepared to do now, if at all. Some reasons: - ICMP6 neighbor solicitation messages are unregistered multicast packets as far as the bridge is concerned. So if we stop flooding multicast, the outside world cannot ping the bridge device's IPv6 link-local address. - There might be foreign interfaces bridged with our DSA switch ports (sending a packet towards the host does not necessarily equal termination, but maybe software forwarding). So if there is no one interested in that multicast traffic in the local network stack, that doesn't mean nobody is. - PTP over L4 (IPv4, IPv6) is multicast, but is unregistered as far as the bridge is concerned. This should reach the CPU port. - The switch driver might not do FDB partitioning. And since we don't even bother to do more fine-grained flood disabling (such as "disable flooding _from_port_N_ towards the CPU port" as opposed to "disable flooding _from_any_port_ towards the CPU port"), this breaks standalone ports, or even multiple bridges where one has an IGMP querier and one doesn't. Reverting the logic makes all of the above work. Fixes: a8b659e7ff75 ("net: dsa: act as passthrough for bridge port flags") Fixes: 08cc83cc7fd8 ("net: dsa: add support for BRIDGE_MROUTER attribute") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 10 ---------- drivers/net/dsa/b53/b53_priv.h | 2 -- drivers/net/dsa/bcm_sf2.c | 1 - drivers/net/dsa/mv88e6xxx/chip.c | 18 ------------------ include/net/dsa.h | 2 -- net/dsa/dsa_priv.h | 2 -- net/dsa/port.c | 11 ----------- net/dsa/slave.c | 6 ------ 8 files changed, 52 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index b23e3488695b..bd1417a66cbf 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2016,15 +2016,6 @@ int b53_br_flags(struct dsa_switch *ds, int port, } EXPORT_SYMBOL(b53_br_flags); -int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack) -{ - b53_port_set_mcast_flood(ds->priv, port, mrouter); - - return 0; -} -EXPORT_SYMBOL(b53_set_mrouter); - static bool b53_possible_cpu_port(struct dsa_switch *ds, int port) { /* Broadcom switches will accept enabling Broadcom tags on the @@ -2268,7 +2259,6 @@ static const struct dsa_switch_ops b53_switch_ops = { .port_bridge_leave = b53_br_leave, .port_pre_bridge_flags = b53_br_flags_pre, .port_bridge_flags = b53_br_flags, - .port_set_mrouter = b53_set_mrouter, .port_stp_state_set = b53_br_set_stp_state, .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 82700a5714c1..9bf8319342b0 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -328,8 +328,6 @@ int b53_br_flags_pre(struct dsa_switch *ds, int port, int b53_br_flags(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack); int b53_setup_devlink_resources(struct dsa_switch *ds); void b53_port_event(struct dsa_switch *ds, int port); void b53_phylink_validate(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 3b018fcf4412..6ce9ec1283e0 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1199,7 +1199,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .port_pre_bridge_flags = b53_br_flags_pre, .port_bridge_flags = b53_br_flags, .port_stp_state_set = b53_br_set_stp_state, - .port_set_mrouter = b53_set_mrouter, .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, .port_vlan_add = b53_vlan_add, diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index ddb51dd132ef..c2c5f1573fe5 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5849,23 +5849,6 @@ out: return err; } -static int mv88e6xxx_port_set_mrouter(struct dsa_switch *ds, int port, - bool mrouter, - struct netlink_ext_ack *extack) -{ - struct mv88e6xxx_chip *chip = ds->priv; - int err; - - if (!chip->info->ops->port_set_mcast_flood) - return -EOPNOTSUPP; - - mv88e6xxx_reg_lock(chip); - err = chip->info->ops->port_set_mcast_flood(chip, port, mrouter); - mv88e6xxx_reg_unlock(chip); - - return err; -} - static bool mv88e6xxx_lag_can_offload(struct dsa_switch *ds, struct net_device *lag, struct netdev_lag_upper_info *info) @@ -6167,7 +6150,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_bridge_leave = mv88e6xxx_port_bridge_leave, .port_pre_bridge_flags = mv88e6xxx_port_pre_bridge_flags, .port_bridge_flags = mv88e6xxx_port_bridge_flags, - .port_set_mrouter = mv88e6xxx_port_set_mrouter, .port_stp_state_set = mv88e6xxx_port_stp_state_set, .port_fast_age = mv88e6xxx_port_fast_age, .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, diff --git a/include/net/dsa.h b/include/net/dsa.h index 7cc9507282d3..d7dc26d316ea 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -714,8 +714,6 @@ struct dsa_switch_ops { int (*port_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); - int (*port_set_mrouter)(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack); /* * VLAN support diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index e43c5dc04282..7841b3957516 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -244,8 +244,6 @@ int dsa_port_pre_bridge_flags(const struct dsa_port *dp, int dsa_port_bridge_flags(const struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct netlink_ext_ack *extack); int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack); diff --git a/net/dsa/port.c b/net/dsa/port.c index c18077a3c779..797a3269a964 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -641,17 +641,6 @@ int dsa_port_bridge_flags(const struct dsa_port *dp, return ds->ops->port_bridge_flags(ds, dp->index, flags, extack); } -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct netlink_ext_ack *extack) -{ - struct dsa_switch *ds = dp->ds; - - if (!ds->ops->port_set_mrouter) - return -EOPNOTSUPP; - - return ds->ops->port_set_mrouter(ds, dp->index, mrouter, extack); -} - int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, bool targeted_match) { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6e1135d3ee33..022174635bc1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -314,12 +314,6 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack); break; - case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: - if (!dsa_port_offloads_bridge(dp, attr->orig_dev)) - return -EOPNOTSUPP; - - ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack); - break; default: ret = -EOPNOTSUPP; break; -- cgit v1.2.3 From 4592ee7f525c4683ec9e290381601fdee50ae110 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Aug 2021 15:02:15 +0200 Subject: netfilter: conntrack: remove offload_pickup sysctl again These two sysctls were added because the hardcoded defaults (2 minutes, tcp, 30 seconds, udp) turned out to be too low for some setups. They appeared in 5.14-rc1 so it should be fine to remove it again. Marcelo convinced me that there should be no difference between a flow that was offloaded vs. a flow that was not wrt. timeout handling. Thus the default is changed to those for TCP established and UDP stream, 5 days and 120 seconds, respectively. Marcelo also suggested to account for the timeout value used for the offloading, this avoids increase beyond the value in the conntrack-sysctl and will also instantly expire the conntrack entry with altered sysctls. Example: nf_conntrack_udp_timeout_stream=60 nf_flowtable_udp_timeout=60 This will remove offloaded udp flows after one minute, rather than two. An earlier version of this patch also cleared the ASSURED bit to allow nf_conntrack to evict the entry via early_drop (i.e., table full). However, it looks like we can safely assume that connection timed out via HW is still in established state, so this isn't needed. Quoting Oz: [..] the hardware sends all packets with a set FIN flags to sw. [..] Connections that are aged in hardware are expected to be in the established state. In case it turns out that back-to-sw-path transition can occur for 'dodgy' connections too (e.g., one side disappeared while software-path would have been in RETRANS timeout), we can adjust this later. Cc: Oz Shlomo Cc: Paul Blakey Suggested-by: Marcelo Ricardo Leitner Signed-off-by: Florian Westphal Reviewed-by: Marcelo Ricardo Leitner Reviewed-by: Oz Shlomo Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.rst | 10 ---------- include/net/netns/conntrack.h | 2 -- net/netfilter/nf_conntrack_proto_tcp.c | 1 - net/netfilter/nf_conntrack_proto_udp.c | 1 - net/netfilter/nf_conntrack_standalone.c | 16 ---------------- net/netfilter/nf_flow_table_core.c | 11 ++++++++--- 6 files changed, 8 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index d31ed6c1cb0d..024d784157c8 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -191,19 +191,9 @@ nf_flowtable_tcp_timeout - INTEGER (seconds) TCP connections may be offloaded from nf conntrack to nf flow table. Once aged, the connection is returned to nf conntrack with tcp pickup timeout. -nf_flowtable_tcp_pickup - INTEGER (seconds) - default 120 - - TCP connection timeout after being aged from nf flow table offload. - nf_flowtable_udp_timeout - INTEGER (seconds) default 30 Control offload timeout for udp connections. UDP connections may be offloaded from nf conntrack to nf flow table. Once aged, the connection is returned to nf conntrack with udp pickup timeout. - -nf_flowtable_udp_pickup - INTEGER (seconds) - default 30 - - UDP connection timeout after being aged from nf flow table offload. diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 37e5300c7e5a..fefd38db95b3 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -30,7 +30,6 @@ struct nf_tcp_net { u8 tcp_ignore_invalid_rst; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) unsigned int offload_timeout; - unsigned int offload_pickup; #endif }; @@ -44,7 +43,6 @@ struct nf_udp_net { unsigned int timeouts[UDP_CT_MAX]; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) unsigned int offload_timeout; - unsigned int offload_pickup; #endif }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3259416f2ea4..af5115e127cf 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1478,7 +1478,6 @@ void nf_conntrack_tcp_init_net(struct net *net) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) tn->offload_timeout = 30 * HZ; - tn->offload_pickup = 120 * HZ; #endif } diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 698fee49e732..f8e3c0d2602f 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -271,7 +271,6 @@ void nf_conntrack_udp_init_net(struct net *net) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) un->offload_timeout = 30 * HZ; - un->offload_pickup = 30 * HZ; #endif } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 214d9f9e499b..e84b499b7bfa 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -575,7 +575,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD, - NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP, #endif NF_SYSCTL_CT_PROTO_TCP_LOOSE, NF_SYSCTL_CT_PROTO_TCP_LIBERAL, @@ -585,7 +584,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD, - NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP, #endif NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP, NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6, @@ -776,12 +774,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = { - .procname = "nf_flowtable_tcp_pickup", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, #endif [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = { .procname = "nf_conntrack_tcp_loose", @@ -832,12 +824,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = { - .procname = "nf_flowtable_udp_pickup", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, #endif [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = { .procname = "nf_conntrack_icmp_timeout", @@ -1018,7 +1004,6 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout; - table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup; #endif } @@ -1111,7 +1096,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED]; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout; - table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup; #endif nf_conntrack_standalone_init_tcp_sysctl(net, table); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 551976e4284c..8788b519255e 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -183,7 +183,7 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) const struct nf_conntrack_l4proto *l4proto; struct net *net = nf_ct_net(ct); int l4num = nf_ct_protonum(ct); - unsigned int timeout; + s32 timeout; l4proto = nf_ct_l4proto_find(l4num); if (!l4proto) @@ -192,15 +192,20 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) if (l4num == IPPROTO_TCP) { struct nf_tcp_net *tn = nf_tcp_pernet(net); - timeout = tn->offload_pickup; + timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; + timeout -= tn->offload_timeout; } else if (l4num == IPPROTO_UDP) { struct nf_udp_net *tn = nf_udp_pernet(net); - timeout = tn->offload_pickup; + timeout = tn->timeouts[UDP_CT_REPLIED]; + timeout -= tn->offload_timeout; } else { return; } + if (timeout < 0) + timeout = 0; + if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) ct->timeout = nfct_time_stamp + timeout; } -- cgit v1.2.3 From 82564f6c706a37e5f7dec962375581cc9f8fca5d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 8 Aug 2021 14:41:21 +0300 Subject: devlink: Simplify devlink port API calls Devlink port already has pointer to the devlink instance and all API calls that forward these devlink ports to the drivers perform same "devlink_port->devlink" assignment before actual call. This patch removes useless parameter and allows us in the future to create specific devlink_port_ops to manage user space access with reliable ops assignment. Signed-off-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 10 +-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 6 +- .../net/ethernet/mellanox/mlx5/core/sf/devlink.c | 8 +- drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h | 4 +- include/net/devlink.h | 12 ++- net/core/devlink.c | 95 +++++++++++----------- 6 files changed, 64 insertions(+), 71 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 97e6cb6f13c1..2b90388ef209 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1889,8 +1889,7 @@ is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_is_sf_vport(esw, vport_num); } -int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack) { @@ -1899,7 +1898,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, int err = -EOPNOTSUPP; u16 vport_num; - esw = mlx5_devlink_eswitch_get(devlink); + esw = mlx5_devlink_eswitch_get(port->devlink); if (IS_ERR(esw)) return PTR_ERR(esw); @@ -1923,8 +1922,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, return err; } -int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack) { @@ -1933,7 +1931,7 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, int err = -EOPNOTSUPP; u16 vport_num; - esw = mlx5_devlink_eswitch_get(devlink); + esw = mlx5_devlink_eswitch_get(port->devlink); if (IS_ERR(esw)) { NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr"); return PTR_ERR(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d562edf5b0bc..41eff9dd1bf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -475,12 +475,10 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, enum devlink_eswitch_encap_mode *encap); -int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack); -int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 1be048769309..720195c4be7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -164,12 +164,12 @@ static bool mlx5_sf_is_active(const struct mlx5_sf *sf) return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE; } -int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, enum devlink_port_fn_state *state, enum devlink_port_fn_opstate *opstate, struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink); struct mlx5_sf_table *table; struct mlx5_sf *sf; int err = 0; @@ -248,11 +248,11 @@ out: return err; } -int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink); struct mlx5_sf_table *table; struct mlx5_sf *sf; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 81ce13b19ee8..3a480e06ecc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -24,11 +24,11 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink, unsigned int *new_port_index); int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, struct netlink_ext_ack *extack); -int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, enum devlink_port_fn_state *state, enum devlink_port_fn_opstate *opstate, struct netlink_ext_ack *extack); -int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack); #else diff --git a/include/net/devlink.h b/include/net/devlink.h index 08f4c6191e72..ccbfb3a844aa 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1396,8 +1396,8 @@ struct devlink_ops { * * Note: @extack can be NULL when port notifier queries the port function. */ - int (*port_function_hw_addr_get)(struct devlink *devlink, struct devlink_port *port, - u8 *hw_addr, int *hw_addr_len, + int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr, + int *hw_addr_len, struct netlink_ext_ack *extack); /** * @port_function_hw_addr_set: Port function's hardware address set function. @@ -1406,7 +1406,7 @@ struct devlink_ops { * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port * function handling for a particular port. */ - int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port, + int (*port_function_hw_addr_set)(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); /** @@ -1462,8 +1462,7 @@ struct devlink_ops { * * Return: 0 on success, negative value otherwise. */ - int (*port_fn_state_get)(struct devlink *devlink, - struct devlink_port *port, + int (*port_fn_state_get)(struct devlink_port *port, enum devlink_port_fn_state *state, enum devlink_port_fn_opstate *opstate, struct netlink_ext_ack *extack); @@ -1478,8 +1477,7 @@ struct devlink_ops { * * Return: 0 on success, negative value otherwise. */ - int (*port_fn_state_set)(struct devlink *devlink, - struct devlink_port *port, + int (*port_fn_state_set)(struct devlink_port *port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack); diff --git a/net/core/devlink.c b/net/core/devlink.c index 8fa015319af6..ee95eee8d0ed 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -804,10 +804,11 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; } -static int -devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops, - struct devlink_port *port, struct sk_buff *msg, - struct netlink_ext_ack *extack, bool *msg_updated) +static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops, + struct devlink_port *port, + struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) { u8 hw_addr[MAX_ADDR_LEN]; int hw_addr_len; @@ -816,7 +817,8 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops * if (!ops->port_function_hw_addr_get) return 0; - err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack); + err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len, + extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -893,12 +895,11 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate) opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED; } -static int -devlink_port_fn_state_fill(struct devlink *devlink, - const struct devlink_ops *ops, - struct devlink_port *port, struct sk_buff *msg, - struct netlink_ext_ack *extack, - bool *msg_updated) +static int devlink_port_fn_state_fill(const struct devlink_ops *ops, + struct devlink_port *port, + struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) { enum devlink_port_fn_opstate opstate; enum devlink_port_fn_state state; @@ -907,7 +908,7 @@ devlink_port_fn_state_fill(struct devlink *devlink, if (!ops->port_fn_state_get) return 0; - err = ops->port_fn_state_get(devlink, port, &state, &opstate, extack); + err = ops->port_fn_state_get(port, &state, &opstate, extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -935,7 +936,6 @@ static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) { - struct devlink *devlink = port->devlink; const struct devlink_ops *ops; struct nlattr *function_attr; bool msg_updated = false; @@ -945,13 +945,12 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por if (!function_attr) return -EMSGSIZE; - ops = devlink->ops; - err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg, - extack, &msg_updated); + ops = port->devlink->ops; + err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack, + &msg_updated); if (err) goto out; - err = devlink_port_fn_state_fill(devlink, ops, port, msg, extack, - &msg_updated); + err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated); out: if (err || !msg_updated) nla_nest_cancel(msg, function_attr); @@ -1269,31 +1268,33 @@ out: return msg->len; } -static int devlink_port_type_set(struct devlink *devlink, - struct devlink_port *devlink_port, +static int devlink_port_type_set(struct devlink_port *devlink_port, enum devlink_port_type port_type) { int err; - if (devlink->ops->port_type_set) { - if (port_type == devlink_port->type) - return 0; - err = devlink->ops->port_type_set(devlink_port, port_type); - if (err) - return err; - devlink_port->desired_type = port_type; - devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); + if (devlink_port->devlink->ops->port_type_set) + return -EOPNOTSUPP; + + if (port_type == devlink_port->type) return 0; - } - return -EOPNOTSUPP; + + err = devlink_port->devlink->ops->port_type_set(devlink_port, + port_type); + if (err) + return err; + + devlink_port->desired_type = port_type; + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); + return 0; } -static int -devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port, - const struct nlattr *attr, struct netlink_ext_ack *extack) +static int devlink_port_function_hw_addr_set(struct devlink_port *port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) { - const struct devlink_ops *ops; + const struct devlink_ops *ops = port->devlink->ops; const u8 *hw_addr; int hw_addr_len; @@ -1314,17 +1315,16 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port * } } - ops = devlink->ops; if (!ops->port_function_hw_addr_set) { NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes"); return -EOPNOTSUPP; } - return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack); + return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len, + extack); } -static int devlink_port_fn_state_set(struct devlink *devlink, - struct devlink_port *port, +static int devlink_port_fn_state_set(struct devlink_port *port, const struct nlattr *attr, struct netlink_ext_ack *extack) { @@ -1332,18 +1332,18 @@ static int devlink_port_fn_state_set(struct devlink *devlink, const struct devlink_ops *ops; state = nla_get_u8(attr); - ops = devlink->ops; + ops = port->devlink->ops; if (!ops->port_fn_state_set) { NL_SET_ERR_MSG_MOD(extack, "Function does not support state setting"); return -EOPNOTSUPP; } - return ops->port_fn_state_set(devlink, port, state, extack); + return ops->port_fn_state_set(port, state, extack); } -static int -devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, - const struct nlattr *attr, struct netlink_ext_ack *extack) +static int devlink_port_function_set(struct devlink_port *port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) { struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1]; int err; @@ -1357,7 +1357,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]; if (attr) { - err = devlink_port_function_hw_addr_set(devlink, port, attr, extack); + err = devlink_port_function_hw_addr_set(port, attr, extack); if (err) return err; } @@ -1367,7 +1367,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, */ attr = tb[DEVLINK_PORT_FN_ATTR_STATE]; if (attr) - err = devlink_port_fn_state_set(devlink, port, attr, extack); + err = devlink_port_fn_state_set(port, attr, extack); if (!err) devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); @@ -1378,14 +1378,13 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; - struct devlink *devlink = devlink_port->devlink; int err; if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) { enum devlink_port_type port_type; port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]); - err = devlink_port_type_set(devlink, devlink_port, port_type); + err = devlink_port_type_set(devlink_port, port_type); if (err) return err; } @@ -1394,7 +1393,7 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION]; struct netlink_ext_ack *extack = info->extack; - err = devlink_port_function_set(devlink, devlink_port, attr, extack); + err = devlink_port_function_set(devlink_port, attr, extack); if (err) return err; } -- cgit v1.2.3 From 045c45d1f598c65806f885b59f6fbc4cebb62b15 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 8 Aug 2021 17:35:23 +0300 Subject: net: dsa: centralize fast ageing when address learning is turned off Currently DSA leaves it down to device drivers to fast age the FDB on a port when address learning is disabled on it. There are 2 reasons for doing that in the first place: - when address learning is disabled by user space, through IFLA_BRPORT_LEARNING or the brport_attr_learning sysfs, what user space typically wants to achieve is to operate in a mode with no dynamic FDB entry on that port. But if the port is already up, some addresses might have been already learned on it, and it seems silly to wait for 5 minutes for them to expire until something useful can be done. - when a port leaves a bridge and becomes standalone, DSA turns off address learning on it. This also has the nice side effect of flushing the dynamically learned bridge FDB entries on it, which is a good idea because standalone ports should not have bridge FDB entries on them. We let drivers manage fast ageing under this condition because if DSA were to do it, it would need to track each port's learning state, and act upon the transition, which it currently doesn't. But there are 2 reasons why doing it is better after all: - drivers might get it wrong and not do it (see b53_port_set_learning) - we would like to flush the dynamic entries from the software bridge too, and letting drivers do that would be another pain point So track the port learning state and trigger a fast age process automatically within DSA. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 7 ------- include/net/dsa.h | 1 + net/dsa/dsa_priv.h | 2 +- net/dsa/port.c | 35 +++++++++++++++++++++++++++++++---- 4 files changed, 33 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index c2c5f1573fe5..c45ca2473743 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5797,7 +5797,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; - bool do_fast_age = false; int err = -EOPNOTSUPP; mv88e6xxx_reg_lock(chip); @@ -5809,9 +5808,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, err = mv88e6xxx_port_set_assoc_vector(chip, port, pav); if (err) goto out; - - if (!learning) - do_fast_age = true; } if (flags.mask & BR_FLOOD) { @@ -5843,9 +5839,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, out: mv88e6xxx_reg_unlock(chip); - if (do_fast_age) - mv88e6xxx_port_fast_age(ds, port); - return err; } diff --git a/include/net/dsa.h b/include/net/dsa.h index d7dc26d316ea..995e9d3f9cfc 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -254,6 +254,7 @@ struct dsa_port { struct device_node *dn; unsigned int ageing_time; bool vlan_filtering; + bool learning; u8 stp_state; struct net_device *bridge_dev; int bridge_num; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 8dad40b2cf5c..9575cabd3ec3 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -241,7 +241,7 @@ int dsa_port_host_mdb_del(const struct dsa_port *dp, int dsa_port_pre_bridge_flags(const struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -int dsa_port_bridge_flags(const struct dsa_port *dp, +int dsa_port_bridge_flags(struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); int dsa_port_vlan_add(struct dsa_port *dp, diff --git a/net/dsa/port.c b/net/dsa/port.c index ef5e08b09bb7..d6a35a03acd6 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -30,6 +30,16 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) return dsa_tree_notify(dp->ds->dst, e, v); } +static void dsa_port_fast_age(const struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->port_fast_age) + return; + + ds->ops->port_fast_age(ds, dp->index); +} + int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) { struct dsa_switch *ds = dp->ds; @@ -40,7 +50,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) ds->ops->port_stp_state_set(ds, port, state); - if (do_fast_age && ds->ops->port_fast_age) { + if (do_fast_age) { /* Fast age FDB entries or flush appropriate forwarding database * for the given port, if we are moving it from Learning or * Forwarding state, to Disabled or Blocking or Listening state. @@ -54,7 +64,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) (state == BR_STATE_DISABLED || state == BR_STATE_BLOCKING || state == BR_STATE_LISTENING)) - ds->ops->port_fast_age(ds, port); + dsa_port_fast_age(dp); } dp->stp_state = state; @@ -633,16 +643,33 @@ int dsa_port_pre_bridge_flags(const struct dsa_port *dp, return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack); } -int dsa_port_bridge_flags(const struct dsa_port *dp, +int dsa_port_bridge_flags(struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack) { struct dsa_switch *ds = dp->ds; + int err; if (!ds->ops->port_bridge_flags) return -EOPNOTSUPP; - return ds->ops->port_bridge_flags(ds, dp->index, flags, extack); + err = ds->ops->port_bridge_flags(ds, dp->index, flags, extack); + if (err) + return err; + + if (flags.mask & BR_LEARNING) { + bool learning = flags.val & BR_LEARNING; + + if (learning == dp->learning) + return 0; + + if (dp->learning && !learning) + dsa_port_fast_age(dp); + + dp->learning = learning; + } + + return 0; } int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, -- cgit v1.2.3 From 5313a37b881e57767bc37185bef2873862be8d47 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 8 Aug 2021 17:35:26 +0300 Subject: net: dsa: sja1105: rely on DSA core tracking of port learning state Now that DSA keeps track of the port learning state, it becomes superfluous to keep an additional variable with this information in the sja1105 driver. Remove it. The DSA core's learning state is present in struct dsa_port *dp. To avoid the antipattern where we iterate through a DSA switch's ports and then call dsa_to_port to obtain the "dp" reference (which is bad because dsa_to_port iterates through the DSA switch tree once again), just iterate through the dst->ports and operate on those directly. The sja1105 had an extra use of priv->learn_ena on non-user ports. DSA does not touch the learning state of those ports - drivers are free to do what they wish on them. Mark that information with a comment in struct dsa_port and let sja1105 set dp->learning for cascade ports. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105.h | 1 - drivers/net/dsa/sja1105/sja1105_main.c | 32 +++++++++++++------------------- include/net/dsa.h | 1 + 3 files changed, 14 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index 9cd7dbdd7db9..2e899c9f036d 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -233,7 +233,6 @@ struct sja1105_private { phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS]; bool fixed_link[SJA1105_MAX_NUM_PORTS]; bool vlan_aware; - unsigned long learn_ena; unsigned long ucast_egress_floods; unsigned long bcast_egress_floods; const struct sja1105_info *info; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 241fd25b0b86..87e279be89c9 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -176,7 +176,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) struct sja1105_mac_config_entry *mac; struct dsa_switch *ds = priv->ds; struct sja1105_table *table; - int i; + struct dsa_port *dp; table = &priv->static_config.tables[BLK_IDX_MAC_CONFIG]; @@ -195,8 +195,11 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) mac = table->entries; - for (i = 0; i < ds->num_ports; i++) { - mac[i] = default_mac; + list_for_each_entry(dp, &ds->dst->ports, list) { + if (dp->ds != ds) + continue; + + mac[dp->index] = default_mac; /* Let sja1105_bridge_stp_state_set() keep address learning * enabled for the DSA ports. CPU ports use software-assisted @@ -205,8 +208,8 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) * CPU ports in a cross-chip topology if multiple CPU ports * exist. */ - if (dsa_is_dsa_port(ds, i)) - priv->learn_ena |= BIT(i); + if (dsa_port_is_dsa(dp)) + dp->learning = true; } return 0; @@ -1899,6 +1902,7 @@ static int sja1105_bridge_member(struct dsa_switch *ds, int port, static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, u8 state) { + struct dsa_port *dp = dsa_to_port(ds, port); struct sja1105_private *priv = ds->priv; struct sja1105_mac_config_entry *mac; @@ -1924,12 +1928,12 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, case BR_STATE_LEARNING: mac[port].ingress = true; mac[port].egress = false; - mac[port].dyn_learn = !!(priv->learn_ena & BIT(port)); + mac[port].dyn_learn = dp->learning; break; case BR_STATE_FORWARDING: mac[port].ingress = true; mac[port].egress = true; - mac[port].dyn_learn = !!(priv->learn_ena & BIT(port)); + mac[port].dyn_learn = dp->learning; break; default: dev_err(ds->dev, "invalid STP state: %d\n", state); @@ -2891,23 +2895,13 @@ static int sja1105_port_set_learning(struct sja1105_private *priv, int port, bool enabled) { struct sja1105_mac_config_entry *mac; - int rc; mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries; mac[port].dyn_learn = enabled; - rc = sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, - &mac[port], true); - if (rc) - return rc; - - if (enabled) - priv->learn_ena |= BIT(port); - else - priv->learn_ena &= ~BIT(port); - - return 0; + return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, + &mac[port], true); } static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to, diff --git a/include/net/dsa.h b/include/net/dsa.h index 995e9d3f9cfc..0c2cba45fa79 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -254,6 +254,7 @@ struct dsa_port { struct device_node *dn; unsigned int ageing_time; bool vlan_filtering; + /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ bool learning; u8 stp_state; struct net_device *bridge_dev; -- cgit v1.2.3 From 919d13a7e455c2e7676042d7a5f94c164e859d8a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 8 Aug 2021 21:57:43 +0300 Subject: devlink: Set device as early as possible All kernel devlink implementations call to devlink_alloc() during initialization routine for specific device which is used later as a parent device for devlink_register(). Such late device assignment causes to the situation which requires us to call to device_register() before setting other parameters, but that call opens devlink to the world and makes accessible for the netlink users. Any attempt to move devlink_register() to be the last call generates the following error due to access to the devlink->dev pointer. [ 8.758862] devlink_nl_param_fill+0x2e8/0xe50 [ 8.760305] devlink_param_notify+0x6d/0x180 [ 8.760435] __devlink_params_register+0x2f1/0x670 [ 8.760558] devlink_params_register+0x1e/0x20 The simple change of API to set devlink device in the devlink_alloc() instead of devlink_register() fixes all this above and ensures that prior to call to devlink_register() everything already set. Signed-off-by: Leon Romanovsky Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 9 +++++--- drivers/net/ethernet/cavium/liquidio/lio_main.c | 5 +++-- .../ethernet/freescale/dpaa2/dpaa2-eth-devlink.c | 5 +++-- .../ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c | 4 ++-- .../hisilicon/hns3/hns3vf/hclgevf_devlink.c | 7 +++--- drivers/net/ethernet/huawei/hinic/hinic_devlink.c | 8 +++---- drivers/net/ethernet/huawei/hinic/hinic_devlink.h | 4 ++-- drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c | 2 +- drivers/net/ethernet/huawei/hinic/hinic_main.c | 2 +- drivers/net/ethernet/intel/ice/ice_devlink.c | 4 ++-- .../ethernet/marvell/octeontx2/af/rvu_devlink.c | 5 +++-- .../ethernet/marvell/prestera/prestera_devlink.c | 7 +++--- .../ethernet/marvell/prestera/prestera_devlink.h | 2 +- .../net/ethernet/marvell/prestera/prestera_main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 9 ++++---- drivers/net/ethernet/mellanox/mlx5/core/devlink.h | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ++-- .../ethernet/mellanox/mlx5/core/sf/dev/driver.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/core.c | 5 +++-- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 5 +++-- drivers/net/ethernet/netronome/nfp/nfp_main.c | 2 +- drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 2 +- .../net/ethernet/pensando/ionic/ionic_devlink.c | 4 ++-- drivers/net/ethernet/qlogic/qed/qed_devlink.c | 5 +++-- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 4 ++-- drivers/net/ethernet/ti/cpsw_new.c | 4 ++-- drivers/net/netdevsim/dev.c | 4 ++-- drivers/ptp/ptp_ocp.c | 26 ++++------------------ drivers/staging/qlge/qlge_main.c | 5 +++-- include/net/devlink.h | 10 +++++---- net/core/devlink.c | 15 ++++++------- net/dsa/dsa2.c | 5 +++-- 33 files changed, 91 insertions(+), 94 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 64381be935a8..2cd8bb37e641 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -743,14 +743,17 @@ static void bnxt_dl_params_unregister(struct bnxt *bp) int bnxt_dl_register(struct bnxt *bp) { + const struct devlink_ops *devlink_ops; struct devlink_port_attrs attrs = {}; struct devlink *dl; int rc; if (BNXT_PF(bp)) - dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl)); + devlink_ops = &bnxt_dl_ops; else - dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl)); + devlink_ops = &bnxt_vf_dl_ops; + + dl = devlink_alloc(devlink_ops, sizeof(struct bnxt_dl), &bp->pdev->dev); if (!dl) { netdev_warn(bp->dev, "devlink_alloc failed\n"); return -ENOMEM; @@ -763,7 +766,7 @@ int bnxt_dl_register(struct bnxt *bp) bp->hwrm_spec_code > 0x10803) bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; - rc = devlink_register(dl, &bp->pdev->dev); + rc = devlink_register(dl); if (rc) { netdev_warn(bp->dev, "devlink_register failed. rc=%d\n", rc); goto err_dl_free; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index af116ef83bad..2907e13b9df6 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -3750,7 +3750,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) } devlink = devlink_alloc(&liquidio_devlink_ops, - sizeof(struct lio_devlink_priv)); + sizeof(struct lio_devlink_priv), + &octeon_dev->pci_dev->dev); if (!devlink) { dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n"); goto setup_nic_dev_free; @@ -3759,7 +3760,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) lio_devlink = devlink_priv(devlink); lio_devlink->oct = octeon_dev; - if (devlink_register(devlink, &octeon_dev->pci_dev->dev)) { + if (devlink_register(devlink)) { devlink_free(devlink); dev_err(&octeon_dev->pci_dev->dev, "devlink registration failed\n"); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c index 8e09f65ea295..605a39f892b9 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c @@ -196,7 +196,8 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv) struct dpaa2_eth_devlink_priv *dl_priv; int err; - priv->devlink = devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv)); + priv->devlink = + devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv), dev); if (!priv->devlink) { dev_err(dev, "devlink_alloc failed\n"); return -ENOMEM; @@ -204,7 +205,7 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv) dl_priv = devlink_priv(priv->devlink); dl_priv->dpaa2_priv = priv; - err = devlink_register(priv->devlink, dev); + err = devlink_register(priv->devlink); if (err) { dev_err(dev, "devlink_register() = %d\n", err); goto devlink_free; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c index 06d29945d4e1..448f29aa4e6b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c @@ -112,14 +112,14 @@ int hclge_devlink_init(struct hclge_dev *hdev) int ret; devlink = devlink_alloc(&hclge_devlink_ops, - sizeof(struct hclge_devlink_priv)); + sizeof(struct hclge_devlink_priv), &pdev->dev); if (!devlink) return -ENOMEM; priv = devlink_priv(devlink); priv->hdev = hdev; - ret = devlink_register(devlink, &pdev->dev); + ret = devlink_register(devlink); if (ret) { dev_err(&pdev->dev, "failed to register devlink, ret = %d\n", ret); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c index 21a45279fd99..1e6061fb8ed4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c @@ -112,15 +112,16 @@ int hclgevf_devlink_init(struct hclgevf_dev *hdev) struct devlink *devlink; int ret; - devlink = devlink_alloc(&hclgevf_devlink_ops, - sizeof(struct hclgevf_devlink_priv)); + devlink = + devlink_alloc(&hclgevf_devlink_ops, + sizeof(struct hclgevf_devlink_priv), &pdev->dev); if (!devlink) return -ENOMEM; priv = devlink_priv(devlink); priv->hdev = hdev; - ret = devlink_register(devlink, &pdev->dev); + ret = devlink_register(devlink); if (ret) { dev_err(&pdev->dev, "failed to register devlink, ret = %d\n", ret); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c index 58d5646444b0..6e11ee339f12 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c @@ -293,9 +293,9 @@ static const struct devlink_ops hinic_devlink_ops = { .flash_update = hinic_devlink_flash_update, }; -struct devlink *hinic_devlink_alloc(void) +struct devlink *hinic_devlink_alloc(struct device *dev) { - return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev)); + return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev), dev); } void hinic_devlink_free(struct devlink *devlink) @@ -303,11 +303,11 @@ void hinic_devlink_free(struct devlink *devlink) devlink_free(devlink); } -int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev) +int hinic_devlink_register(struct hinic_devlink_priv *priv) { struct devlink *devlink = priv_to_devlink(priv); - return devlink_register(devlink, dev); + return devlink_register(devlink); } void hinic_devlink_unregister(struct hinic_devlink_priv *priv) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.h b/drivers/net/ethernet/huawei/hinic/hinic_devlink.h index a090ebcfaabb..9e315011015c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.h @@ -108,9 +108,9 @@ struct host_image_st { u32 device_id; }; -struct devlink *hinic_devlink_alloc(void); +struct devlink *hinic_devlink_alloc(struct device *dev); void hinic_devlink_free(struct devlink *devlink); -int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev); +int hinic_devlink_register(struct hinic_devlink_priv *priv); void hinic_devlink_unregister(struct hinic_devlink_priv *priv); int hinic_health_reporters_create(struct hinic_devlink_priv *priv); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 428108eb10d2..56b6b04e209b 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -754,7 +754,7 @@ static int init_pfhwdev(struct hinic_pfhwdev *pfhwdev) return err; } - err = hinic_devlink_register(hwdev->devlink_dev, &pdev->dev); + err = hinic_devlink_register(hwdev->devlink_dev); if (err) { dev_err(&hwif->pdev->dev, "Failed to register devlink\n"); hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 405ee4d2d2b1..881d0b247561 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -1183,7 +1183,7 @@ static int nic_dev_init(struct pci_dev *pdev) struct devlink *devlink; int err, num_qps; - devlink = hinic_devlink_alloc(); + devlink = hinic_devlink_alloc(&pdev->dev); if (!devlink) { dev_err(&pdev->dev, "Hinic devlink alloc failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 91b545ab8b8f..8c863d64930b 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -475,7 +475,7 @@ struct ice_pf *ice_allocate_pf(struct device *dev) { struct devlink *devlink; - devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf)); + devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev); if (!devlink) return NULL; @@ -502,7 +502,7 @@ int ice_devlink_register(struct ice_pf *pf) struct device *dev = ice_pf_to_dev(pf); int err; - err = devlink_register(devlink, dev); + err = devlink_register(devlink); if (err) { dev_err(dev, "devlink registration failed: %d\n", err); return err; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 6f963b2f54a7..a55b46ad162d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1503,13 +1503,14 @@ int rvu_register_dl(struct rvu *rvu) struct devlink *dl; int err; - dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink)); + dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink), + rvu->dev); if (!dl) { dev_warn(rvu->dev, "devlink_alloc failed\n"); return -ENOMEM; } - err = devlink_register(dl, rvu->dev); + err = devlink_register(dl); if (err) { dev_err(rvu->dev, "devlink register failed with error %d\n", err); devlink_free(dl); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c index fa7a0682ad1e..68b442eb6d69 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c @@ -390,11 +390,12 @@ static const struct devlink_ops prestera_dl_ops = { .trap_drop_counter_get = prestera_drop_counter_get, }; -struct prestera_switch *prestera_devlink_alloc(void) +struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev) { struct devlink *dl; - dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch)); + dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch), + dev->dev); return devlink_priv(dl); } @@ -411,7 +412,7 @@ int prestera_devlink_register(struct prestera_switch *sw) struct devlink *dl = priv_to_devlink(sw); int err; - err = devlink_register(dl, sw->dev->dev); + err = devlink_register(dl); if (err) { dev_err(prestera_dev(sw), "devlink_register failed: %d\n", err); return err; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h index 5d73aa9db897..cc34c3db13a2 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h @@ -6,7 +6,7 @@ #include "prestera.h" -struct prestera_switch *prestera_devlink_alloc(void); +struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev); void prestera_devlink_free(struct prestera_switch *sw); int prestera_devlink_register(struct prestera_switch *sw); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 7c569c1abefc..44c670807fb3 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -905,7 +905,7 @@ int prestera_device_register(struct prestera_device *dev) struct prestera_switch *sw; int err; - sw = prestera_devlink_alloc(); + sw = prestera_devlink_alloc(dev); if (!sw) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 28ac4693da3c..7267c6c6d2e2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4005,7 +4005,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) printk_once(KERN_INFO "%s", mlx4_version); - devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv)); + devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv), &pdev->dev); if (!devlink) return -ENOMEM; priv = devlink_priv(devlink); @@ -4024,7 +4024,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mutex_init(&dev->persist->interface_state_mutex); mutex_init(&dev->persist->pci_status_mutex); - ret = devlink_register(devlink, &pdev->dev); + ret = devlink_register(devlink); if (ret) goto err_persist_free; ret = devlink_params_register(devlink, mlx4_devlink_params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index d791d351b489..f38553ff538b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -359,9 +359,10 @@ int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, return 0; } -struct devlink *mlx5_devlink_alloc(void) +struct devlink *mlx5_devlink_alloc(struct device *dev) { - return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev)); + return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev), + dev); } void mlx5_devlink_free(struct devlink *devlink) @@ -638,11 +639,11 @@ static void mlx5_devlink_traps_unregister(struct devlink *devlink) ARRAY_SIZE(mlx5_trap_groups_arr)); } -int mlx5_devlink_register(struct devlink *devlink, struct device *dev) +int mlx5_devlink_register(struct devlink *devlink) { int err; - err = devlink_register(devlink, dev); + err = devlink_register(devlink); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index 7318d44b774b..30bf4882779b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -31,9 +31,9 @@ int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev); int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, enum devlink_trap_action *action); -struct devlink *mlx5_devlink_alloc(void); +struct devlink *mlx5_devlink_alloc(struct device *dev); void mlx5_devlink_free(struct devlink *devlink); -int mlx5_devlink_register(struct devlink *devlink, struct device *dev); +int mlx5_devlink_register(struct devlink *devlink); void mlx5_devlink_unregister(struct devlink *devlink); #endif /* __MLX5_DEVLINK_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index eb1b316560a8..a8efd9f1af4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1271,7 +1271,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev) set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); - err = mlx5_devlink_register(priv_to_devlink(dev), dev->device); + err = mlx5_devlink_register(priv_to_devlink(dev)); if (err) goto err_devlink_reg; @@ -1452,7 +1452,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) struct devlink *devlink; int err; - devlink = mlx5_devlink_alloc(); + devlink = mlx5_devlink_alloc(&pdev->dev); if (!devlink) { dev_err(&pdev->dev, "devlink alloc failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 42c8ee03fe3e..052f48068dc1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -14,7 +14,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia struct devlink *devlink; int err; - devlink = mlx5_devlink_alloc(); + devlink = mlx5_devlink_alloc(&adev->dev); if (!devlink) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e775f08fb464..f080fab3de2b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1927,7 +1927,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (!reload) { alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size; - devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size); + devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size, + mlxsw_bus_info->dev); if (!devlink) { err = -ENOMEM; goto err_devlink_alloc; @@ -1974,7 +1975,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_emad_init; if (!reload) { - err = devlink_register(devlink, mlxsw_bus_info->dev); + err = devlink_register(devlink); if (err) goto err_devlink_register; } diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 4bd7e9d9ec61..aa41c9cde643 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1103,7 +1103,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (!np && !pdev->dev.platform_data) return -ENODEV; - devlink = devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot)); + devlink = + devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot), &pdev->dev); if (!devlink) return -ENOMEM; @@ -1187,7 +1188,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (err) goto out_put_ports; - err = devlink_register(devlink, ocelot->dev); + err = devlink_register(devlink); if (err) goto out_ocelot_deinit; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 742a420152b3..bb3b8a7f6c5d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -692,7 +692,7 @@ static int nfp_pci_probe(struct pci_dev *pdev, goto err_pci_disable; } - devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf)); + devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf), &pdev->dev); if (!devlink) { err = -ENOMEM; goto err_rel_regions; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 921db40047d7..d10a93801344 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -701,7 +701,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf) if (err) goto err_unmap; - err = devlink_register(devlink, &pf->pdev->dev); + err = devlink_register(devlink); if (err) goto err_app_clean; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index cd520e4c5522..c7d0e195d176 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -64,7 +64,7 @@ struct ionic *ionic_devlink_alloc(struct device *dev) { struct devlink *dl; - dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic)); + dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic), dev); return devlink_priv(dl); } @@ -82,7 +82,7 @@ int ionic_devlink_register(struct ionic *ionic) struct devlink_port_attrs attrs = {}; int err; - err = devlink_register(dl, ionic->dev); + err = devlink_register(dl); if (err) { dev_warn(ionic->dev, "devlink_register failed: %d\n", err); return err; diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c index cf7f4da68e69..4c7501b9c284 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c +++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c @@ -207,14 +207,15 @@ struct devlink *qed_devlink_register(struct qed_dev *cdev) struct devlink *dl; int rc; - dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink)); + dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink), + &cdev->pdev->dev); if (!dl) return ERR_PTR(-ENOMEM); qdevlink = devlink_priv(dl); qdevlink->cdev = cdev; - rc = devlink_register(dl, &cdev->pdev->dev); + rc = devlink_register(dl); if (rc) goto err_free; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 588e7df0b1cc..130346f74ee8 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2422,14 +2422,14 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) int i; common->devlink = - devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv)); + devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv), dev); if (!common->devlink) return -ENOMEM; dl_priv = devlink_priv(common->devlink); dl_priv->common = common; - ret = devlink_register(common->devlink, dev); + ret = devlink_register(common->devlink); if (ret) { dev_err(dev, "devlink reg fail ret:%d\n", ret); goto dl_free; diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index ae167223e87f..192394fe4c1c 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1800,14 +1800,14 @@ static int cpsw_register_devlink(struct cpsw_common *cpsw) struct cpsw_devlink *dl_priv; int ret = 0; - cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv)); + cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv), dev); if (!cpsw->devlink) return -ENOMEM; dl_priv = devlink_priv(cpsw->devlink); dl_priv->cpsw = cpsw; - ret = devlink_register(cpsw->devlink, dev); + ret = devlink_register(cpsw->devlink); if (ret) { dev_err(dev, "DL reg fail ret:%d\n", ret); goto dl_free; diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 53068e184c90..54313bd57797 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1449,7 +1449,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) int err; devlink = devlink_alloc_ns(&nsim_dev_devlink_ops, sizeof(*nsim_dev), - nsim_bus_dev->initial_net); + nsim_bus_dev->initial_net, &nsim_bus_dev->dev); if (!devlink) return -ENOMEM; nsim_dev = devlink_priv(devlink); @@ -1470,7 +1470,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) if (err) goto err_devlink_free; - err = devlink_register(devlink, &nsim_bus_dev->dev); + err = devlink_register(devlink); if (err) goto err_resources_unregister; diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 6b9c14586987..92edf772feed 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -735,24 +735,6 @@ ptp_ocp_info(struct ptp_ocp *bp) ptp_ocp_tod_info(bp); } -static int -ptp_ocp_devlink_register(struct devlink *devlink, struct device *dev) -{ - int err; - - err = devlink_register(devlink, dev); - if (err) - return err; - - return 0; -} - -static void -ptp_ocp_devlink_unregister(struct devlink *devlink) -{ - devlink_unregister(devlink); -} - static struct device * ptp_ocp_find_flash(struct ptp_ocp *bp) { @@ -1437,13 +1419,13 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct ptp_ocp *bp; int err; - devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp)); + devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp), &pdev->dev); if (!devlink) { dev_err(&pdev->dev, "devlink_alloc failed\n"); return -ENOMEM; } - err = ptp_ocp_devlink_register(devlink, &pdev->dev); + err = devlink_register(devlink); if (err) goto out_free; @@ -1497,7 +1479,7 @@ out: pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); out_unregister: - ptp_ocp_devlink_unregister(devlink); + devlink_unregister(devlink); out_free: devlink_free(devlink); @@ -1514,7 +1496,7 @@ ptp_ocp_remove(struct pci_dev *pdev) pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); - ptp_ocp_devlink_unregister(devlink); + devlink_unregister(devlink); devlink_free(devlink); } diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c index 19a02e958865..8fcdf89da8aa 100644 --- a/drivers/staging/qlge/qlge_main.c +++ b/drivers/staging/qlge/qlge_main.c @@ -4547,7 +4547,8 @@ static int qlge_probe(struct pci_dev *pdev, static int cards_found; int err; - devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter)); + devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter), + &pdev->dev); if (!devlink) return -ENOMEM; @@ -4613,7 +4614,7 @@ static int qlge_probe(struct pci_dev *pdev, goto netdev_free; } - err = devlink_register(devlink, &pdev->dev); + err = devlink_register(devlink); if (err) goto netdev_free; diff --git a/include/net/devlink.h b/include/net/devlink.h index ccbfb3a844aa..0236c77f2fd0 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1544,13 +1544,15 @@ struct net *devlink_net(const struct devlink *devlink); * Drivers that operate on real HW must use devlink_alloc() instead. */ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, - size_t priv_size, struct net *net); + size_t priv_size, struct net *net, + struct device *dev); static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, - size_t priv_size) + size_t priv_size, + struct device *dev) { - return devlink_alloc_ns(ops, priv_size, &init_net); + return devlink_alloc_ns(ops, priv_size, &init_net, dev); } -int devlink_register(struct devlink *devlink, struct device *dev); +int devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); void devlink_reload_enable(struct devlink *devlink); void devlink_reload_disable(struct devlink *devlink); diff --git a/net/core/devlink.c b/net/core/devlink.c index ee95eee8d0ed..d3b16dd9f64e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -8768,24 +8768,26 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops) * @ops: ops * @priv_size: size of user private data * @net: net namespace + * @dev: parent device * * Allocate new devlink instance resources, including devlink index * and name. */ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, - size_t priv_size, struct net *net) + size_t priv_size, struct net *net, + struct device *dev) { struct devlink *devlink; - if (WARN_ON(!ops)) - return NULL; - + WARN_ON(!ops || !dev); if (!devlink_reload_actions_valid(ops)) return NULL; devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL); if (!devlink) return NULL; + + devlink->dev = dev; devlink->ops = ops; xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); write_pnet(&devlink->_net, net); @@ -8810,12 +8812,9 @@ EXPORT_SYMBOL_GPL(devlink_alloc_ns); * devlink_register - Register devlink instance * * @devlink: devlink - * @dev: parent device */ -int devlink_register(struct devlink *devlink, struct device *dev) +int devlink_register(struct devlink *devlink) { - WARN_ON(devlink->dev); - devlink->dev = dev; mutex_lock(&devlink_mutex); list_add_tail(&devlink->list, &devlink_list); devlink_notify(devlink, DEVLINK_CMD_NEW); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index a4c525f1cb17..8150e16aaa55 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -746,13 +746,14 @@ static int dsa_switch_setup(struct dsa_switch *ds) /* Add the switch to devlink before calling setup, so that setup can * add dpipe tables */ - ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv)); + ds->devlink = + devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev); if (!ds->devlink) return -ENOMEM; dl_priv = devlink_priv(ds->devlink); dl_priv->ds = ds; - err = devlink_register(ds->devlink, ds->dev); + err = devlink_register(ds->devlink); if (err) goto free_devlink; -- cgit v1.2.3 From 9e2ee5c7e7c35d195e2aa0692a7241d47a433d1e Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Sat, 31 Jul 2021 05:57:34 +0000 Subject: net, bonding: Add XDP support to the bonding driver XDP is implemented in the bonding driver by transparently delegating the XDP program loading, removal and xmit operations to the bonding slave devices. The overall goal of this work is that XDP programs can be attached to a bond device *without* any further changes (or awareness) necessary to the program itself, meaning the same XDP program can be attached to a native device but also a bonding device. Semantics of XDP_TX when attached to a bond are equivalent in such setting to the case when a tc/BPF program would be attached to the bond, meaning transmitting the packet out of the bond itself using one of the bond's configured xmit methods to select a slave device (rather than XDP_TX on the slave itself). Handling of XDP_TX to transmit using the configured bonding mechanism is therefore implemented by rewriting the BPF program return value in bpf_prog_run_xdp. To avoid performance impact this check is guarded by a static key, which is incremented when a XDP program is loaded onto a bond device. This approach was chosen to avoid changes to drivers implementing XDP. If the slave device does not match the receive device, then XDP_REDIRECT is transparently used to perform the redirection in order to have the network driver release the packet from its RX ring. The bonding driver hashing functions have been refactored to allow reuse with xdp_buff's to avoid code duplication. The motivation for this change is to enable use of bonding (and 802.3ad) in hairpinning L4 load-balancers such as [1] implemented with XDP and also to transparently support bond devices for projects that use XDP given most modern NICs have dual port adapters. An alternative to this approach would be to implement 802.3ad in user-space and implement the bonding load-balancing in the XDP program itself, but is rather a cumbersome endeavor in terms of slave device management (e.g. by watching netlink) and requires separate programs for native vs bond cases for the orchestrator. A native in-kernel implementation overcomes these issues and provides more flexibility. Below are benchmark results done on two machines with 100Gbit Intel E810 (ice) NIC and with 32-core 3970X on sending machine, and 16-core 3950X on receiving machine. 64 byte packets were sent with pktgen-dpdk at full rate. Two issues [2, 3] were identified with the ice driver, so the tests were performed with iommu=off and patch [2] applied. Additionally the bonding round robin algorithm was modified to use per-cpu tx counters as high CPU load (50% vs 10%) and high rate of cache misses were caused by the shared rr_tx_counter (see patch 2/3). The statistics were collected using "sar -n dev -u 1 10". On top of that, for ice, further work is in progress on improving the XDP_TX numbers [4]. -----------------------| CPU |--| rxpck/s |--| txpck/s |---- without patch (1 dev): XDP_DROP: 3.15% 48.6Mpps XDP_TX: 3.12% 18.3Mpps 18.3Mpps XDP_DROP (RSS): 9.47% 116.5Mpps XDP_TX (RSS): 9.67% 25.3Mpps 24.2Mpps ----------------------- with patch, bond (1 dev): XDP_DROP: 3.14% 46.7Mpps XDP_TX: 3.15% 13.9Mpps 13.9Mpps XDP_DROP (RSS): 10.33% 117.2Mpps XDP_TX (RSS): 10.64% 25.1Mpps 24.0Mpps ----------------------- with patch, bond (2 devs): XDP_DROP: 6.27% 92.7Mpps XDP_TX: 6.26% 17.6Mpps 17.5Mpps XDP_DROP (RSS): 11.38% 117.2Mpps XDP_TX (RSS): 14.30% 28.7Mpps 27.4Mpps -------------------------------------------------------------- RSS: Receive Side Scaling, e.g. the packets were sent to a range of destination IPs. [1]: https://cilium.io/blog/2021/05/20/cilium-110#standalonelb [2]: https://lore.kernel.org/bpf/20210601113236.42651-1-maciej.fijalkowski@intel.com/T/#t [3]: https://lore.kernel.org/bpf/CAHn8xckNXci+X_Eb2WMv4uVYjO2331UWB2JLtXr_58z0Av8+8A@mail.gmail.com/ [4]: https://lore.kernel.org/bpf/20210805230046.28715-1-maciej.fijalkowski@intel.com/T/#t Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Cc: Maciej Fijalkowski Cc: Magnus Karlsson Link: https://lore.kernel.org/bpf/20210731055738.16820-4-joamaki@gmail.com --- drivers/net/bonding/bond_main.c | 309 +++++++++++++++++++++++++++++++++++++++- include/net/bonding.h | 1 + 2 files changed, 309 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 487c71fcec58..04cf78fa1721 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -317,6 +317,19 @@ bool bond_sk_check(struct bonding *bond) } } +static bool bond_xdp_check(struct bonding *bond) +{ + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + case BOND_MODE_ACTIVEBACKUP: + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + return true; + default: + return false; + } +} + /*---------------------------------- VLAN -----------------------------------*/ /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, @@ -2133,6 +2146,41 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, bond_update_slave_arr(bond, NULL); + if (!slave_dev->netdev_ops->ndo_bpf || + !slave_dev->netdev_ops->ndo_xdp_xmit) { + if (bond->xdp_prog) { + NL_SET_ERR_MSG(extack, "Slave does not support XDP"); + slave_err(bond_dev, slave_dev, "Slave does not support XDP\n"); + res = -EOPNOTSUPP; + goto err_sysfs_del; + } + } else { + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = bond->xdp_prog, + .extack = extack, + }; + + if (dev_xdp_prog_count(slave_dev) > 0) { + NL_SET_ERR_MSG(extack, + "Slave has XDP program loaded, please unload before enslaving"); + slave_err(bond_dev, slave_dev, + "Slave has XDP program loaded, please unload before enslaving\n"); + res = -EOPNOTSUPP; + goto err_sysfs_del; + } + + res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (res < 0) { + /* ndo_bpf() sets extack error message */ + slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res); + goto err_sysfs_del; + } + if (bond->xdp_prog) + bpf_prog_inc(bond->xdp_prog); + } + slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n", bond_is_active_slave(new_slave) ? "an active" : "a backup", new_slave->link != BOND_LINK_DOWN ? "an up" : "a down"); @@ -2252,6 +2300,17 @@ static int __bond_release_one(struct net_device *bond_dev, /* recompute stats just before removing the slave */ bond_get_stats(bond->dev, &bond->bond_stats); + if (bond->xdp_prog) { + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = NULL, + .extack = NULL, + }; + if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp)) + slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n"); + } + /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. */ @@ -3638,7 +3697,7 @@ static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff return 0; ep = (struct ethhdr *)(data + mhoff); - return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto; + return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto); } static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data, @@ -3807,6 +3866,26 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) skb_headlen(skb)); } +/** + * bond_xmit_hash_xdp - generate a hash value based on the xmit policy + * @bond: bonding device + * @xdp: buffer to use for headers + * + * The XDP variant of bond_xmit_hash. + */ +static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp) +{ + struct ethhdr *eth; + + if (xdp->data + sizeof(struct ethhdr) > xdp->data_end) + return 0; + + eth = (struct ethhdr *)xdp->data; + + return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0, + sizeof(struct ethhdr), xdp->data_end - xdp->data); +} + /*-------------------------- Device entry points ----------------------------*/ void bond_work_init_all(struct bonding *bond) @@ -4455,6 +4534,47 @@ non_igmp: return NULL; } +static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond, + struct xdp_buff *xdp) +{ + struct slave *slave; + int slave_cnt; + u32 slave_id; + const struct ethhdr *eth; + void *data = xdp->data; + + if (data + sizeof(struct ethhdr) > xdp->data_end) + goto non_igmp; + + eth = (struct ethhdr *)data; + data += sizeof(struct ethhdr); + + /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */ + if (eth->h_proto == htons(ETH_P_IP)) { + const struct iphdr *iph; + + if (data + sizeof(struct iphdr) > xdp->data_end) + goto non_igmp; + + iph = (struct iphdr *)data; + + if (iph->protocol == IPPROTO_IGMP) { + slave = rcu_dereference(bond->curr_active_slave); + if (slave) + return slave; + return bond_get_slave_by_id(bond, 0); + } + } + +non_igmp: + slave_cnt = READ_ONCE(bond->slave_cnt); + if (likely(slave_cnt)) { + slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; + return bond_get_slave_by_id(bond, slave_id); + } + return NULL; +} + static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) { @@ -4670,6 +4790,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond, return slave; } +static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond, + struct xdp_buff *xdp) +{ + struct bond_up_slave *slaves; + unsigned int count; + u32 hash; + + hash = bond_xmit_hash_xdp(bond, xdp); + slaves = rcu_dereference(bond->usable_slaves); + count = slaves ? READ_ONCE(slaves->count) : 0; + if (unlikely(!count)) + return NULL; + + return slaves->arr[hash % count]; +} + /* Use this Xmit function for 3AD as well as XOR modes. The current * usable slave array is formed in the control path. The xmit function * just calculates hash and sends the packet out. @@ -4954,6 +5090,174 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } +static struct net_device * +bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + + /* Caller needs to hold rcu_read_lock() */ + + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp); + break; + + case BOND_MODE_ACTIVEBACKUP: + slave = bond_xmit_activebackup_slave_get(bond); + break; + + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp); + break; + + default: + /* Should never happen. Mode guarded by bond_xdp_check() */ + netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond)); + WARN_ON_ONCE(1); + return NULL; + } + + if (slave) + return slave->dev; + + return NULL; +} + +static int bond_xdp_xmit(struct net_device *bond_dev, + int n, struct xdp_frame **frames, u32 flags) +{ + int nxmit, err = -ENXIO; + + rcu_read_lock(); + + for (nxmit = 0; nxmit < n; nxmit++) { + struct xdp_frame *frame = frames[nxmit]; + struct xdp_frame *frames1[] = {frame}; + struct net_device *slave_dev; + struct xdp_buff xdp; + + xdp_convert_frame_to_buff(frame, &xdp); + + slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp); + if (!slave_dev) { + err = -ENXIO; + break; + } + + err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags); + if (err < 1) + break; + } + + rcu_read_unlock(); + + /* If error happened on the first frame then we can pass the error up, otherwise + * report the number of frames that were xmitted. + */ + if (err < 0) + return (nxmit == 0 ? err : nxmit); + + return nxmit; +} + +static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct bonding *bond = netdev_priv(dev); + struct list_head *iter; + struct slave *slave, *rollback_slave; + struct bpf_prog *old_prog; + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = prog, + .extack = extack, + }; + int err; + + ASSERT_RTNL(); + + if (!bond_xdp_check(bond)) + return -EOPNOTSUPP; + + old_prog = bond->xdp_prog; + bond->xdp_prog = prog; + + bond_for_each_slave(bond, slave, iter) { + struct net_device *slave_dev = slave->dev; + + if (!slave_dev->netdev_ops->ndo_bpf || + !slave_dev->netdev_ops->ndo_xdp_xmit) { + NL_SET_ERR_MSG(extack, "Slave device does not support XDP"); + slave_err(dev, slave_dev, "Slave does not support XDP\n"); + err = -EOPNOTSUPP; + goto err; + } + + if (dev_xdp_prog_count(slave_dev) > 0) { + NL_SET_ERR_MSG(extack, + "Slave has XDP program loaded, please unload before enslaving"); + slave_err(dev, slave_dev, + "Slave has XDP program loaded, please unload before enslaving\n"); + err = -EOPNOTSUPP; + goto err; + } + + err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (err < 0) { + /* ndo_bpf() sets extack error message */ + slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err); + goto err; + } + if (prog) + bpf_prog_inc(prog); + } + + if (old_prog) + bpf_prog_put(old_prog); + + if (prog) + static_branch_inc(&bpf_master_redirect_enabled_key); + else + static_branch_dec(&bpf_master_redirect_enabled_key); + + return 0; + +err: + /* unwind the program changes */ + bond->xdp_prog = old_prog; + xdp.prog = old_prog; + xdp.extack = NULL; /* do not overwrite original error */ + + bond_for_each_slave(bond, rollback_slave, iter) { + struct net_device *slave_dev = rollback_slave->dev; + int err_unwind; + + if (slave == rollback_slave) + break; + + err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (err_unwind < 0) + slave_err(dev, slave_dev, + "Error %d when unwinding XDP program change\n", err_unwind); + else if (xdp.prog) + bpf_prog_inc(xdp.prog); + } + return err; +} + +static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return bond_xdp_set(dev, xdp->prog, xdp->extack); + default: + return -EINVAL; + } +} + static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed) { if (speed == 0 || speed == SPEED_UNKNOWN) @@ -5042,6 +5346,9 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_features_check = passthru_features_check, .ndo_get_xmit_slave = bond_xmit_get_slave, .ndo_sk_get_lower_dev = bond_sk_get_lower_dev, + .ndo_bpf = bond_xdp, + .ndo_xdp_xmit = bond_xdp_xmit, + .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave, }; static const struct device_type bond_type = { diff --git a/include/net/bonding.h b/include/net/bonding.h index 625d9c72dee3..b91c365e4e95 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -258,6 +258,7 @@ struct bonding { /* protecting ipsec_list */ spinlock_t ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ + struct bpf_prog *xdp_prog; }; #define bond_slave_get_rcu(dev) \ -- cgit v1.2.3 From beb7f2de5728b0bd2140a652fa51f6ad85d159f7 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 8 Aug 2021 09:52:42 +0300 Subject: psample: Add a fwd declaration for skbuff Without this there is a warning if source files include psample.h before skbuff.h or doesn't include it at all. Fixes: 6ae0a6286171 ("net: Introduce psample, a new genetlink channel for packet sampling") Signed-off-by: Roi Dayan Link: https://lore.kernel.org/r/20210808065242.1522535-1-roid@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/psample.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/psample.h b/include/net/psample.h index e328c5127757..0509d2d6be67 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -31,6 +31,8 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num); void psample_group_take(struct psample_group *group); void psample_group_put(struct psample_group *group); +struct sk_buff; + #if IS_ENABLED(CONFIG_PSAMPLE) void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, -- cgit v1.2.3 From 57f05bc2ab2443b89c2e2562c05053bcc7d30e8b Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 6 Aug 2021 10:46:19 +0800 Subject: page_pool: keep pp info as long as page pool owns the page Currently, page->pp is cleared and set everytime the page is recycled, which is unnecessary. So only set the page->pp when the page is added to the page pool and only clear it when the page is released from the page pool. This is also a preparation to support allocating frag page in page pool. Reviewed-by: Ilias Apalodimas Signed-off-by: Yunsheng Lin Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 6 +----- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +- drivers/net/ethernet/ti/cpsw.c | 2 +- drivers/net/ethernet/ti/cpsw_new.c | 2 +- include/linux/skbuff.h | 4 +--- include/net/page_pool.h | 7 ------- net/core/page_pool.c | 21 +++++++++++++++++---- 7 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ff8db311963c..5d1007e1b5c9 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2327,7 +2327,7 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, if (!skb) return ERR_PTR(-ENOMEM); - skb_mark_for_recycle(skb, virt_to_page(xdp->data), pool); + skb_mark_for_recycle(skb); skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); @@ -2339,10 +2339,6 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, skb_frag_page(frag), skb_frag_off(frag), skb_frag_size(frag), PAGE_SIZE); - /* We don't need to reset pp_recycle here. It's already set, so - * just mark fragments for recycling. - */ - page_pool_store_mem_info(skb_frag_page(frag), pool); } return skb; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 99bd8b8aa0e2..744f58f41ecc 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3995,7 +3995,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, } if (pp) - skb_mark_for_recycle(skb, page, pp); + skb_mark_for_recycle(skb); else dma_unmap_single_attrs(dev->dev.parent, dma_addr, bm_pool->buf_size, DMA_FROM_DEVICE, diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 8e1e582a10c8..9f70e40779f6 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -431,7 +431,7 @@ static void cpsw_rx_handler(void *token, int len, int status) skb->protocol = eth_type_trans(skb, ndev); /* mark skb for recycling */ - skb_mark_for_recycle(skb, page, pool); + skb_mark_for_recycle(skb); netif_receive_skb(skb); ndev->stats.rx_bytes += len; diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 192394fe4c1c..ff3a96b084ee 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -375,7 +375,7 @@ static void cpsw_rx_handler(void *token, int len, int status) skb->protocol = eth_type_trans(skb, ndev); /* mark skb for recycling */ - skb_mark_for_recycle(skb, page, pool); + skb_mark_for_recycle(skb); netif_receive_skb(skb); ndev->stats.rx_bytes += len; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 783cc2368bb1..6bdb0db3e825 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4712,11 +4712,9 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb) } #ifdef CONFIG_PAGE_POOL -static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page, - struct page_pool *pp) +static inline void skb_mark_for_recycle(struct sk_buff *skb) { skb->pp_recycle = 1; - page_pool_store_mem_info(page, pp); } #endif diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 3dd62dd73027..8d7744d1c7c1 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -253,11 +253,4 @@ static inline void page_pool_ring_unlock(struct page_pool *pool) spin_unlock_bh(&pool->ring.producer_lock); } -/* Store mem_info on struct page and use it while recycling skb frags */ -static inline -void page_pool_store_mem_info(struct page *page, struct page_pool *pp) -{ - page->pp = pp; -} - #endif /* _NET_PAGE_POOL_H */ diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5e4eb45b139c..78838c6fe007 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -206,6 +206,19 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) return true; } +static void page_pool_set_pp_info(struct page_pool *pool, + struct page *page) +{ + page->pp = pool; + page->pp_magic |= PP_SIGNATURE; +} + +static void page_pool_clear_pp_info(struct page *page) +{ + page->pp_magic = 0; + page->pp = NULL; +} + static struct page *__page_pool_alloc_page_order(struct page_pool *pool, gfp_t gfp) { @@ -222,7 +235,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, return NULL; } - page->pp_magic |= PP_SIGNATURE; + page_pool_set_pp_info(pool, page); /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; @@ -266,7 +279,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, put_page(page); continue; } - page->pp_magic |= PP_SIGNATURE; + + page_pool_set_pp_info(pool, page); pool->alloc.cache[pool->alloc.count++] = page; /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; @@ -345,7 +359,7 @@ void page_pool_release_page(struct page_pool *pool, struct page *page) DMA_ATTR_SKIP_CPU_SYNC); page_pool_set_dma_addr(page, 0); skip_dma_unmap: - page->pp_magic = 0; + page_pool_clear_pp_info(page); /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. @@ -644,7 +658,6 @@ bool page_pool_return_skb_page(struct page *page) * The page will be returned to the pool here regardless of the * 'flipped' fragment being in use or not. */ - page->pp = NULL; page_pool_put_full_page(pp, page, false); return true; -- cgit v1.2.3 From 0e9d2a0a3a836c37528899010e73b5be8111753e Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 6 Aug 2021 10:46:20 +0800 Subject: page_pool: add interface to manipulate frag count in page pool For 32 bit systems with 64 bit dma, dma_addr[1] is used to store the upper 32 bit dma addr, those system should be rare those days. For normal system, the dma_addr[1] in 'struct page' is not used, so we can reuse dma_addr[1] for storing frag count, which means how many frags this page might be splited to. In order to simplify the page frag support in the page pool, the PAGE_POOL_DMA_USE_PP_FRAG_COUNT macro is added to indicate the 32 bit systems with 64 bit dma, and the page frag support in page pool is disabled for such system. The newly added page_pool_set_frag_count() is called to reserve the maximum frag count before any page frag is passed to the user. The page_pool_atomic_sub_frag_count_return() is called when user is done with the page frag. Signed-off-by: Yunsheng Lin Signed-off-by: Jakub Kicinski --- include/linux/mm_types.h | 18 +++++++++++++----- include/net/page_pool.h | 46 +++++++++++++++++++++++++++++++++++++++------- net/core/page_pool.c | 4 ++++ 3 files changed, 56 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 52bbd2b7cb46..7f8ee09c711f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -103,11 +103,19 @@ struct page { unsigned long pp_magic; struct page_pool *pp; unsigned long _pp_mapping_pad; - /** - * @dma_addr: might require a 64-bit value on - * 32-bit architectures. - */ - unsigned long dma_addr[2]; + unsigned long dma_addr; + union { + /** + * dma_addr_upper: might require a 64-bit + * value on 32-bit architectures. + */ + unsigned long dma_addr_upper; + /** + * For frag page support, not supported in + * 32-bit architectures with 64-bit DMA. + */ + atomic_long_t pp_frag_count; + }; }; struct { /* slab, slob and slub */ union { diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 8d7744d1c7c1..42e6997e637d 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -45,7 +45,10 @@ * Please note DMA-sync-for-CPU is still * device driver responsibility */ -#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV) +#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ +#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ + PP_FLAG_DMA_SYNC_DEV |\ + PP_FLAG_PAGE_FRAG) /* * Fast allocation side cache array/stack @@ -198,19 +201,48 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } +#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ + (sizeof(dma_addr_t) > sizeof(unsigned long)) + static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { - dma_addr_t ret = page->dma_addr[0]; - if (sizeof(dma_addr_t) > sizeof(unsigned long)) - ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16; + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16; + return ret; } static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) { - page->dma_addr[0] = addr; - if (sizeof(dma_addr_t) > sizeof(unsigned long)) - page->dma_addr[1] = upper_32_bits(addr); + page->dma_addr = addr; + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + page->dma_addr_upper = upper_32_bits(addr); +} + +static inline void page_pool_set_frag_count(struct page *page, long nr) +{ + atomic_long_set(&page->pp_frag_count, nr); +} + +static inline long page_pool_atomic_sub_frag_count_return(struct page *page, + long nr) +{ + long ret; + + /* As suggested by Alexander, atomic_long_read() may cover up the + * reference count errors, so avoid calling atomic_long_read() in + * the cases of freeing or draining the page_frags, where we would + * not expect it to match or that are slowpath anyway. + */ + if (__builtin_constant_p(nr) && + atomic_long_read(&page->pp_frag_count) == nr) + return 0; + + ret = atomic_long_sub_return(nr, &page->pp_frag_count); + WARN_ON(ret < 0); + return ret; } static inline bool is_page_pool_compiled_in(void) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 78838c6fe007..68fab94ac422 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -67,6 +67,10 @@ static int page_pool_init(struct page_pool *pool, */ } + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT && + pool->p.flags & PP_FLAG_PAGE_FRAG) + return -EINVAL; + if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) return -ENOMEM; -- cgit v1.2.3 From 53e0961da1c7bbdabd1abebb20de403ec237ec09 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 6 Aug 2021 10:46:21 +0800 Subject: page_pool: add frag page recycling support in page pool Currently page pool only support page recycling when there is only one user of the page, and the split page reusing implemented in the most driver can not use the page pool as bing-pong way of reusing requires the multi user support in page pool. Those reusing or recycling has below limitations: 1. page from page pool can only be used be one user in order for the page recycling to happen. 2. Bing-pong way of reusing in most driver does not support multi desc using different part of the same page in order to save memory. So add multi-users support and frag page recycling in page pool to overcome the above limitation. Signed-off-by: Yunsheng Lin Signed-off-by: Jakub Kicinski --- include/net/page_pool.h | 15 +++++++++ net/core/page_pool.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) (limited to 'include/net') diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 42e6997e637d..a4082406a003 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -91,6 +91,9 @@ struct page_pool { unsigned long defer_warn; u32 pages_state_hold_cnt; + unsigned int frag_offset; + struct page *frag_page; + long frag_users; /* * Data structure for allocation side @@ -140,6 +143,18 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) return page_pool_alloc_pages(pool, gfp); } +struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, + unsigned int size, gfp_t gfp); + +static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, + unsigned int *offset, + unsigned int size) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc_frag(pool, offset, size, gfp); +} + /* get the stored dma direction. A driver might decide to treat this locally and * avoid the extra cache line from page_pool to determine the direction */ diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 68fab94ac422..ac116041b35f 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -24,6 +24,8 @@ #define DEFER_TIME (msecs_to_jiffies(1000)) #define DEFER_WARN_INTERVAL (60 * HZ) +#define BIAS_MAX LONG_MAX + static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params) { @@ -423,6 +425,11 @@ static __always_inline struct page * __page_pool_put_page(struct page_pool *pool, struct page *page, unsigned int dma_sync_size, bool allow_direct) { + /* It is not the last user for the page frag case */ + if (pool->p.flags & PP_FLAG_PAGE_FRAG && + page_pool_atomic_sub_frag_count_return(page, 1)) + return NULL; + /* This allocator is optimized for the XDP mode that uses * one-frame-per-page, but have fallbacks that act like the * regular page allocator APIs. @@ -515,6 +522,84 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, } EXPORT_SYMBOL(page_pool_put_page_bulk); +static struct page *page_pool_drain_frag(struct page_pool *pool, + struct page *page) +{ + long drain_count = BIAS_MAX - pool->frag_users; + + /* Some user is still using the page frag */ + if (likely(page_pool_atomic_sub_frag_count_return(page, + drain_count))) + return NULL; + + if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) { + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) + page_pool_dma_sync_for_device(pool, page, -1); + + return page; + } + + page_pool_return_page(pool, page); + return NULL; +} + +static void page_pool_free_frag(struct page_pool *pool) +{ + long drain_count = BIAS_MAX - pool->frag_users; + struct page *page = pool->frag_page; + + pool->frag_page = NULL; + + if (!page || + page_pool_atomic_sub_frag_count_return(page, drain_count)) + return; + + page_pool_return_page(pool, page); +} + +struct page *page_pool_alloc_frag(struct page_pool *pool, + unsigned int *offset, + unsigned int size, gfp_t gfp) +{ + unsigned int max_size = PAGE_SIZE << pool->p.order; + struct page *page = pool->frag_page; + + if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) || + size > max_size)) + return NULL; + + size = ALIGN(size, dma_get_cache_alignment()); + *offset = pool->frag_offset; + + if (page && *offset + size > max_size) { + page = page_pool_drain_frag(pool, page); + if (page) + goto frag_reset; + } + + if (!page) { + page = page_pool_alloc_pages(pool, gfp); + if (unlikely(!page)) { + pool->frag_page = NULL; + return NULL; + } + + pool->frag_page = page; + +frag_reset: + pool->frag_users = 1; + *offset = 0; + pool->frag_offset = size; + page_pool_set_frag_count(page, BIAS_MAX); + return page; + } + + pool->frag_users++; + pool->frag_offset = *offset + size; + return page; +} +EXPORT_SYMBOL(page_pool_alloc_frag); + static void page_pool_empty_ring(struct page_pool *pool) { struct page *page; @@ -620,6 +705,8 @@ void page_pool_destroy(struct page_pool *pool) if (!page_pool_put(pool)) return; + page_pool_free_frag(pool); + if (!page_pool_release(pool)) return; -- cgit v1.2.3 From 8702997074363c294a1f83928cd0c33ca57bf813 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Aug 2021 12:02:43 +0200 Subject: netfilter: nf_queue: move hookfn registration out of struct net This was done to detect when the pernet->init() function was not called yet, by checking if net->nf.queue_handler is NULL. Once the nfnetlink_queue module is active, all struct net pointers contain the same address. So place this back in nf_queue.c. Handle the 'netns error unwind' test by checking nfnl_queue_net for a NULL pointer and add a comment for this. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 4 ++-- include/net/netns/netfilter.h | 1 - net/netfilter/nf_queue.c | 19 +++++++++---------- net/netfilter/nfnetlink_queue.c | 15 +++++++++++++-- 4 files changed, 24 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index e770bba00066..9eed51e920e8 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -33,8 +33,8 @@ struct nf_queue_handler { void (*nf_hook_drop)(struct net *net); }; -void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); -void nf_unregister_queue_handler(struct net *net); +void nf_register_queue_handler(const struct nf_queue_handler *qh); +void nf_unregister_queue_handler(void); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); void nf_queue_entry_get_refs(struct nf_queue_entry *entry); diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 15e2b13fb0c0..986a2a9cfdfa 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -12,7 +12,6 @@ struct netns_nf { #if defined CONFIG_PROC_FS struct proc_dir_entry *proc_netfilter; #endif - const struct nf_queue_handler __rcu *queue_handler; const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index bbd1209694b8..4903da82dc04 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -21,6 +21,8 @@ #include "nf_internals.h" +static const struct nf_queue_handler __rcu *nf_queue_handler; + /* * Hook for nfnetlink_queue to register its queue handler. * We do this so that most of the NFQUEUE code can be modular. @@ -29,20 +31,18 @@ * receives, no matter what. */ -/* return EBUSY when somebody else is registered, return EEXIST if the - * same handler is registered, return 0 in case of success. */ -void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh) +void nf_register_queue_handler(const struct nf_queue_handler *qh) { /* should never happen, we only have one queueing backend in kernel */ - WARN_ON(rcu_access_pointer(net->nf.queue_handler)); - rcu_assign_pointer(net->nf.queue_handler, qh); + WARN_ON(rcu_access_pointer(nf_queue_handler)); + rcu_assign_pointer(nf_queue_handler, qh); } EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ -void nf_unregister_queue_handler(struct net *net) +void nf_unregister_queue_handler(void) { - RCU_INIT_POINTER(net->nf.queue_handler, NULL); + RCU_INIT_POINTER(nf_queue_handler, NULL); } EXPORT_SYMBOL(nf_unregister_queue_handler); @@ -116,7 +116,7 @@ void nf_queue_nf_hook_drop(struct net *net) const struct nf_queue_handler *qh; rcu_read_lock(); - qh = rcu_dereference(net->nf.queue_handler); + qh = rcu_dereference(nf_queue_handler); if (qh) qh->nf_hook_drop(net); rcu_read_unlock(); @@ -157,12 +157,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, { struct nf_queue_entry *entry = NULL; const struct nf_queue_handler *qh; - struct net *net = state->net; unsigned int route_key_size; int status; /* QUEUE == DROP if no one is waiting, to be safe. */ - qh = rcu_dereference(net->nf.queue_handler); + qh = rcu_dereference(nf_queue_handler); if (!qh) return -ESRCH; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f774de0fc24f..4c3fbaaeb103 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -951,6 +951,16 @@ static void nfqnl_nf_hook_drop(struct net *net) struct nfnl_queue_net *q = nfnl_queue_pernet(net); int i; + /* This function is also called on net namespace error unwind, + * when pernet_ops->init() failed and ->exit() functions of the + * previous pernet_ops gets called. + * + * This may result in a call to nfqnl_nf_hook_drop() before + * struct nfnl_queue_net was allocated. + */ + if (!q) + return; + for (i = 0; i < INSTANCE_BUCKETS; i++) { struct nfqnl_instance *inst; struct hlist_head *head = &q->instance_table[i]; @@ -1502,7 +1512,6 @@ static int __net_init nfnl_queue_net_init(struct net *net) &nfqnl_seq_ops, sizeof(struct iter_state))) return -ENOMEM; #endif - nf_register_queue_handler(net, &nfqh); return 0; } @@ -1511,7 +1520,6 @@ static void __net_exit nfnl_queue_net_exit(struct net *net) struct nfnl_queue_net *q = nfnl_queue_pernet(net); unsigned int i; - nf_unregister_queue_handler(net); #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); #endif @@ -1555,6 +1563,8 @@ static int __init nfnetlink_queue_init(void) goto cleanup_netlink_subsys; } + nf_register_queue_handler(&nfqh); + return status; cleanup_netlink_subsys: @@ -1568,6 +1578,7 @@ out: static void __exit nfnetlink_queue_fini(void) { + nf_unregister_queue_handler(); unregister_netdevice_notifier(&nfqnl_dev_notifier); nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); -- cgit v1.2.3 From f13a5ad88186c142b4f6060fb06f0f8fb1674915 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 10 Aug 2021 16:24:15 +0300 Subject: devlink: Add new "enable_eth" generic device param Add new device generic parameter to enable/disable creation of Ethernet auxiliary device and associated device functionality in the devlink instance. User who prefers to disable such functionality can disable it using below example. $ devlink dev param set pci/0000:06:00.0 \ name enable_eth value false cmode driverinit $ devlink dev reload pci/0000:06:00.0 At this point devlink instance do not create auxiliary device for the Ethernet functionality. Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- Documentation/networking/devlink/devlink-params.rst | 4 ++++ include/net/devlink.h | 4 ++++ net/core/devlink.c | 5 +++++ 3 files changed, 13 insertions(+) (limited to 'include/net') diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst index 54c9f107c4b0..219c1272f2d6 100644 --- a/Documentation/networking/devlink/devlink-params.rst +++ b/Documentation/networking/devlink/devlink-params.rst @@ -97,6 +97,10 @@ own name. * - ``enable_roce`` - Boolean - Enable handling of RoCE traffic in the device. + * - ``enable_eth`` + - Boolean + - When enabled, the device driver will instantiate Ethernet specific + auxiliary device of the devlink device. * - ``internal_err_reset`` - Boolean - When enabled, the device driver will reset the device on internal diff --git a/include/net/devlink.h b/include/net/devlink.h index 0236c77f2fd0..1e3e183bb2c2 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -519,6 +519,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE, DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -559,6 +560,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME "enable_remote_dev_reset" #define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME "enable_eth" +#define DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE DEVLINK_PARAM_TYPE_BOOL + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ diff --git a/net/core/devlink.c b/net/core/devlink.c index b02d54ab59ac..9a59f45c8bf9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4277,6 +4277,11 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + .name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME, + .type = DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) -- cgit v1.2.3 From 8ddaabee3c7994854841a9b097fd94538126c12c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 10 Aug 2021 16:24:16 +0300 Subject: devlink: Add new "enable_rdma" generic device param Add new device generic parameter to enable/disable creation of RDMA auxiliary device and associated device functionality in the devlink instance. User who prefers to disable such functionality can disable it using below example. $ devlink dev param set pci/0000:06:00.0 \ name enable_rdma value false cmode driverinit $ devlink dev reload pci/0000:06:00.0 At this point devlink instance do not create auxiliary device for the RDMA functionality. Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- Documentation/networking/devlink/devlink-params.rst | 4 ++++ include/net/devlink.h | 4 ++++ net/core/devlink.c | 5 +++++ 3 files changed, 13 insertions(+) (limited to 'include/net') diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst index 219c1272f2d6..a49da0b049b6 100644 --- a/Documentation/networking/devlink/devlink-params.rst +++ b/Documentation/networking/devlink/devlink-params.rst @@ -101,6 +101,10 @@ own name. - Boolean - When enabled, the device driver will instantiate Ethernet specific auxiliary device of the devlink device. + * - ``enable_rdma`` + - Boolean + - When enabled, the device driver will instantiate RDMA specific + auxiliary device of the devlink device. * - ``internal_err_reset`` - Boolean - When enabled, the device driver will reset the device on internal diff --git a/include/net/devlink.h b/include/net/devlink.h index 1e3e183bb2c2..6f4f0416e598 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -520,6 +520,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET, DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -563,6 +564,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME "enable_eth" #define DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME "enable_rdma" +#define DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE DEVLINK_PARAM_TYPE_BOOL + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ diff --git a/net/core/devlink.c b/net/core/devlink.c index 9a59f45c8bf9..b68d6921d34f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4282,6 +4282,11 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + .name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME, + .type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) -- cgit v1.2.3 From 076b2a9dbb28e8b3d9a264a8bca664794255d448 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 10 Aug 2021 16:24:17 +0300 Subject: devlink: Add new "enable_vnet" generic device param Add new device generic parameter to enable/disable creation of VDPA net auxiliary device and associated device functionality in the devlink instance. User who prefers to disable such functionality can disable it using below example. $ devlink dev param set pci/0000:06:00.0 \ name enable_vnet value false cmode driverinit $ devlink dev reload pci/0000:06:00.0 At this point devlink instance do not create auxiliary device for the VDPA net functionality. Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- Documentation/networking/devlink/devlink-params.rst | 4 ++++ include/net/devlink.h | 4 ++++ net/core/devlink.c | 5 +++++ 3 files changed, 13 insertions(+) (limited to 'include/net') diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst index a49da0b049b6..4878907e9232 100644 --- a/Documentation/networking/devlink/devlink-params.rst +++ b/Documentation/networking/devlink/devlink-params.rst @@ -105,6 +105,10 @@ own name. - Boolean - When enabled, the device driver will instantiate RDMA specific auxiliary device of the devlink device. + * - ``enable_vnet`` + - Boolean + - When enabled, the device driver will instantiate VDPA networking + specific auxiliary device of the devlink device. * - ``internal_err_reset`` - Boolean - When enabled, the device driver will reset the device on internal diff --git a/include/net/devlink.h b/include/net/devlink.h index 6f4f0416e598..0a0becbcdc49 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -521,6 +521,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET, DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -567,6 +568,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME "enable_rdma" #define DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME "enable_vnet" +#define DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE DEVLINK_PARAM_TYPE_BOOL + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ diff --git a/net/core/devlink.c b/net/core/devlink.c index b68d6921d34f..867ae7e39788 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4287,6 +4287,11 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + .name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME, + .type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) -- cgit v1.2.3 From b40c51efefbc4a3ddec682f118adefea1ccf70dc Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 10 Aug 2021 16:24:19 +0300 Subject: devlink: Add API to register and unregister single parameter Currently device configuration parameters can be registered as an array. Due to this a constant array must be registered. A single driver supporting multiple devices each with different device capabilities end up registering all parameters even if it doesn't support it. One possible workaround a driver can do is, it registers multiple single entry arrays to overcome such limitation. Better is to provide a API that enables driver to register/unregister a single parameter. This also further helps in two ways. (1) to reduce the memory of devlink_param_entry by avoiding in registering parameters which are not supported by the device. (2) avoid generating multiple parameter add, delete, publish, unpublish, init value notifications for such unsupported parameters Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- include/net/devlink.h | 4 ++++ net/core/devlink.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 0a0becbcdc49..f6459ee77114 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1645,6 +1645,10 @@ int devlink_params_register(struct devlink *devlink, void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count); +int devlink_param_register(struct devlink *devlink, + const struct devlink_param *param); +void devlink_param_unregister(struct devlink *devlink, + const struct devlink_param *param); void devlink_params_publish(struct devlink *devlink); void devlink_params_unpublish(struct devlink *devlink); int devlink_port_params_register(struct devlink_port *devlink_port, diff --git a/net/core/devlink.c b/net/core/devlink.c index 050dd7271a45..629291175af3 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -9903,6 +9903,43 @@ void devlink_params_unregister(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devlink_params_unregister); +/** + * devlink_param_register - register one configuration parameter + * + * @devlink: devlink + * @param: one configuration parameter + * + * Register the configuration parameter supported by the driver. + * Return: returns 0 on successful registration or error code otherwise. + */ +int devlink_param_register(struct devlink *devlink, + const struct devlink_param *param) +{ + int err; + + mutex_lock(&devlink->lock); + err = __devlink_param_register_one(devlink, 0, &devlink->param_list, + param, DEVLINK_CMD_PARAM_NEW); + mutex_unlock(&devlink->lock); + return err; +} +EXPORT_SYMBOL_GPL(devlink_param_register); + +/** + * devlink_param_unregister - unregister one configuration parameter + * @devlink: devlink + * @param: configuration parameter to unregister + */ +void devlink_param_unregister(struct devlink *devlink, + const struct devlink_param *param) +{ + mutex_lock(&devlink->lock); + devlink_param_unregister_one(devlink, 0, &devlink->param_list, param, + DEVLINK_CMD_PARAM_DEL); + mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devlink_param_unregister); + /** * devlink_params_publish - publish configuration parameters * -- cgit v1.2.3 From 9c4a7665b4237621879caf115a78f69bad67b9c7 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 10 Aug 2021 16:24:20 +0300 Subject: devlink: Add APIs to publish, unpublish individual parameter Enable drivers to publish/unpublish individual parameter. Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- include/net/devlink.h | 4 ++++ net/core/devlink.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index f6459ee77114..1151497c0ec5 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1651,6 +1651,10 @@ void devlink_param_unregister(struct devlink *devlink, const struct devlink_param *param); void devlink_params_publish(struct devlink *devlink); void devlink_params_unpublish(struct devlink *devlink); +void devlink_param_publish(struct devlink *devlink, + const struct devlink_param *param); +void devlink_param_unpublish(struct devlink *devlink, + const struct devlink_param *param); int devlink_port_params_register(struct devlink_port *devlink_port, const struct devlink_param *params, size_t params_count); diff --git a/net/core/devlink.c b/net/core/devlink.c index 629291175af3..ee9787314cff 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -9982,6 +9982,54 @@ void devlink_params_unpublish(struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_params_unpublish); +/** + * devlink_param_publish - publish one configuration parameter + * + * @devlink: devlink + * @param: one configuration parameter + * + * Publish previously registered configuration parameter. + */ +void devlink_param_publish(struct devlink *devlink, + const struct devlink_param *param) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (param_item->param != param || param_item->published) + continue; + param_item->published = true; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_NEW); + break; + } +} +EXPORT_SYMBOL_GPL(devlink_param_publish); + +/** + * devlink_param_unpublish - unpublish one configuration parameter + * + * @devlink: devlink + * @param: one configuration parameter + * + * Unpublish previously registered configuration parameter. + */ +void devlink_param_unpublish(struct devlink *devlink, + const struct devlink_param *param) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (param_item->param != param || !param_item->published) + continue; + param_item->published = false; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_DEL); + break; + } +} +EXPORT_SYMBOL_GPL(devlink_param_unpublish); + /** * devlink_port_params_register - register port configuration parameters * -- cgit v1.2.3 From 891a88f4f5768b1e6ff52a2386d48558aa5a3f63 Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Tue, 10 Aug 2021 22:53:30 -0400 Subject: bonding: remove extraneous definitions from bonding.h All of the symbols either only exist in bond_options.c or nowhere at all. These symbols were verified to not exist in the code base by using `git grep` and their removal was verified by compiling bonding.ko. Signed-off-by: Jonathan Toppins Signed-off-by: Jakub Kicinski --- include/net/bonding.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/net') diff --git a/include/net/bonding.h b/include/net/bonding.h index 9f3fdc180c6c..15e083e18f75 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -150,11 +150,6 @@ struct bond_params { u8 ad_actor_system[ETH_ALEN + 2]; }; -struct bond_parm_tbl { - char *modename; - int mode; -}; - struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ @@ -755,13 +750,6 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) /* exported from bond_main.c */ extern unsigned int bond_net_id; -extern const struct bond_parm_tbl bond_lacp_tbl[]; -extern const struct bond_parm_tbl xmit_hashtype_tbl[]; -extern const struct bond_parm_tbl arp_validate_tbl[]; -extern const struct bond_parm_tbl arp_all_targets_tbl[]; -extern const struct bond_parm_tbl fail_over_mac_tbl[]; -extern const struct bond_parm_tbl pri_reselect_tbl[]; -extern struct bond_parm_tbl ad_select_tbl[]; /* exported from bond_netlink.c */ extern struct rtnl_link_ops bond_link_ops; -- cgit v1.2.3 From 83f0a0b7285b299e006b0698a0ddc1ffacff3e43 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 10 Aug 2021 10:38:34 +0800 Subject: mctp: Specify route types, require rtm_type in RTM_*ROUTE messages This change adds a 'type' attribute to routes, which can be parsed from a RTM_NEWROUTE message. This will help to distinguish local vs. peer routes in a future change. This means userspace will need to set a correct rtm_type in RTM_NEWROUTE and RTM_DELROUTE messages; we currently only accept RTN_UNICAST. Signed-off-by: Jeremy Kerr Link: https://lore.kernel.org/r/20210810023834.2231088-1-jk@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- include/net/mctp.h | 1 + net/mctp/route.c | 27 ++++++++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/mctp.h b/include/net/mctp.h index 54bbe042c973..a824d47c3c6d 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -173,6 +173,7 @@ struct mctp_route { struct mctp_dev *dev; unsigned int mtu; + unsigned char type; int (*output)(struct mctp_route *route, struct sk_buff *skb); diff --git a/net/mctp/route.c b/net/mctp/route.c index b3101375c8e7..5265525011ad 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -710,8 +710,9 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, /* route management */ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, unsigned int daddr_extent, unsigned int mtu, - bool is_local) + unsigned char type) { + int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb); struct net *net = dev_net(mdev->dev); struct mctp_route *rt, *ert; @@ -721,6 +722,17 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) return -EINVAL; + switch (type) { + case RTN_LOCAL: + rtfn = mctp_route_input; + break; + case RTN_UNICAST: + rtfn = mctp_route_output; + break; + default: + return -EINVAL; + } + rt = mctp_route_alloc(); if (!rt) return -ENOMEM; @@ -730,7 +742,8 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, rt->mtu = mtu; rt->dev = mdev; dev_hold(rt->dev->dev); - rt->output = is_local ? mctp_route_input : mctp_route_output; + rt->type = type; + rt->output = rtfn; ASSERT_RTNL(); /* Prevent duplicate identical routes. */ @@ -777,7 +790,7 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) { - return mctp_route_add(mdev, addr, 0, 0, true); + return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL); } int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr) @@ -936,7 +949,11 @@ static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, /* TODO: parse mtu from nlparse */ mtu = 0; - rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, false); + if (rtm->rtm_type != RTN_UNICAST) + return -EINVAL; + + rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, + rtm->rtm_type); return rc; } @@ -985,7 +1002,7 @@ static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, hdr->rtm_table = RT_TABLE_DEFAULT; hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */ hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */ - hdr->rtm_type = RTN_ANYCAST; /* TODO: type from route */ + hdr->rtm_type = rt->type; if (nla_put_u8(skb, RTA_DST, rt->min)) goto cancel; -- cgit v1.2.3 From 8c89f7b3d3f2880c57b0bc96c72ccd98fe354399 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 6 Aug 2021 14:53:05 -0700 Subject: mac80211: Use flex-array for radiotap header bitmap In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. The it_present member of struct ieee80211_radiotap_header is treated as a flexible array (multiple u32s can be conditionally present). In order for memcpy() to reason (or really, not reason) about the size of operations against this struct, use of bytes beyond it_present need to be treated as part of the flexible array. Add a trailing flexible array and initialize its initial index via pointer arithmetic. Cc: Johannes Berg Cc: "David S. Miller" Cc: Jakub Kicinski Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210806215305.2875621-1-keescook@chromium.org Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 5 +++++ net/mac80211/rx.c | 7 ++++++- net/wireless/radiotap.c | 5 ++--- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index c0854933e24f..11630351c978 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -43,6 +43,11 @@ struct ieee80211_radiotap_header { * @it_present: (first) present word */ __le32 it_present; + + /** + * @it_optional: all remaining presence bitmaps + */ + __le32 it_optional[]; } __packed; /* version is always 0 */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3eb7b03b23c6..33c56eab07fc 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -359,7 +359,12 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, put_unaligned_le32(it_present_val, it_present); - pos = (void *)(it_present + 1); + /* This references through an offset into it_optional[] rather + * than via it_present otherwise later uses of pos will cause + * the compiler to think we have walked past the end of the + * struct member. + */ + pos = (void *)&rthdr->it_optional[it_present - rthdr->it_optional]; /* the order of the following fields is important */ diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index 8099c9564a59..ae2e1a896461 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -115,10 +115,9 @@ int ieee80211_radiotap_iterator_init( iterator->_max_length = get_unaligned_le16(&radiotap_header->it_len); iterator->_arg_index = 0; iterator->_bitmap_shifter = get_unaligned_le32(&radiotap_header->it_present); - iterator->_arg = (uint8_t *)radiotap_header + sizeof(*radiotap_header); + iterator->_arg = (uint8_t *)radiotap_header->it_optional; iterator->_reset_on_ext = 0; - iterator->_next_bitmap = &radiotap_header->it_present; - iterator->_next_bitmap++; + iterator->_next_bitmap = radiotap_header->it_optional; iterator->_vns = vns; iterator->current_namespace = &radiotap_ns; iterator->is_radiotap_ns = 1; -- cgit v1.2.3 From 437ebfd90a2567aab19dce47bafc81ebd8a63324 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sat, 14 Aug 2021 12:57:28 +0300 Subject: devlink: Count struct devlink consumers The struct devlink itself is protected by internal lock and doesn't need global lock during operation. That global lock is used to protect addition/removal new devlink instances from the global list in use by all devlink consumers in the system. The future conversion of linked list to be xarray will allow us to actually delete that lock, but first we need to count all struct devlink users. The reference counting provides us a way to ensure that no new user space commands success to grab devlink instance which is going to be destroyed makes it is safe to access it without lock. Signed-off-by: Leon Romanovsky Signed-off-by: David S. Miller --- include/net/devlink.h | 2 + net/core/devlink.c | 205 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 172 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 1151497c0ec5..4c60d61d92da 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -56,6 +56,8 @@ struct devlink { */ u8 reload_failed:1, reload_enabled:1; + refcount_t refcount; + struct completion comp; char priv[0] __aligned(NETDEV_ALIGN); }; diff --git a/net/core/devlink.c b/net/core/devlink.c index c8a8eecad1c5..76f459da6e05 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -108,10 +108,22 @@ struct net *devlink_net(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_net); +static void devlink_put(struct devlink *devlink) +{ + if (refcount_dec_and_test(&devlink->refcount)) + complete(&devlink->comp); +} + +static bool __must_check devlink_try_get(struct devlink *devlink) +{ + return refcount_inc_not_zero(&devlink->refcount); +} + static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { struct devlink *devlink; + bool found = false; char *busname; char *devname; @@ -126,16 +138,16 @@ static struct devlink *devlink_get_from_attrs(struct net *net, list_for_each_entry(devlink, &devlink_list, list) { if (strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0 && - net_eq(devlink_net(devlink), net)) - return devlink; + net_eq(devlink_net(devlink), net)) { + found = true; + break; + } } - return ERR_PTR(-ENODEV); -} + if (!found || !devlink_try_get(devlink)) + devlink = ERR_PTR(-ENODEV); -static struct devlink *devlink_get_from_info(struct genl_info *info) -{ - return devlink_get_from_attrs(genl_info_net(info), info->attrs); + return devlink; } static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, @@ -486,7 +498,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, int err; mutex_lock(&devlink_mutex); - devlink = devlink_get_from_info(info); + devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs); if (IS_ERR(devlink)) { mutex_unlock(&devlink_mutex); return PTR_ERR(devlink); @@ -529,6 +541,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, unlock: if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); + devlink_put(devlink); mutex_unlock(&devlink_mutex); return err; } @@ -541,6 +554,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, devlink = info->user_ptr[0]; if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); + devlink_put(devlink); mutex_unlock(&devlink_mutex); } @@ -1078,8 +1092,12 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_rate, &devlink->rate_list, list) { enum devlink_command cmd = DEVLINK_CMD_RATE_NEW; @@ -1094,11 +1112,14 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, NLM_F_MULTI, NULL); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -1173,15 +1194,24 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) { + devlink_put(devlink); + continue; + } + if (idx < start) { idx++; + devlink_put(devlink); continue; } + err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); + devlink_put(devlink); if (err) goto out; idx++; @@ -1226,8 +1256,12 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_port, &devlink->port_list, list) { if (idx < start) { @@ -1241,11 +1275,14 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, NLM_F_MULTI, cb->extack); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -1884,8 +1921,12 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { if (idx < start) { @@ -1899,11 +1940,14 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -2028,9 +2072,13 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { + if (!devlink_try_get(devlink)) + continue; + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_pool_get) - continue; + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_pool_get_dumpit(msg, start, &idx, devlink, @@ -2041,10 +2089,13 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -2241,9 +2292,13 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { + if (!devlink_try_get(devlink)) + continue; + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_port_pool_get) - continue; + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_port_pool_get_dumpit(msg, start, &idx, @@ -2254,10 +2309,13 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -2482,9 +2540,12 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { + if (!devlink_try_get(devlink)) + continue; + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_tc_pool_bind_get) - continue; + goto retry; mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { @@ -2497,10 +2558,13 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -4552,8 +4616,12 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(param_item, &devlink->param_list, list) { if (idx < start) { @@ -4569,11 +4637,14 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -4820,8 +4891,12 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_port, &devlink->port_list, list) { list_for_each_entry(param_item, @@ -4841,12 +4916,15 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -5385,14 +5463,20 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg, struct devlink *devlink; int start = cb->args[0]; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink, &idx, start); +retry: + devlink_put(devlink); if (err) goto out; } @@ -5755,6 +5839,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, nla_nest_end(skb, chunks_attr); genlmsg_end(skb, hdr); mutex_unlock(&devlink->lock); + devlink_put(devlink); mutex_unlock(&devlink_mutex); return skb->len; @@ -5763,6 +5848,7 @@ nla_put_failure: genlmsg_cancel(skb, hdr); out_unlock: mutex_unlock(&devlink->lock); + devlink_put(devlink); out_dev: mutex_unlock(&devlink_mutex); return err; @@ -5914,17 +6000,14 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; - if (idx < start) { - idx++; - continue; - } - if (!devlink->ops->info_get) { - idx++; - continue; - } + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + + if (idx < start || !devlink->ops->info_get) + goto inc; mutex_lock(&devlink->lock); err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, @@ -5934,9 +6017,14 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, mutex_unlock(&devlink->lock); if (err == -EOPNOTSUPP) err = 0; - else if (err) + else if (err) { + devlink_put(devlink); break; + } +inc: idx++; +retry: + devlink_put(devlink); } mutex_unlock(&devlink_mutex); @@ -7021,6 +7109,7 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb) goto unlock; reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); + devlink_put(devlink); mutex_unlock(&devlink_mutex); return reporter; unlock: @@ -7092,8 +7181,12 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry_rep; + mutex_lock(&devlink->reporters_lock); list_for_each_entry(reporter, &devlink->reporter_list, list) { @@ -7107,16 +7200,23 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->reporters_lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->reporters_lock); +retry_rep: + devlink_put(devlink); } list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry_port; + mutex_lock(&devlink->lock); list_for_each_entry(port, &devlink->port_list, list) { mutex_lock(&port->reporters_lock); @@ -7133,6 +7233,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, if (err) { mutex_unlock(&port->reporters_lock); mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; @@ -7140,6 +7241,8 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, mutex_unlock(&port->reporters_lock); } mutex_unlock(&devlink->lock); +retry_port: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -7673,8 +7776,12 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(trap_item, &devlink->trap_list, list) { if (idx < start) { @@ -7688,11 +7795,14 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -7892,8 +8002,12 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(group_item, &devlink->trap_group_list, list) { @@ -7908,11 +8022,14 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -8198,8 +8315,12 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(policer_item, &devlink->trap_policer_list, list) { @@ -8214,11 +8335,14 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -8801,6 +8925,9 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, INIT_LIST_HEAD(&devlink->trap_policer_list); mutex_init(&devlink->lock); mutex_init(&devlink->reporters_lock); + refcount_set(&devlink->refcount, 1); + init_completion(&devlink->comp); + return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc_ns); @@ -8827,6 +8954,9 @@ EXPORT_SYMBOL_GPL(devlink_register); */ void devlink_unregister(struct devlink *devlink) { + devlink_put(devlink); + wait_for_completion(&devlink->comp); + mutex_lock(&devlink_mutex); WARN_ON(devlink_reload_supported(devlink->ops) && devlink->reload_enabled); @@ -11374,9 +11504,12 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) */ mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), net)) + if (!devlink_try_get(devlink)) continue; + if (!net_eq(devlink_net(devlink), net)) + goto retry; + WARN_ON(!devlink_reload_supported(devlink->ops)); err = devlink_reload(devlink, &init_net, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, @@ -11384,6 +11517,8 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) &actions_performed, NULL); if (err && err != -EOPNOTSUPP) pr_warn("Failed to reload devlink instance into init_net\n"); +retry: + devlink_put(devlink); } mutex_unlock(&devlink_mutex); } -- cgit v1.2.3 From 11a861d767cdd87a34397821b0fd2095893b84b3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sat, 14 Aug 2021 12:57:29 +0300 Subject: devlink: Use xarray to store devlink instances We can use xarray instead of linearly organized linked lists for the devlink instances. This will let us revise the locking scheme in favour of internal xarray locking that protects database. Signed-off-by: Leon Romanovsky Signed-off-by: David S. Miller --- include/net/devlink.h | 2 +- net/core/devlink.c | 70 +++++++++++++++++++++++++++++++++++---------------- 2 files changed, 50 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 4c60d61d92da..154cf0dbca37 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -32,7 +32,7 @@ struct devlink_dev_stats { struct devlink_ops; struct devlink { - struct list_head list; + u32 index; struct list_head port_list; struct list_head rate_list; struct list_head sb_list; diff --git a/net/core/devlink.c b/net/core/devlink.c index 76f459da6e05..d218f57ad8cf 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -92,7 +92,8 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ DEVLINK_PORT_FN_STATE_ACTIVE), }; -static LIST_HEAD(devlink_list); +static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); +#define DEVLINK_REGISTERED XA_MARK_1 /* devlink_mutex * @@ -123,6 +124,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { struct devlink *devlink; + unsigned long index; bool found = false; char *busname; char *devname; @@ -135,7 +137,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net, lockdep_assert_held(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0 && net_eq(devlink_net(devlink), net)) { @@ -1087,11 +1089,12 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, struct devlink_rate *devlink_rate; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -1189,11 +1192,12 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, { struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -1251,11 +1255,12 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_port *devlink_port; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -1916,11 +1921,12 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_sb *devlink_sb; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -2067,11 +2073,12 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_sb *devlink_sb; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -2287,11 +2294,12 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_sb *devlink_sb; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -2535,11 +2543,12 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_sb *devlink_sb; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -4611,11 +4620,12 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, struct devlink_param_item *param_item; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -4886,11 +4896,12 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, struct devlink_port *devlink_port; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -5462,11 +5473,12 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg, { struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -5995,11 +6007,12 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, { struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -7176,11 +7189,12 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, struct devlink_port *port; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -7210,7 +7224,7 @@ retry_rep: devlink_put(devlink); } - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -7771,11 +7785,12 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, struct devlink_trap_item *trap_item; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -7997,11 +8012,12 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, u32 portid = NETLINK_CB(cb->skb).portid; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -8310,11 +8326,12 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, u32 portid = NETLINK_CB(cb->skb).portid; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; @@ -8899,6 +8916,8 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, struct device *dev) { struct devlink *devlink; + static u32 last_id; + int ret; WARN_ON(!ops || !dev); if (!devlink_reload_actions_valid(ops)) @@ -8908,6 +8927,13 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, if (!devlink) return NULL; + ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b, + &last_id, GFP_KERNEL); + if (ret < 0) { + kfree(devlink); + return NULL; + } + devlink->dev = dev; devlink->ops = ops; xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); @@ -8940,7 +8966,7 @@ EXPORT_SYMBOL_GPL(devlink_alloc_ns); int devlink_register(struct devlink *devlink) { mutex_lock(&devlink_mutex); - list_add_tail(&devlink->list, &devlink_list); + xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); devlink_notify(devlink, DEVLINK_CMD_NEW); mutex_unlock(&devlink_mutex); return 0; @@ -8961,7 +8987,7 @@ void devlink_unregister(struct devlink *devlink) WARN_ON(devlink_reload_supported(devlink->ops) && devlink->reload_enabled); devlink_notify(devlink, DEVLINK_CMD_DEL); - list_del(&devlink->list); + xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_unregister); @@ -9023,6 +9049,7 @@ void devlink_free(struct devlink *devlink) WARN_ON(!list_empty(&devlink->port_list)); xa_destroy(&devlink->snapshot_ids); + xa_erase(&devlinks, devlink->index); kfree(devlink); } @@ -11497,13 +11524,14 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) { struct devlink *devlink; u32 actions_performed; + unsigned long index; int err; /* In case network namespace is getting destroyed, reload * all devlink instances from this namespace into init_net. */ mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (!devlink_try_get(devlink)) continue; -- cgit v1.2.3 From 6c9b40844751ea30c72f7a2f92f4d704bc6b2927 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Fri, 13 Aug 2021 20:08:02 +0800 Subject: net: Remove net/ipx.h and uapi/linux/ipx.h header files commit <47595e32869f> ("") indicated the ipx network layer as obsolete in Jan 2018, updated in the MAINTAINERS file now, after being exposed for 3 years to refactoring, so to delete uapi/linux/ipx.h and net/ipx.h header files for good. additionally, there is no module that depends on ipx.h except a broken staging driver(r8188eu) Signed-off-by: Cai Huoqing Signed-off-by: David S. Miller --- include/net/ipx.h | 171 ----------------------------------------------- include/uapi/linux/ipx.h | 87 ------------------------ 2 files changed, 258 deletions(-) delete mode 100644 include/net/ipx.h delete mode 100644 include/uapi/linux/ipx.h (limited to 'include/net') diff --git a/include/net/ipx.h b/include/net/ipx.h deleted file mode 100644 index 9d1342807b59..000000000000 --- a/include/net/ipx.h +++ /dev/null @@ -1,171 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_INET_IPX_H_ -#define _NET_INET_IPX_H_ -/* - * The following information is in its entirety obtained from: - * - * Novell 'IPX Router Specification' Version 1.10 - * Part No. 107-000029-001 - * - * Which is available from ftp.novell.com - */ - -#include -#include -#include -#include -#include -#include - -struct ipx_address { - __be32 net; - __u8 node[IPX_NODE_LEN]; - __be16 sock; -}; - -#define ipx_broadcast_node "\377\377\377\377\377\377" -#define ipx_this_node "\0\0\0\0\0\0" - -#define IPX_MAX_PPROP_HOPS 8 - -struct ipxhdr { - __be16 ipx_checksum __packed; -#define IPX_NO_CHECKSUM cpu_to_be16(0xFFFF) - __be16 ipx_pktsize __packed; - __u8 ipx_tctrl; - __u8 ipx_type; -#define IPX_TYPE_UNKNOWN 0x00 -#define IPX_TYPE_RIP 0x01 /* may also be 0 */ -#define IPX_TYPE_SAP 0x04 /* may also be 0 */ -#define IPX_TYPE_SPX 0x05 /* SPX protocol */ -#define IPX_TYPE_NCP 0x11 /* $lots for docs on this (SPIT) */ -#define IPX_TYPE_PPROP 0x14 /* complicated flood fill brdcast */ - struct ipx_address ipx_dest __packed; - struct ipx_address ipx_source __packed; -}; - -/* From af_ipx.c */ -extern int sysctl_ipx_pprop_broadcasting; - -struct ipx_interface { - /* IPX address */ - __be32 if_netnum; - unsigned char if_node[IPX_NODE_LEN]; - refcount_t refcnt; - - /* physical device info */ - struct net_device *if_dev; - struct datalink_proto *if_dlink; - __be16 if_dlink_type; - - /* socket support */ - unsigned short if_sknum; - struct hlist_head if_sklist; - spinlock_t if_sklist_lock; - - /* administrative overhead */ - int if_ipx_offset; - unsigned char if_internal; - unsigned char if_primary; - - struct list_head node; /* node in ipx_interfaces list */ -}; - -struct ipx_route { - __be32 ir_net; - struct ipx_interface *ir_intrfc; - unsigned char ir_routed; - unsigned char ir_router_node[IPX_NODE_LEN]; - struct list_head node; /* node in ipx_routes list */ - refcount_t refcnt; -}; - -struct ipx_cb { - u8 ipx_tctrl; - __be32 ipx_dest_net; - __be32 ipx_source_net; - struct { - __be32 netnum; - int index; - } last_hop; -}; - -#include - -struct ipx_sock { - /* struct sock has to be the first member of ipx_sock */ - struct sock sk; - struct ipx_address dest_addr; - struct ipx_interface *intrfc; - __be16 port; -#ifdef CONFIG_IPX_INTERN - unsigned char node[IPX_NODE_LEN]; -#endif - unsigned short type; - /* - * To handle special ncp connection-handling sockets for mars_nwe, - * the connection number must be stored in the socket. - */ - unsigned short ipx_ncp_conn; -}; - -static inline struct ipx_sock *ipx_sk(struct sock *sk) -{ - return (struct ipx_sock *)sk; -} - -#define IPX_SKB_CB(__skb) ((struct ipx_cb *)&((__skb)->cb[0])) - -#define IPX_MIN_EPHEMERAL_SOCKET 0x4000 -#define IPX_MAX_EPHEMERAL_SOCKET 0x7fff - -extern struct list_head ipx_routes; -extern rwlock_t ipx_routes_lock; - -extern struct list_head ipx_interfaces; -struct ipx_interface *ipx_interfaces_head(void); -extern spinlock_t ipx_interfaces_lock; - -extern struct ipx_interface *ipx_primary_net; - -int ipx_proc_init(void); -void ipx_proc_exit(void); - -const char *ipx_frame_name(__be16); -const char *ipx_device_name(struct ipx_interface *intrfc); - -static __inline__ void ipxitf_hold(struct ipx_interface *intrfc) -{ - refcount_inc(&intrfc->refcnt); -} - -void ipxitf_down(struct ipx_interface *intrfc); -struct ipx_interface *ipxitf_find_using_net(__be32 net); -int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node); -__be16 ipx_cksum(struct ipxhdr *packet, int length); -int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc, - unsigned char *node); -void ipxrtr_del_routes(struct ipx_interface *intrfc); -int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, - struct msghdr *msg, size_t len, int noblock); -int ipxrtr_route_skb(struct sk_buff *skb); -struct ipx_route *ipxrtr_lookup(__be32 net); -int ipxrtr_ioctl(unsigned int cmd, void __user *arg); - -static __inline__ void ipxitf_put(struct ipx_interface *intrfc) -{ - if (refcount_dec_and_test(&intrfc->refcnt)) - ipxitf_down(intrfc); -} - -static __inline__ void ipxrtr_hold(struct ipx_route *rt) -{ - refcount_inc(&rt->refcnt); -} - -static __inline__ void ipxrtr_put(struct ipx_route *rt) -{ - if (refcount_dec_and_test(&rt->refcnt)) - kfree(rt); -} -#endif /* _NET_INET_IPX_H_ */ diff --git a/include/uapi/linux/ipx.h b/include/uapi/linux/ipx.h deleted file mode 100644 index 3168137adae8..000000000000 --- a/include/uapi/linux/ipx.h +++ /dev/null @@ -1,87 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _IPX_H_ -#define _IPX_H_ -#include /* for compatibility with glibc netipx/ipx.h */ -#include -#include -#include -#define IPX_NODE_LEN 6 -#define IPX_MTU 576 - -#if __UAPI_DEF_SOCKADDR_IPX -struct sockaddr_ipx { - __kernel_sa_family_t sipx_family; - __be16 sipx_port; - __be32 sipx_network; - unsigned char sipx_node[IPX_NODE_LEN]; - __u8 sipx_type; - unsigned char sipx_zero; /* 16 byte fill */ -}; -#endif /* __UAPI_DEF_SOCKADDR_IPX */ - -/* - * So we can fit the extra info for SIOCSIFADDR into the address nicely - */ -#define sipx_special sipx_port -#define sipx_action sipx_zero -#define IPX_DLTITF 0 -#define IPX_CRTITF 1 - -#if __UAPI_DEF_IPX_ROUTE_DEFINITION -struct ipx_route_definition { - __be32 ipx_network; - __be32 ipx_router_network; - unsigned char ipx_router_node[IPX_NODE_LEN]; -}; -#endif /* __UAPI_DEF_IPX_ROUTE_DEFINITION */ - -#if __UAPI_DEF_IPX_INTERFACE_DEFINITION -struct ipx_interface_definition { - __be32 ipx_network; - unsigned char ipx_device[16]; - unsigned char ipx_dlink_type; -#define IPX_FRAME_NONE 0 -#define IPX_FRAME_SNAP 1 -#define IPX_FRAME_8022 2 -#define IPX_FRAME_ETHERII 3 -#define IPX_FRAME_8023 4 -#define IPX_FRAME_TR_8022 5 /* obsolete */ - unsigned char ipx_special; -#define IPX_SPECIAL_NONE 0 -#define IPX_PRIMARY 1 -#define IPX_INTERNAL 2 - unsigned char ipx_node[IPX_NODE_LEN]; -}; -#endif /* __UAPI_DEF_IPX_INTERFACE_DEFINITION */ - -#if __UAPI_DEF_IPX_CONFIG_DATA -struct ipx_config_data { - unsigned char ipxcfg_auto_select_primary; - unsigned char ipxcfg_auto_create_interfaces; -}; -#endif /* __UAPI_DEF_IPX_CONFIG_DATA */ - -/* - * OLD Route Definition for backward compatibility. - */ - -#if __UAPI_DEF_IPX_ROUTE_DEF -struct ipx_route_def { - __be32 ipx_network; - __be32 ipx_router_network; -#define IPX_ROUTE_NO_ROUTER 0 - unsigned char ipx_router_node[IPX_NODE_LEN]; - unsigned char ipx_device[16]; - unsigned short ipx_flags; -#define IPX_RT_SNAP 8 -#define IPX_RT_8022 4 -#define IPX_RT_BLUEBOOK 2 -#define IPX_RT_ROUTED 1 -}; -#endif /* __UAPI_DEF_IPX_ROUTE_DEF */ - -#define SIOCAIPXITFCRT (SIOCPROTOPRIVATE) -#define SIOCAIPXPRISLT (SIOCPROTOPRIVATE + 1) -#define SIOCIPXCFGDATA (SIOCPROTOPRIVATE + 2) -#define SIOCIPXNCPCONN (SIOCPROTOPRIVATE + 3) -#endif /* _IPX_H_ */ -- cgit v1.2.3 From 7087c4f69487f017722df7d299ef9b7709996b79 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 11 Aug 2021 16:20:16 -0700 Subject: Bluetooth: Store advertising handle so it can be re-enabled This stores the advertising handle/instance into hci_conn so it is accessible when re-enabling the advertising once disconnected. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_event.c | 45 ++++++++++++++++++++++++++-------------- 2 files changed, 30 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a7d06d7da602..a7360c8c72f8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -629,6 +629,7 @@ struct hci_conn { __u8 init_addr_type; bdaddr_t resp_addr; __u8 resp_addr_type; + __u8 adv_instance; __u16 handle; __u16 state; __u8 mode; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a8a199cfef60..1ee89d9b2ed4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2348,19 +2348,20 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); if (conn) { - u8 type = conn->type; - mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); + if (conn->type == LE_LINK) { + hdev->cur_adv_instance = conn->adv_instance; + hci_req_reenable_advertising(hdev); + } + /* If the disconnection failed for any reason, the upper layer * does not retry to disconnect in current implementation. * Hence, we need to do some basic cleanup here and re-enable * advertising if necessary. */ hci_conn_del(conn); - if (type == LE_LINK) - hci_req_reenable_advertising(hdev); } hci_dev_unlock(hdev); @@ -2886,7 +2887,6 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) struct hci_conn_params *params; struct hci_conn *conn; bool mgmt_connected; - u8 type; BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); @@ -2941,10 +2941,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } } - type = conn->type; - hci_disconn_cfm(conn, ev->reason); - hci_conn_del(conn); /* The suspend notifier is waiting for all devices to disconnect so * clear the bit from pending tasks and inform the wait queue. @@ -2964,8 +2961,12 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) * or until a connection is created or until the Advertising * is timed out due to Directed Advertising." */ - if (type == LE_LINK) + if (conn->type == LE_LINK) { + hdev->cur_adv_instance = conn->adv_instance; hci_req_reenable_advertising(hdev); + } + + hci_conn_del(conn); unlock: hci_dev_unlock(hdev); @@ -5323,6 +5324,13 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn->handle = handle; conn->state = BT_CONFIG; + /* Store current advertising instance as connection advertising instance + * when sotfware rotation is in use so it can be re-enabled when + * disconnected. + */ + if (!ext_adv_capable(hdev)) + conn->adv_instance = hdev->cur_adv_instance; + conn->le_conn_interval = interval; conn->le_conn_latency = latency; conn->le_supv_timeout = supervision_timeout; @@ -5406,13 +5414,13 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_evt_le_ext_adv_set_term *ev = (void *) skb->data; struct hci_conn *conn; + struct adv_info *adv; BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); - if (ev->status) { - struct adv_info *adv; + adv = hci_find_adv_instance(hdev, ev->handle); - adv = hci_find_adv_instance(hdev, ev->handle); + if (ev->status) { if (!adv) return; @@ -5423,9 +5431,15 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) return; } + if (adv) + adv->enabled = false; + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->conn_handle)); if (conn) { - struct adv_info *adv_instance; + /* Store handle in the connection so the correct advertising + * instance can be re-enabled when disconnected. + */ + conn->adv_instance = ev->handle; if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM || bacmp(&conn->resp_addr, BDADDR_ANY)) @@ -5436,9 +5450,8 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) return; } - adv_instance = hci_find_adv_instance(hdev, ev->handle); - if (adv_instance) - bacpy(&conn->resp_addr, &adv_instance->random_addr); + if (adv) + bacpy(&conn->resp_addr, &adv->random_addr); } } -- cgit v1.2.3 From 94531cfcbe79c3598acf96806627b2137ca32eb9 Mon Sep 17 00:00:00 2001 From: Jiang Wang Date: Mon, 16 Aug 2021 19:03:21 +0000 Subject: af_unix: Add unix_stream_proto for sockmap Previously, sockmap for AF_UNIX protocol only supports dgram type. This patch add unix stream type support, which is similar to unix_dgram_proto. To support sockmap, dgram and stream cannot share the same unix_proto anymore, because they have different implementations, such as unhash for stream type (which will remove closed or disconnected sockets from the map), so rename unix_proto to unix_dgram_proto and add a new unix_stream_proto. Also implement stream related sockmap functions. And add dgram key words to those dgram specific functions. Signed-off-by: Jiang Wang Signed-off-by: Andrii Nakryiko Reviewed-by: Cong Wang Acked-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210816190327.2739291-3-jiang.wang@bytedance.com --- include/net/af_unix.h | 8 +++-- net/core/sock_map.c | 1 + net/unix/af_unix.c | 83 ++++++++++++++++++++++++++++++++++++++------- net/unix/unix_bpf.c | 93 +++++++++++++++++++++++++++++++++++++++------------ 4 files changed, 148 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 4757d7f53f13..7d142e8a0550 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -87,6 +87,8 @@ long unix_outq_len(struct sock *sk); int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, int flags); +int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, + int flags); #ifdef CONFIG_SYSCTL int unix_sysctl_register(struct net *net); void unix_sysctl_unregister(struct net *net); @@ -96,9 +98,11 @@ static inline void unix_sysctl_unregister(struct net *net) {} #endif #ifdef CONFIG_BPF_SYSCALL -extern struct proto unix_proto; +extern struct proto unix_dgram_proto; +extern struct proto unix_stream_proto; -int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); +int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); +int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void __init unix_bpf_build_proto(void); #else static inline void __init unix_bpf_build_proto(void) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index ae5fa4338d9c..e252b8ec2b85 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1494,6 +1494,7 @@ void sock_map_unhash(struct sock *sk) rcu_read_unlock(); saved_unhash(sk); } +EXPORT_SYMBOL_GPL(sock_map_unhash); void sock_map_close(struct sock *sk, long timeout) { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4455b62317d4..443c49081636 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -798,17 +798,35 @@ static void unix_close(struct sock *sk, long timeout) */ } -struct proto unix_proto = { - .name = "UNIX", +static void unix_unhash(struct sock *sk) +{ + /* Nothing to do here, unix socket does not need a ->unhash(). + * This is merely for sockmap. + */ +} + +struct proto unix_dgram_proto = { + .name = "UNIX-DGRAM", + .owner = THIS_MODULE, + .obj_size = sizeof(struct unix_sock), + .close = unix_close, +#ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = unix_dgram_bpf_update_proto, +#endif +}; + +struct proto unix_stream_proto = { + .name = "UNIX-STREAM", .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), .close = unix_close, + .unhash = unix_unhash, #ifdef CONFIG_BPF_SYSCALL - .psock_update_sk_prot = unix_bpf_update_proto, + .psock_update_sk_prot = unix_stream_bpf_update_proto, #endif }; -static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) +static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) { struct sock *sk = NULL; struct unix_sock *u; @@ -817,7 +835,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) goto out; - sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern); + if (type == SOCK_STREAM) + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); + else /*dgram and seqpacket */ + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); + if (!sk) goto out; @@ -879,7 +901,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - return unix_create1(net, sock, kern) ? 0 : -ENOMEM; + return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM; } static int unix_release(struct socket *sock) @@ -1293,7 +1315,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENOMEM; /* create new sock for complete connection */ - newsk = unix_create1(sock_net(sk), NULL, 0); + newsk = unix_create1(sock_net(sk), NULL, 0, sock->type); if (newsk == NULL) goto out; @@ -2323,8 +2345,10 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si struct sock *sk = sock->sk; #ifdef CONFIG_BPF_SYSCALL - if (sk->sk_prot != &unix_proto) - return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, + const struct proto *prot = READ_ONCE(sk->sk_prot); + + if (prot != &unix_dgram_proto) + return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, NULL); #endif return __unix_dgram_recvmsg(sk, msg, size, flags); @@ -2728,6 +2752,20 @@ static int unix_stream_read_actor(struct sk_buff *skb, return ret ?: chunk; } +int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, + size_t size, int flags) +{ + struct unix_stream_read_state state = { + .recv_actor = unix_stream_read_actor, + .socket = sk->sk_socket, + .msg = msg, + .size = size, + .flags = flags + }; + + return unix_stream_read_generic(&state, true); +} + static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -2739,6 +2777,14 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, .flags = flags }; +#ifdef CONFIG_BPF_SYSCALL + struct sock *sk = sock->sk; + const struct proto *prot = READ_ONCE(sk->sk_prot); + + if (prot != &unix_stream_proto) + return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, + flags & ~MSG_DONTWAIT, NULL); +#endif return unix_stream_read_generic(&state, true); } @@ -2799,7 +2845,9 @@ static int unix_shutdown(struct socket *sock, int mode) (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { int peer_mode = 0; + const struct proto *prot = READ_ONCE(other->sk_prot); + prot->unhash(other); if (mode&RCV_SHUTDOWN) peer_mode |= SEND_SHUTDOWN; if (mode&SEND_SHUTDOWN) @@ -2808,10 +2856,12 @@ static int unix_shutdown(struct socket *sock, int mode) other->sk_shutdown |= peer_mode; unix_state_unlock(other); other->sk_state_change(other); - if (peer_mode == SHUTDOWN_MASK) + if (peer_mode == SHUTDOWN_MASK) { sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); - else if (peer_mode & RCV_SHUTDOWN) + other->sk_state = TCP_CLOSE; + } else if (peer_mode & RCV_SHUTDOWN) { sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); + } } if (other) sock_put(other); @@ -3289,7 +3339,13 @@ static int __init af_unix_init(void) BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); - rc = proto_register(&unix_proto, 1); + rc = proto_register(&unix_dgram_proto, 1); + if (rc != 0) { + pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); + goto out; + } + + rc = proto_register(&unix_stream_proto, 1); if (rc != 0) { pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); goto out; @@ -3310,7 +3366,8 @@ out: static void __exit af_unix_exit(void) { sock_unregister(PF_UNIX); - proto_unregister(&unix_proto); + proto_unregister(&unix_dgram_proto); + proto_unregister(&unix_stream_proto); unregister_pernet_subsys(&unix_net_ops); } diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 20f53575b5c9..b927e2baae50 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -38,9 +38,18 @@ static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock, return ret; } -static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, - int *addr_len) +static int __unix_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int flags) +{ + if (sk->sk_type == SOCK_DGRAM) + return __unix_dgram_recvmsg(sk, msg, len, flags); + else + return __unix_stream_recvmsg(sk, msg, len, flags); +} + +static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int nonblock, int flags, + int *addr_len) { struct unix_sock *u = unix_sk(sk); struct sk_psock *psock; @@ -48,14 +57,14 @@ static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg, psock = sk_psock_get(sk); if (unlikely(!psock)) - return __unix_dgram_recvmsg(sk, msg, len, flags); + return __unix_recvmsg(sk, msg, len, flags); mutex_lock(&u->iolock); if (!skb_queue_empty(&sk->sk_receive_queue) && sk_psock_queue_empty(psock)) { mutex_unlock(&u->iolock); sk_psock_put(sk, psock); - return __unix_dgram_recvmsg(sk, msg, len, flags); + return __unix_recvmsg(sk, msg, len, flags); } msg_bytes_ready: @@ -71,7 +80,7 @@ msg_bytes_ready: goto msg_bytes_ready; mutex_unlock(&u->iolock); sk_psock_put(sk, psock); - return __unix_dgram_recvmsg(sk, msg, len, flags); + return __unix_recvmsg(sk, msg, len, flags); } copied = -EAGAIN; } @@ -80,30 +89,55 @@ msg_bytes_ready: return copied; } -static struct proto *unix_prot_saved __read_mostly; -static DEFINE_SPINLOCK(unix_prot_lock); -static struct proto unix_bpf_prot; +static struct proto *unix_dgram_prot_saved __read_mostly; +static DEFINE_SPINLOCK(unix_dgram_prot_lock); +static struct proto unix_dgram_bpf_prot; + +static struct proto *unix_stream_prot_saved __read_mostly; +static DEFINE_SPINLOCK(unix_stream_prot_lock); +static struct proto unix_stream_bpf_prot; -static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base) +static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *base) { *prot = *base; prot->close = sock_map_close; - prot->recvmsg = unix_dgram_bpf_recvmsg; + prot->recvmsg = unix_bpf_recvmsg; +} + +static void unix_stream_bpf_rebuild_protos(struct proto *prot, + const struct proto *base) +{ + *prot = *base; + prot->close = sock_map_close; + prot->recvmsg = unix_bpf_recvmsg; + prot->unhash = sock_map_unhash; +} + +static void unix_dgram_bpf_check_needs_rebuild(struct proto *ops) +{ + if (unlikely(ops != smp_load_acquire(&unix_dgram_prot_saved))) { + spin_lock_bh(&unix_dgram_prot_lock); + if (likely(ops != unix_dgram_prot_saved)) { + unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, ops); + smp_store_release(&unix_dgram_prot_saved, ops); + } + spin_unlock_bh(&unix_dgram_prot_lock); + } } -static void unix_bpf_check_needs_rebuild(struct proto *ops) +static void unix_stream_bpf_check_needs_rebuild(struct proto *ops) { - if (unlikely(ops != smp_load_acquire(&unix_prot_saved))) { - spin_lock_bh(&unix_prot_lock); - if (likely(ops != unix_prot_saved)) { - unix_bpf_rebuild_protos(&unix_bpf_prot, ops); - smp_store_release(&unix_prot_saved, ops); + if (unlikely(ops != smp_load_acquire(&unix_stream_prot_saved))) { + spin_lock_bh(&unix_stream_prot_lock); + if (likely(ops != unix_stream_prot_saved)) { + unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, ops); + smp_store_release(&unix_stream_prot_saved, ops); } - spin_unlock_bh(&unix_prot_lock); + spin_unlock_bh(&unix_stream_prot_lock); } } -int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) +int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { if (sk->sk_type != SOCK_DGRAM) return -EOPNOTSUPP; @@ -114,12 +148,27 @@ int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) return 0; } - unix_bpf_check_needs_rebuild(psock->sk_proto); - WRITE_ONCE(sk->sk_prot, &unix_bpf_prot); + unix_dgram_bpf_check_needs_rebuild(psock->sk_proto); + WRITE_ONCE(sk->sk_prot, &unix_dgram_bpf_prot); + return 0; +} + +int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) +{ + if (restore) { + sk->sk_write_space = psock->saved_write_space; + WRITE_ONCE(sk->sk_prot, psock->sk_proto); + return 0; + } + + unix_stream_bpf_check_needs_rebuild(psock->sk_proto); + WRITE_ONCE(sk->sk_prot, &unix_stream_bpf_prot); return 0; } void __init unix_bpf_build_proto(void) { - unix_bpf_rebuild_protos(&unix_bpf_prot, &unix_proto); + unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, &unix_dgram_proto); + unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, &unix_stream_proto); + } -- cgit v1.2.3 From 0d2ab3aea50bb02ff0c9c3d53c7b2b4b21cdd59d Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 2 Jul 2021 19:44:07 +0200 Subject: nl80211: add support for BSS coloring This patch adds support for BSS color collisions to the wireless subsystem. Add the required functionality to nl80211 that will notify about color collisions, triggering the color change and notifying when it is completed. Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: John Crispin Link: https://lore.kernel.org/r/500b3582aec8fe2c42ef46f3117b148cb7cbceb5.1625247619.git.lorenzo@kernel.org [remove unnecessary NULL initialisation] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 92 +++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 43 ++++++++++++ net/wireless/nl80211.c | 157 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 13 ++++ net/wireless/trace.h | 46 +++++++++++++ 5 files changed, 351 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 161cdf7df1a0..62dd8422e0dc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1252,6 +1252,27 @@ struct cfg80211_csa_settings { u8 count; }; +/** + * struct cfg80211_color_change_settings - color change settings + * + * Used for bss color change + * + * @beacon_color_change: beacon data while performing the color countdown + * @counter_offsets_beacon: offsets of the counters within the beacon (tail) + * @counter_offsets_presp: offsets of the counters within the probe response + * @beacon_next: beacon data to be used after the color change + * @count: number of beacons until the color change + * @color: the color used after the change + */ +struct cfg80211_color_change_settings { + struct cfg80211_beacon_data beacon_color_change; + u16 counter_offset_beacon; + u16 counter_offset_presp; + struct cfg80211_beacon_data beacon_next; + u8 count; + u8 color; +}; + /** * struct iface_combination_params - input parameters for interface combinations * @@ -3995,6 +4016,8 @@ struct mgmt_frame_regs { * given TIDs. This callback may sleep. * * @set_sar_specs: Update the SAR (TX power) settings. + * + * @color_change: Initiate a color change. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4322,6 +4345,9 @@ struct cfg80211_ops { const u8 *peer, u8 tids); int (*set_sar_specs)(struct wiphy *wiphy, struct cfg80211_sar_specs *sar); + int (*color_change)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_color_change_settings *params); }; /* @@ -8218,4 +8244,70 @@ void cfg80211_update_owe_info_event(struct net_device *netdev, */ void cfg80211_bss_flush(struct wiphy *wiphy); +/** + * cfg80211_bss_color_notify - notify about bss color event + * @dev: network device + * @gfp: allocation flags + * @cmd: the actual event we want to notify + * @count: the number of TBTTs until the color change happens + * @color_bitmap: representations of the colors that the local BSS is aware of + */ +int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp, + enum nl80211_commands cmd, u8 count, + u64 color_bitmap); + +/** + * cfg80211_obss_color_collision_notify - notify about bss color collision + * @dev: network device + * @color_bitmap: representations of the colors that the local BSS is aware of + */ +static inline int cfg80211_obss_color_collision_notify(struct net_device *dev, + u64 color_bitmap) +{ + return cfg80211_bss_color_notify(dev, GFP_KERNEL, + NL80211_CMD_OBSS_COLOR_COLLISION, + 0, color_bitmap); +} + +/** + * cfg80211_color_change_started_notify - notify color change start + * @dev: the device on which the color is switched + * @count: the number of TBTTs until the color change happens + * + * Inform the userspace about the color change that has started. + */ +static inline int cfg80211_color_change_started_notify(struct net_device *dev, + u8 count) +{ + return cfg80211_bss_color_notify(dev, GFP_KERNEL, + NL80211_CMD_COLOR_CHANGE_STARTED, + count, 0); +} + +/** + * cfg80211_color_change_aborted_notify - notify color change abort + * @dev: the device on which the color is switched + * + * Inform the userspace about the color change that has aborted. + */ +static inline int cfg80211_color_change_aborted_notify(struct net_device *dev) +{ + return cfg80211_bss_color_notify(dev, GFP_KERNEL, + NL80211_CMD_COLOR_CHANGE_ABORTED, + 0, 0); +} + +/** + * cfg80211_color_change_notify - notify color change completion + * @dev: the device on which the color was switched + * + * Inform the userspace about the color change that has completed. + */ +static inline int cfg80211_color_change_notify(struct net_device *dev) +{ + return cfg80211_bss_color_notify(dev, GFP_KERNEL, + NL80211_CMD_COLOR_CHANGE_COMPLETED, + 0, 0); +} + #endif /* __NET_CFG80211_H */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index db474994fa73..c2efea98e060 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1185,6 +1185,21 @@ * passed using %NL80211_ATTR_SAR_SPEC. %NL80211_ATTR_WIPHY is used to * specify the wiphy index to be applied to. * + * @NL80211_CMD_OBSS_COLOR_COLLISION: This notification is sent out whenever + * mac80211/drv detects a bss color collision. + * + * @NL80211_CMD_COLOR_CHANGE_REQUEST: This command is used to indicate that + * userspace wants to change the BSS color. + * + * @NL80211_CMD_COLOR_CHANGE_STARTED: Notify userland, that a color change has + * started + * + * @NL80211_CMD_COLOR_CHANGE_ABORTED: Notify userland, that the color change has + * been aborted + * + * @NL80211_CMD_COLOR_CHANGE_COMPLETED: Notify userland that the color change + * has completed + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1417,6 +1432,14 @@ enum nl80211_commands { NL80211_CMD_SET_SAR_SPECS, + NL80211_CMD_OBSS_COLOR_COLLISION, + + NL80211_CMD_COLOR_CHANGE_REQUEST, + + NL80211_CMD_COLOR_CHANGE_STARTED, + NL80211_CMD_COLOR_CHANGE_ABORTED, + NL80211_CMD_COLOR_CHANGE_COMPLETED, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2560,6 +2583,16 @@ enum nl80211_commands { * disassoc events to indicate that an immediate reconnect to the AP * is desired. * + * @NL80211_ATTR_OBSS_COLOR_BITMAP: bitmap of the u64 BSS colors for the + * %NL80211_CMD_OBSS_COLOR_COLLISION event. + * + * @NL80211_ATTR_COLOR_CHANGE_COUNT: u8 attribute specifying the number of TBTT's + * until the color switch event. + * @NL80211_ATTR_COLOR_CHANGE_COLOR: u8 attribute specifying the color that we are + * switching to + * @NL80211_ATTR_COLOR_CHANGE_ELEMS: Nested set of attributes containing the IE + * information for the time while performing a color switch. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3057,6 +3090,12 @@ enum nl80211_attrs { NL80211_ATTR_DISABLE_HE, + NL80211_ATTR_OBSS_COLOR_BITMAP, + + NL80211_ATTR_COLOR_CHANGE_COUNT, + NL80211_ATTR_COLOR_CHANGE_COLOR, + NL80211_ATTR_COLOR_CHANGE_ELEMS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5953,6 +5992,9 @@ enum nl80211_feature_flags { * frame protection for all management frames exchanged during the * negotiation and range measurement procedure. * + * @NL80211_EXT_FEATURE_BSS_COLOR: The driver supports BSS color collision + * detection and change announcemnts. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6017,6 +6059,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SECURE_LTF, NL80211_EXT_FEATURE_SECURE_RTT, NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE, + NL80211_EXT_FEATURE_BSS_COLOR, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dceed5b5b226..bf7cd4752547 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -759,6 +759,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_RECONNECT_REQUESTED] = { .type = NLA_REJECT }, [NL80211_ATTR_SAR_SPEC] = NLA_POLICY_NESTED(sar_policy), [NL80211_ATTR_DISABLE_HE] = { .type = NLA_FLAG }, + [NL80211_ATTR_OBSS_COLOR_BITMAP] = { .type = NLA_U64 }, + [NL80211_ATTR_COLOR_CHANGE_COUNT] = { .type = NLA_U8 }, + [NL80211_ATTR_COLOR_CHANGE_COLOR] = { .type = NLA_U8 }, + [NL80211_ATTR_COLOR_CHANGE_ELEMS] = NLA_POLICY_NESTED(nl80211_policy), }; /* policy for the key attributes */ @@ -14800,6 +14804,106 @@ bad_tid_conf: return ret; } +static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_color_change_settings params = {}; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct nlattr **tb; + u16 offset; + int err; + + if (!rdev->ops->color_change) + return -EOPNOTSUPP; + + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BSS_COLOR)) + return -EOPNOTSUPP; + + if (wdev->iftype != NL80211_IFTYPE_AP) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT] || + !info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR] || + !info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS]) + return -EINVAL; + + params.count = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT]); + params.color = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR]); + + err = nl80211_parse_beacon(rdev, info->attrs, ¶ms.beacon_next); + if (err) + return err; + + tb = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*tb), GFP_KERNEL); + if (!tb) + return -ENOMEM; + + err = nla_parse_nested(tb, NL80211_ATTR_MAX, + info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS], + nl80211_policy, info->extack); + if (err) + goto out; + + err = nl80211_parse_beacon(rdev, tb, ¶ms.beacon_color_change); + if (err) + goto out; + + if (!tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) { + err = -EINVAL; + goto out; + } + + if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) != sizeof(u16)) { + err = -EINVAL; + goto out; + } + + offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]); + if (offset >= params.beacon_color_change.tail_len) { + err = -EINVAL; + goto out; + } + + if (params.beacon_color_change.tail[offset] != params.count) { + err = -EINVAL; + goto out; + } + + params.counter_offset_beacon = offset; + + if (tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) { + if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) != + sizeof(u16)) { + err = -EINVAL; + goto out; + } + + offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]); + if (offset >= params.beacon_color_change.probe_resp_len) { + err = -EINVAL; + goto out; + } + + if (params.beacon_color_change.probe_resp[offset] != + params.count) { + err = -EINVAL; + goto out; + } + + params.counter_offset_presp = offset; + } + + wdev_lock(wdev); + err = rdev_color_change(rdev, dev, ¶ms); + wdev_unlock(wdev); + +out: + kfree(tb); + return err; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -15795,6 +15899,14 @@ static const struct genl_small_ops nl80211_small_ops[] = { .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_COLOR_CHANGE_REQUEST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = nl80211_color_change, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -17424,6 +17536,51 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev, } EXPORT_SYMBOL(cfg80211_ch_switch_started_notify); +int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp, + enum nl80211_commands cmd, u8 count, + u64 color_bitmap) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + void *hdr; + + ASSERT_WDEV_LOCK(wdev); + + trace_cfg80211_bss_color_notify(dev, cmd, count, color_bitmap); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return -ENOMEM; + + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) + goto nla_put_failure; + + if (cmd == NL80211_CMD_COLOR_CHANGE_STARTED && + nla_put_u32(msg, NL80211_ATTR_COLOR_CHANGE_COUNT, count)) + goto nla_put_failure; + + if (cmd == NL80211_CMD_OBSS_COLOR_COLLISION && + nla_put_u64_64bit(msg, NL80211_ATTR_OBSS_COLOR_BITMAP, + color_bitmap, NL80211_ATTR_PAD)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), + msg, 0, NL80211_MCGRP_MLME, gfp); + +nla_put_failure: + nlmsg_free(msg); + return -EINVAL; +} +EXPORT_SYMBOL(cfg80211_bss_color_notify); + void nl80211_radar_notify(struct cfg80211_registered_device *rdev, const struct cfg80211_chan_def *chandef, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index b1d37f582dc6..ce6bf218a1a3 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1368,4 +1368,17 @@ static inline int rdev_set_sar_specs(struct cfg80211_registered_device *rdev, return ret; } +static inline int rdev_color_change(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_color_change_settings *params) +{ + int ret; + + trace_rdev_color_change(&rdev->wiphy, dev, params); + ret = rdev->ops->color_change(&rdev->wiphy, dev, params); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 440bce5f0274..911bb3b8969e 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3597,6 +3597,52 @@ TRACE_EVENT(rdev_set_sar_specs, WIPHY_PR_ARG, __entry->type, __entry->num) ); +TRACE_EVENT(rdev_color_change, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_color_change_settings *params), + TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u8, count) + __field(u16, bcn_ofs) + __field(u16, pres_ofs) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->count = params->count; + __entry->bcn_ofs = params->counter_offset_beacon; + __entry->pres_ofs = params->counter_offset_presp; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT + ", count: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, + __entry->count) +); + +TRACE_EVENT(cfg80211_bss_color_notify, + TP_PROTO(struct net_device *netdev, + enum nl80211_commands cmd, + u8 count, u64 color_bitmap), + TP_ARGS(netdev, cmd, count, color_bitmap), + TP_STRUCT__entry( + NETDEV_ENTRY + __field(enum nl80211_bss_scan_width, cmd) + __field(u8, count) + __field(u64, color_bitmap) + ), + TP_fast_assign( + NETDEV_ASSIGN; + __entry->cmd = cmd; + __entry->count = count; + __entry->color_bitmap = color_bitmap; + ), + TP_printk(NETDEV_PR_FMT ", cmd: %x, count: %u, bitmap: %llx", + NETDEV_PR_ARG, __entry->cmd, __entry->count, + __entry->color_bitmap) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 5f9404abdf2ac31c8f4768c39714bfcaca389e3a Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 2 Jul 2021 19:44:08 +0200 Subject: mac80211: add support for BSS color change The color change announcement is very similar to how CSA works where we have an IE that includes a counter. When the counter hits 0, the new color is applied via an updated beacon. This patch makes the CSA counter functionality reusable, rather than implementing it again. This also allows for future reuse incase support for other counter IEs gets added. Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: John Crispin Link: https://lore.kernel.org/r/057c1e67b82bee561ea44ce6a45a8462d3da6995.1625247619.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- include/net/mac80211.h | 29 ++++++ net/mac80211/cfg.c | 234 ++++++++++++++++++++++++++++++++++++++++++--- net/mac80211/ieee80211_i.h | 11 +++ net/mac80211/iface.c | 2 + net/mac80211/tx.c | 29 +++--- 5 files changed, 283 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d8a1d09a2141..a23e6734d26b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1711,6 +1711,10 @@ enum ieee80211_offload_flags { * protected by fq->lock. * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see * &enum ieee80211_offload_flags. + * @color_change_active: marks whether a color change is ongoing. Internally it is + * write-protected by sdata_lock and local->mtx so holding either is fine + * for read access. + * @color_change_color: the bss color that will be used after the change. */ struct ieee80211_vif { enum nl80211_iftype type; @@ -1739,6 +1743,9 @@ struct ieee80211_vif { bool txqs_stopped[IEEE80211_NUM_ACS]; + bool color_change_active; + u8 color_change_color; + /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; @@ -5007,6 +5014,16 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif); */ bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif); +/** + * ieee80211_color_change_finish - notify mac80211 about color change + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * After a color change announcement was scheduled and the counter in this + * announcement hits 1, this function must be called by the driver to + * notify mac80211 that the color can be changed + */ +void ieee80211_color_change_finish(struct ieee80211_vif *vif); + /** * ieee80211_proberesp_get - retrieve a Probe Response template * @hw: pointer obtained from ieee80211_alloc_hw(). @@ -6771,6 +6788,18 @@ struct sk_buff * ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, struct ieee80211_vif *vif); +/** + * ieeee80211_obss_color_collision_notify - notify userland about a BSS color + * collision. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is + * aware of. + */ +void +ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif, + u64 color_bitmap); + /** * ieee80211_is_tx_data - check if frame is a data frame * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4e6f11e63df3..d69b31c20fe2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -828,9 +828,11 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, return ret; } -static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, - const u8 *resp, size_t resp_len, - const struct ieee80211_csa_settings *csa) +static int +ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, + const u8 *resp, size_t resp_len, + const struct ieee80211_csa_settings *csa, + const struct ieee80211_color_change_settings *cca) { struct probe_resp *new, *old; @@ -850,6 +852,8 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_presp, csa->n_counter_offsets_presp * sizeof(new->cntdwn_counter_offsets[0])); + else if (cca) + new->cntdwn_counter_offsets[0] = cca->counter_offset_presp; rcu_assign_pointer(sdata->u.ap.probe_resp, new); if (old) @@ -955,7 +959,8 @@ static int ieee80211_set_ftm_responder_params( static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, struct cfg80211_beacon_data *params, - const struct ieee80211_csa_settings *csa) + const struct ieee80211_csa_settings *csa, + const struct ieee80211_color_change_settings *cca) { struct beacon_data *new, *old; int new_head_len, new_tail_len; @@ -1004,6 +1009,9 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_beacon, csa->n_counter_offsets_beacon * sizeof(new->cntdwn_counter_offsets[0])); + } else if (cca) { + new->cntdwn_current_counter = cca->count; + new->cntdwn_counter_offsets[0] = cca->counter_offset_beacon; } /* copy in head */ @@ -1020,7 +1028,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, memcpy(new->tail, old->tail, new_tail_len); err = ieee80211_set_probe_resp(sdata, params->probe_resp, - params->probe_resp_len, csa); + params->probe_resp_len, csa, cca); if (err < 0) { kfree(new); return err; @@ -1175,7 +1183,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) sdata->vif.bss_conf.beacon_tx_rate = params->beacon_rate; - err = ieee80211_assign_beacon(sdata, ¶ms->beacon, NULL); + err = ieee80211_assign_beacon(sdata, ¶ms->beacon, NULL, NULL); if (err < 0) goto error; changed |= err; @@ -1230,17 +1238,17 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata_assert_lock(sdata); - /* don't allow changing the beacon while CSA is in place - offset + /* don't allow changing the beacon while a countdown is in place - offset * of channel switch counter may change */ - if (sdata->vif.csa_active) + if (sdata->vif.csa_active || sdata->vif.color_change_active) return -EBUSY; old = sdata_dereference(sdata->u.ap.beacon, sdata); if (!old) return -ENOENT; - err = ieee80211_assign_beacon(sdata, params, NULL); + err = ieee80211_assign_beacon(sdata, params, NULL, NULL); if (err < 0) return err; ieee80211_bss_info_change_notify(sdata, err); @@ -3156,7 +3164,7 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP: err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon, - NULL); + NULL, NULL); kfree(sdata->u.ap.next_beacon); sdata->u.ap.next_beacon = NULL; @@ -3322,7 +3330,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, csa.n_counter_offsets_presp = params->n_counter_offsets_presp; csa.count = params->count; - err = ieee80211_assign_beacon(sdata, ¶ms->beacon_csa, &csa); + err = ieee80211_assign_beacon(sdata, ¶ms->beacon_csa, &csa, NULL); if (err < 0) { kfree(sdata->u.ap.next_beacon); return err; @@ -3411,6 +3419,15 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, return 0; } +static void ieee80211_color_change_abort(struct ieee80211_sub_if_data *sdata) +{ + sdata->vif.color_change_active = false; + kfree(sdata->u.ap.next_beacon); + sdata->u.ap.next_beacon = NULL; + + cfg80211_color_change_aborted_notify(sdata->dev); +} + static int __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) @@ -3479,6 +3496,10 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, goto out; } + /* if there is a color change in progress, abort it */ + if (sdata->vif.color_change_active) + ieee80211_color_change_abort(sdata); + err = ieee80211_set_csa_beacon(sdata, params, &changed); if (err) { ieee80211_vif_unreserve_chanctx(sdata); @@ -4130,6 +4151,196 @@ static int ieee80211_set_sar_specs(struct wiphy *wiphy, return local->ops->set_sar_specs(&local->hw, sar); } +static int +ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata, + u32 *changed) +{ + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: { + int ret; + + ret = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon, + NULL, NULL); + kfree(sdata->u.ap.next_beacon); + sdata->u.ap.next_beacon = NULL; + + if (ret < 0) + return ret; + + *changed |= ret; + break; + } + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + return 0; +} + +static int +ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata, + struct cfg80211_color_change_settings *params, + u32 *changed) +{ + struct ieee80211_color_change_settings color_change = {}; + int err; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + sdata->u.ap.next_beacon = + cfg80211_beacon_dup(¶ms->beacon_next); + if (!sdata->u.ap.next_beacon) + return -ENOMEM; + + if (params->count <= 1) + break; + + color_change.counter_offset_beacon = + params->counter_offset_beacon; + color_change.counter_offset_presp = + params->counter_offset_presp; + color_change.count = params->count; + + err = ieee80211_assign_beacon(sdata, ¶ms->beacon_color_change, + NULL, &color_change); + if (err < 0) { + kfree(sdata->u.ap.next_beacon); + return err; + } + *changed |= err; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static void +ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, + u8 color, int enable, u32 changed) +{ + sdata->vif.bss_conf.he_bss_color.color = color; + sdata->vif.bss_conf.he_bss_color.enabled = enable; + changed |= BSS_CHANGED_HE_BSS_COLOR; + + ieee80211_bss_info_change_notify(sdata, changed); +} + +static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + u32 changed = 0; + int err; + + sdata_assert_lock(sdata); + lockdep_assert_held(&local->mtx); + + sdata->vif.color_change_active = false; + + err = ieee80211_set_after_color_change_beacon(sdata, &changed); + if (err) { + cfg80211_color_change_aborted_notify(sdata->dev); + return err; + } + + ieee80211_color_change_bss_config_notify(sdata, + sdata->vif.color_change_color, + 1, changed); + cfg80211_color_change_notify(sdata->dev); + + return 0; +} + +void ieee80211_color_change_finalize_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + color_change_finalize_work); + struct ieee80211_local *local = sdata->local; + + sdata_lock(sdata); + mutex_lock(&local->mtx); + + /* AP might have been stopped while waiting for the lock. */ + if (!sdata->vif.color_change_active) + goto unlock; + + if (!ieee80211_sdata_running(sdata)) + goto unlock; + + ieee80211_color_change_finalize(sdata); + +unlock: + mutex_unlock(&local->mtx); + sdata_unlock(sdata); +} + +void ieee80211_color_change_finish(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + ieee80211_queue_work(&sdata->local->hw, + &sdata->color_change_finalize_work); +} +EXPORT_SYMBOL_GPL(ieee80211_color_change_finish); + +void +ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif, + u64 color_bitmap) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + if (sdata->vif.color_change_active || sdata->vif.csa_active) + return; + + cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap); +} +EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify); + +static int +ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_color_change_settings *params) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + u32 changed = 0; + int err; + + sdata_assert_lock(sdata); + + mutex_lock(&local->mtx); + + /* don't allow another color change if one is already active or if csa + * is active + */ + if (sdata->vif.color_change_active || sdata->vif.csa_active) { + err = -EBUSY; + goto out; + } + + err = ieee80211_set_color_change_beacon(sdata, params, &changed); + if (err) + goto out; + + sdata->vif.color_change_active = true; + sdata->vif.color_change_color = params->color; + + cfg80211_color_change_started_notify(sdata->dev, params->count); + + if (changed) + ieee80211_color_change_bss_config_notify(sdata, 0, 0, changed); + else + /* if the beacon didn't change, we can finalize immediately */ + ieee80211_color_change_finalize(sdata); + +out: + mutex_unlock(&local->mtx); + + return err; +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -4233,4 +4444,5 @@ const struct cfg80211_ops mac80211_config_ops = { .set_tid_config = ieee80211_set_tid_config, .reset_tid_config = ieee80211_reset_tid_config, .set_sar_specs = ieee80211_set_sar_specs, + .color_change = ieee80211_color_change, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c57224d771b9..e8945c20688a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -245,6 +245,12 @@ struct ieee80211_csa_settings { u8 count; }; +struct ieee80211_color_change_settings { + u16 counter_offset_beacon; + u16 counter_offset_presp; + u8 count; +}; + struct beacon_data { u8 *head, *tail; int head_len, tail_len; @@ -924,6 +930,8 @@ struct ieee80211_sub_if_data { bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */ struct cfg80211_chan_def csa_chandef; + struct work_struct color_change_finalize_work; + struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */ struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */ @@ -1888,6 +1896,9 @@ void ieee80211_csa_finalize_work(struct work_struct *work); int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params); +/* color change handling */ +void ieee80211_color_change_finalize_work(struct work_struct *work); + /* interface handling */ #define MAC80211_SUPPORTED_FEATURES_TX (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \ NETIF_F_HW_CSUM | NETIF_F_SG | \ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index cd96cd337aa8..769f8f585c06 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -462,6 +462,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do sdata_unlock(sdata); cancel_work_sync(&sdata->csa_finalize_work); + cancel_work_sync(&sdata->color_change_finalize_work); cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); @@ -1531,6 +1532,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, INIT_WORK(&sdata->work, ieee80211_iface_work); INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work); INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work); + INIT_WORK(&sdata->color_change_finalize_work, ieee80211_color_change_finalize_work); INIT_LIST_HEAD(&sdata->assigned_chanctx_list); INIT_LIST_HEAD(&sdata->reserved_chanctx_list); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8509778ff31f..1f7571777fb1 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4782,11 +4782,11 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata, struct beacon_data *beacon) { + u8 *beacon_data, count, max_count = 1; struct probe_resp *resp; - u8 *beacon_data; size_t beacon_data_len; + u16 *bcn_offsets; int i; - u8 count = beacon->cntdwn_current_counter; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: @@ -4806,21 +4806,27 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata, } rcu_read_lock(); - for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; ++i) { - resp = rcu_dereference(sdata->u.ap.probe_resp); + resp = rcu_dereference(sdata->u.ap.probe_resp); + + bcn_offsets = beacon->cntdwn_counter_offsets; + count = beacon->cntdwn_current_counter; + if (sdata->vif.csa_active) + max_count = IEEE80211_MAX_CNTDWN_COUNTERS_NUM; - if (beacon->cntdwn_counter_offsets[i]) { - if (WARN_ON_ONCE(beacon->cntdwn_counter_offsets[i] >= - beacon_data_len)) { + for (i = 0; i < max_count; ++i) { + if (bcn_offsets[i]) { + if (WARN_ON_ONCE(bcn_offsets[i] >= beacon_data_len)) { rcu_read_unlock(); return; } - - beacon_data[beacon->cntdwn_counter_offsets[i]] = count; + beacon_data[bcn_offsets[i]] = count; } - if (sdata->vif.type == NL80211_IFTYPE_AP && resp) - resp->data[resp->cntdwn_counter_offsets[i]] = count; + if (sdata->vif.type == NL80211_IFTYPE_AP && resp) { + u16 *resp_offsets = resp->cntdwn_counter_offsets; + + resp->data[resp_offsets[i]] = count; + } } rcu_read_unlock(); } @@ -5030,6 +5036,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, if (offs) { offs->tim_offset = beacon->head_len; offs->tim_length = skb->len - beacon->head_len; + offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0]; /* for AP the csa offsets are from tail */ csa_off_base = skb->len; -- cgit v1.2.3 From 4b1327be9fe57443295ae86fe0fcf24a18469e9f Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 17 Aug 2021 12:40:03 -0700 Subject: net-memcg: pass in gfp_t mask to mem_cgroup_charge_skmem() Add gfp_t mask as an input parameter to mem_cgroup_charge_skmem(), to give more control to the networking stack and enable it to change memcg charging behavior. In the future, the networking stack may decide to avoid oom-kills when fallbacks are more appropriate. One behavior change in mem_cgroup_charge_skmem() by this patch is to avoid force charging by default and let the caller decide when and if force charging is needed through the presence or absence of __GFP_NOFAIL. Signed-off-by: Wei Wang Reviewed-by: Shakeel Butt Signed-off-by: David S. Miller --- include/linux/memcontrol.h | 3 ++- include/net/sock.h | 5 +++++ mm/memcontrol.c | 24 +++++++++++------------- net/core/sock.c | 16 ++++++++++++---- net/ipv4/inet_connection_sock.c | 3 ++- net/ipv4/tcp_output.c | 3 ++- 6 files changed, 34 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bfe5c486f4ad..f0ee30881ca9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1581,7 +1581,8 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb) #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); +bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, + gfp_t gfp_mask); void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #ifdef CONFIG_MEMCG extern struct static_key_false memcg_sockets_enabled_key; diff --git a/include/net/sock.h b/include/net/sock.h index 6e761451c927..95b25777b53e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2400,6 +2400,11 @@ static inline gfp_t gfp_any(void) return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } +static inline gfp_t gfp_memcg_charge(void) +{ + return in_softirq() ? GFP_NOWAIT : GFP_KERNEL; +} + static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) { return noblock ? 0 : sk->sk_rcvtimeo; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8ef06f9e0db1..be585ceaba98 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7048,14 +7048,14 @@ void mem_cgroup_sk_free(struct sock *sk) * mem_cgroup_charge_skmem - charge socket memory * @memcg: memcg to charge * @nr_pages: number of pages to charge + * @gfp_mask: reclaim mode * * Charges @nr_pages to @memcg. Returns %true if the charge fit within - * @memcg's configured limit, %false if the charge had to be forced. + * @memcg's configured limit, %false if it doesn't. */ -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) +bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, + gfp_t gfp_mask) { - gfp_t gfp_mask = GFP_KERNEL; - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) { struct page_counter *fail; @@ -7063,21 +7063,19 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) memcg->tcpmem_pressure = 0; return true; } - page_counter_charge(&memcg->tcpmem, nr_pages); memcg->tcpmem_pressure = 1; + if (gfp_mask & __GFP_NOFAIL) { + page_counter_charge(&memcg->tcpmem, nr_pages); + return true; + } return false; } - /* Don't block in the packet receive path */ - if (in_softirq()) - gfp_mask = GFP_NOWAIT; - - mod_memcg_state(memcg, MEMCG_SOCK, nr_pages); - - if (try_charge(memcg, gfp_mask, nr_pages) == 0) + if (try_charge(memcg, gfp_mask, nr_pages) == 0) { + mod_memcg_state(memcg, MEMCG_SOCK, nr_pages); return true; + } - try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages); return false; } diff --git a/net/core/sock.c b/net/core/sock.c index aada649e07e8..950f1e70dbf5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2728,10 +2728,12 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { struct proto *prot = sk->sk_prot; long allocated = sk_memory_allocated_add(sk, amt); + bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg; bool charged = true; - if (mem_cgroup_sockets_enabled && sk->sk_memcg && - !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt))) + if (memcg_charge && + !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt, + gfp_memcg_charge()))) goto suppress_allocation; /* Under limit. */ @@ -2785,8 +2787,14 @@ suppress_allocation: /* Fail only if socket is _under_ its sndbuf. * In this case we cannot block, so that we have to fail. */ - if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) + if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { + /* Force charge with __GFP_NOFAIL */ + if (memcg_charge && !charged) { + mem_cgroup_charge_skmem(sk->sk_memcg, amt, + gfp_memcg_charge() | __GFP_NOFAIL); + } return 1; + } } if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged)) @@ -2794,7 +2802,7 @@ suppress_allocation: sk_memory_allocated_sub(sk, amt); - if (mem_cgroup_sockets_enabled && sk->sk_memcg) + if (memcg_charge && charged) mem_cgroup_uncharge_skmem(sk->sk_memcg, amt); return 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 754013fa393b..f25d02ad4a8a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -534,7 +534,8 @@ out: atomic_read(&newsk->sk_rmem_alloc)); mem_cgroup_sk_alloc(newsk); if (newsk->sk_memcg && amt) - mem_cgroup_charge_skmem(newsk->sk_memcg, amt); + mem_cgroup_charge_skmem(newsk->sk_memcg, amt, + GFP_KERNEL | __GFP_NOFAIL); release_sock(newsk); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 29553fce8502..6d72f3ea48c4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3373,7 +3373,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size) sk_memory_allocated_add(sk, amt); if (mem_cgroup_sockets_enabled && sk->sk_memcg) - mem_cgroup_charge_skmem(sk->sk_memcg, amt); + mem_cgroup_charge_skmem(sk->sk_memcg, amt, + gfp_memcg_charge() | __GFP_NOFAIL); } /* Send a FIN. The caller locks the socket for us. -- cgit v1.2.3 From 74fc4f828769cca1c3be89ea92cb88feaa27ef52 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 17 Aug 2021 20:05:18 +0300 Subject: net: Fix offloading indirect devices dependency on qdisc order creation Currently, when creating an ingress qdisc on an indirect device before the driver registered for callbacks, the driver will not have a chance to register its filter configuration callbacks. To fix that, modify the code such that it keeps track of all the ingress qdiscs that call flow_indr_dev_setup_offload(). When a driver calls flow_indr_dev_register(), go through the list of tracked ingress qdiscs and call the driver callback entry point so as to give it a chance to register its callback. Reviewed-by: Jiri Pirko Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- include/net/flow_offload.h | 1 + net/core/flow_offload.c | 89 ++++++++++++++++++++++++++++++++++- net/netfilter/nf_flow_table_offload.c | 1 + net/netfilter/nf_tables_offload.c | 1 + net/sched/cls_api.c | 1 + 5 files changed, 92 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index f3c2841566a0..5aa27acdb0b3 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -453,6 +453,7 @@ struct flow_block_offload { struct list_head *driver_block_list; struct netlink_ext_ack *extack; struct Qdisc *sch; + struct list_head *cb_list_head; }; enum tc_setup_type; diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 1da83997e86a..6beaea13564a 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -321,6 +321,7 @@ EXPORT_SYMBOL(flow_block_cb_setup_simple); static DEFINE_MUTEX(flow_indr_block_lock); static LIST_HEAD(flow_block_indr_list); static LIST_HEAD(flow_block_indr_dev_list); +static LIST_HEAD(flow_indir_dev_list); struct flow_indr_dev { struct list_head list; @@ -345,6 +346,33 @@ static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb, return indr_dev; } +struct flow_indir_dev_info { + void *data; + struct net_device *dev; + struct Qdisc *sch; + enum tc_setup_type type; + void (*cleanup)(struct flow_block_cb *block_cb); + struct list_head list; + enum flow_block_command command; + enum flow_block_binder_type binder_type; + struct list_head *cb_list; +}; + +static void existing_qdiscs_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) +{ + struct flow_block_offload bo; + struct flow_indir_dev_info *cur; + + list_for_each_entry(cur, &flow_indir_dev_list, list) { + memset(&bo, 0, sizeof(bo)); + bo.command = cur->command; + bo.binder_type = cur->binder_type; + INIT_LIST_HEAD(&bo.cb_list); + cb(cur->dev, cur->sch, cb_priv, cur->type, &bo, cur->data, cur->cleanup); + list_splice(&bo.cb_list, cur->cb_list); + } +} + int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) { struct flow_indr_dev *indr_dev; @@ -366,6 +394,7 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) } list_add(&indr_dev->list, &flow_block_indr_dev_list); + existing_qdiscs_register(cb, cb_priv); mutex_unlock(&flow_indr_block_lock); return 0; @@ -462,7 +491,59 @@ out: } EXPORT_SYMBOL(flow_indr_block_cb_alloc); -int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, +static struct flow_indir_dev_info *find_indir_dev(void *data) +{ + struct flow_indir_dev_info *cur; + + list_for_each_entry(cur, &flow_indir_dev_list, list) { + if (cur->data == data) + return cur; + } + return NULL; +} + +static int indir_dev_add(void *data, struct net_device *dev, struct Qdisc *sch, + enum tc_setup_type type, void (*cleanup)(struct flow_block_cb *block_cb), + struct flow_block_offload *bo) +{ + struct flow_indir_dev_info *info; + + info = find_indir_dev(data); + if (info) + return -EEXIST; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->data = data; + info->dev = dev; + info->sch = sch; + info->type = type; + info->cleanup = cleanup; + info->command = bo->command; + info->binder_type = bo->binder_type; + info->cb_list = bo->cb_list_head; + + list_add(&info->list, &flow_indir_dev_list); + return 0; +} + +static int indir_dev_remove(void *data) +{ + struct flow_indir_dev_info *info; + + info = find_indir_dev(data); + if (!info) + return -ENOENT; + + list_del(&info->list); + + kfree(info); + return 0; +} + +int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)) @@ -470,6 +551,12 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, struct flow_indr_dev *this; mutex_lock(&flow_indr_block_lock); + + if (bo->command == FLOW_BLOCK_BIND) + indir_dev_add(data, dev, sch, type, cleanup, bo); + else if (bo->command == FLOW_BLOCK_UNBIND) + indir_dev_remove(data); + list_for_each_entry(this, &flow_block_indr_dev_list, list) this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 2bfd9f1b8f11..d6bf1b2cd541 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1096,6 +1096,7 @@ static void nf_flow_table_block_offload_init(struct flow_block_offload *bo, bo->command = cmd; bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo->extack = extack; + bo->cb_list_head = &flowtable->flow_block.cb_list; INIT_LIST_HEAD(&bo->cb_list); } diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index b58d73a96523..9656c1646222 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -353,6 +353,7 @@ static void nft_flow_block_offload_init(struct flow_block_offload *bo, bo->command = cmd; bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo->extack = extack; + bo->cb_list_head = &basechain->flow_block.cb_list; INIT_LIST_HEAD(&bo->cb_list); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 4a7043a4e5d6..2ef8f5a6205a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -634,6 +634,7 @@ static void tcf_block_offload_init(struct flow_block_offload *bo, bo->block_shared = shared; bo->extack = extack; bo->sch = sch; + bo->cb_list_head = &flow_block->cb_list; INIT_LIST_HEAD(&bo->cb_list); } -- cgit v1.2.3 From fa05bdb89b01b098aad19ec0ebc4d1cc7b11177e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 19 Aug 2021 13:58:42 +0300 Subject: Revert "flow_offload: action should not be NULL when it is referenced" This reverts commit 9ea3e52c5bc8bb4a084938dc1e3160643438927a. Cited commit added a check to make sure 'action' is not NULL, but 'action' is already dereferenced before the check, when calling flow_offload_has_one_action(). Therefore, the check does not make any sense and results in a smatch warning: include/net/flow_offload.h:322 flow_action_mixed_hw_stats_check() warn: variable dereferenced before check 'action' (see line 319) Fix by reverting this commit. Cc: gushengxian Fixes: 9ea3e52c5bc8 ("flow_offload: action should not be NULL when it is referenced") Signed-off-by: Ido Schimmel Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20210819105842.1315705-1-idosch@idosch.org Signed-off-by: Jakub Kicinski --- include/net/flow_offload.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index f3c2841566a0..1b9d75aedb22 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -319,14 +319,12 @@ flow_action_mixed_hw_stats_check(const struct flow_action *action, if (flow_offload_has_one_action(action)) return true; - if (action) { - flow_action_for_each(i, action_entry, action) { - if (i && action_entry->hw_stats != last_hw_stats) { - NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported"); - return false; - } - last_hw_stats = action_entry->hw_stats; + flow_action_for_each(i, action_entry, action) { + if (i && action_entry->hw_stats != last_hw_stats) { + NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported"); + return false; } + last_hw_stats = action_entry->hw_stats; } return true; } -- cgit v1.2.3 From f5e165e72b29d908214e554ef57f67790ba95934 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Aug 2021 20:55:00 +0300 Subject: net: dsa: track unique bridge numbers across all DSA switch trees Right now, cross-tree bridging setups work somewhat by mistake. In the case of cross-tree bridging with sja1105, all switch instances need to agree upon a common VLAN ID for forwarding a packet that belongs to a certain bridging domain. With TX forwarding offload, the VLAN ID is the bridge VLAN for VLAN-aware bridging, and the tag_8021q TX forwarding offload VID (a VLAN which has non-zero VBID bits) for VLAN-unaware bridging. The VBID for VLAN-unaware bridging is derived from the dp->bridge_num value calculated by DSA independently for each switch tree. If ports from one tree join one bridge, and ports from another tree join another bridge, DSA will assign them the same bridge_num, even though the bridges are different. If cross-tree bridging is supported, this is an issue. Modify DSA to calculate the bridge_num globally across all switch trees. This has the implication for a driver that the dp->bridge_num value that DSA will assign to its ports might not be contiguous, if there are boards with multiple DSA drivers instantiated. Additionally, all bridge_num values eat up towards each switch's ds->num_fwd_offloading_bridges maximum, which is potentially unfortunate, and can be seen as a limitation introduced by this patch. However, that is the lesser evil for now. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 8 +++----- net/dsa/dsa2.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ net/dsa/dsa_priv.h | 2 ++ net/dsa/port.c | 39 +++++---------------------------------- 4 files changed, 58 insertions(+), 39 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 0c2cba45fa79..c7ea0f61056f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -155,9 +155,6 @@ struct dsa_switch_tree { /* Track the largest switch index within a tree */ unsigned int last_switch; - - /* Track the bridges with forwarding offload enabled */ - unsigned long fwd_offloading_bridges; }; #define dsa_lags_foreach_id(_id, _dst) \ @@ -411,8 +408,9 @@ struct dsa_switch { unsigned int num_lag_ids; /* Drivers that support bridge forwarding offload should set this to - * the maximum number of bridges spanning the same switch tree that can - * be offloaded. + * the maximum number of bridges spanning the same switch tree (or all + * trees, in the case of cross-tree bridging support) that can be + * offloaded. */ unsigned int num_fwd_offloading_bridges; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index dcd67801eca4..1b2b25d7bd02 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -21,6 +21,9 @@ static DEFINE_MUTEX(dsa2_mutex); LIST_HEAD(dsa_tree_list); +/* Track the bridges with forwarding offload enabled */ +static unsigned long dsa_fwd_offloading_bridges; + /** * dsa_tree_notify - Execute code for all switches in a DSA switch tree. * @dst: collection of struct dsa_switch devices to notify. @@ -126,6 +129,51 @@ void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag) } } +static int dsa_bridge_num_find(const struct net_device *bridge_dev) +{ + struct dsa_switch_tree *dst; + struct dsa_port *dp; + + /* When preparing the offload for a port, it will have a valid + * dp->bridge_dev pointer but a not yet valid dp->bridge_num. + * However there might be other ports having the same dp->bridge_dev + * and a valid dp->bridge_num, so just ignore this port. + */ + list_for_each_entry(dst, &dsa_tree_list, list) + list_for_each_entry(dp, &dst->ports, list) + if (dp->bridge_dev == bridge_dev && + dp->bridge_num != -1) + return dp->bridge_num; + + return -1; +} + +int dsa_bridge_num_get(const struct net_device *bridge_dev, int max) +{ + int bridge_num = dsa_bridge_num_find(bridge_dev); + + if (bridge_num < 0) { + /* First port that offloads TX forwarding for this bridge */ + bridge_num = find_first_zero_bit(&dsa_fwd_offloading_bridges, + DSA_MAX_NUM_OFFLOADING_BRIDGES); + if (bridge_num >= max) + return -1; + + set_bit(bridge_num, &dsa_fwd_offloading_bridges); + } + + return bridge_num; +} + +void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num) +{ + /* Check if the bridge is still in use, otherwise it is time + * to clean it up so we can reuse this bridge_num later. + */ + if (!dsa_bridge_num_find(bridge_dev)) + clear_bit(bridge_num, &dsa_fwd_offloading_bridges); +} + struct dsa_switch *dsa_switch_find(int tree_index, int sw_index) { struct dsa_switch_tree *dst; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index b7a269e0513f..88aaf43b2da4 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -543,6 +543,8 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, struct net_device *master, const struct dsa_device_ops *tag_ops, const struct dsa_device_ops *old_tag_ops); +int dsa_bridge_num_get(const struct net_device *bridge_dev, int max); +void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num); /* tag_8021q.c */ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, diff --git a/net/dsa/port.c b/net/dsa/port.c index 979042a64d1a..4fbe81ffb1ce 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -270,27 +270,9 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp) */ } -static int dsa_tree_find_bridge_num(struct dsa_switch_tree *dst, - struct net_device *bridge_dev) -{ - struct dsa_port *dp; - - /* When preparing the offload for a port, it will have a valid - * dp->bridge_dev pointer but a not yet valid dp->bridge_num. - * However there might be other ports having the same dp->bridge_dev - * and a valid dp->bridge_num, so just ignore this port. - */ - list_for_each_entry(dp, &dst->ports, list) - if (dp->bridge_dev == bridge_dev && dp->bridge_num != -1) - return dp->bridge_num; - - return -1; -} - static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp, struct net_device *bridge_dev) { - struct dsa_switch_tree *dst = dp->ds->dst; int bridge_num = dp->bridge_num; struct dsa_switch *ds = dp->ds; @@ -300,11 +282,7 @@ static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp, dp->bridge_num = -1; - /* Check if the bridge is still in use, otherwise it is time - * to clean it up so we can reuse this bridge_num later. - */ - if (!dsa_tree_find_bridge_num(dst, bridge_dev)) - clear_bit(bridge_num, &dst->fwd_offloading_bridges); + dsa_bridge_num_put(bridge_dev, bridge_num); /* Notify the chips only once the offload has been deactivated, so * that they can update their configuration accordingly. @@ -316,23 +294,16 @@ static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp, static bool dsa_port_bridge_tx_fwd_offload(struct dsa_port *dp, struct net_device *bridge_dev) { - struct dsa_switch_tree *dst = dp->ds->dst; struct dsa_switch *ds = dp->ds; int bridge_num, err; if (!ds->ops->port_bridge_tx_fwd_offload) return false; - bridge_num = dsa_tree_find_bridge_num(dst, bridge_dev); - if (bridge_num < 0) { - /* First port that offloads TX forwarding for this bridge */ - bridge_num = find_first_zero_bit(&dst->fwd_offloading_bridges, - DSA_MAX_NUM_OFFLOADING_BRIDGES); - if (bridge_num >= ds->num_fwd_offloading_bridges) - return false; - - set_bit(bridge_num, &dst->fwd_offloading_bridges); - } + bridge_num = dsa_bridge_num_get(bridge_dev, + ds->num_fwd_offloading_bridges); + if (bridge_num < 0) + return false; dp->bridge_num = bridge_num; -- cgit v1.2.3 From f5a4c24e689f54e66201f04d343bdd2e8a1d7923 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 23 Aug 2021 20:02:39 +0200 Subject: mac80211: introduce individual TWT support in AP mode Introduce TWT action frames parsing support to mac80211. Currently just individual TWT agreement are support in AP mode. Whenever the AP receives a TWT action frame from an associated client, after performing sanity checks, it will notify the underlay driver with requested parameters in order to check if they are supported and if there is enough room for a new agreement. The driver is expected to set the agreement result and report it to mac80211. Drivers supporting this have two new callbacks: - add_twt_setup (mandatory) - twt_teardown_request (optional) mac80211 will send an action frame reply according to the result reported by the driver. Tested-by: Peter Chiu Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/257512f2e22ba42b9f2624942a128dd8f141de4b.1629741512.git.lorenzo@kernel.org [use le16p_replace_bits(), minor cleanups, use (void *) casts, fix to use ieee80211_get_he_iftype_cap() correctly] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 12 +++ net/mac80211/driver-ops.h | 36 +++++++++ net/mac80211/ieee80211_i.h | 6 ++ net/mac80211/iface.c | 41 +++++++++++ net/mac80211/rx.c | 73 ++++++++++++++++++ net/mac80211/s1g.c | 180 +++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/status.c | 17 ++++- net/mac80211/trace.h | 67 +++++++++++++++++ 8 files changed, 430 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a23e6734d26b..af0fc13cea34 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3926,6 +3926,13 @@ struct ieee80211_prep_tx_info { * @set_sar_specs: Update the SAR (TX power) settings. * @sta_set_decap_offload: Called to notify the driver when a station is allowed * to use rx decapsulation offload + * @add_twt_setup: Update hw with TWT agreement parameters received from the peer. + * This callback allows the hw to check if requested parameters + * are supported and if there is enough room for a new agreement. + * The hw is expected to set agreement result in the req_type field of + * twt structure. + * @twt_teardown_request: Update the hw with TWT teardown request received + * from the peer. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4249,6 +4256,11 @@ struct ieee80211_ops { void (*sta_set_decap_offload)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, bool enabled); + void (*add_twt_setup)(struct ieee80211_hw *hw, + struct ieee80211_sta *sta, + struct ieee80211_twt_setup *twt); + void (*twt_teardown_request)(struct ieee80211_hw *hw, + struct ieee80211_sta *sta, u8 flowid); }; /** diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index bcb7cc06db3d..cd3731cbf6c6 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1447,4 +1447,40 @@ static inline void drv_sta_set_decap_offload(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline void drv_add_twt_setup(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct ieee80211_twt_setup *twt) +{ + struct ieee80211_twt_params *twt_agrt; + + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return; + + twt_agrt = (void *)twt->params; + + trace_drv_add_twt_setup(local, sta, twt, twt_agrt); + local->ops->add_twt_setup(&local->hw, sta, twt); + trace_drv_return_void(local); +} + +static inline void drv_twt_teardown_request(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + u8 flowid) +{ + might_sleep(); + if (!check_sdata_in_driver(sdata)) + return; + + if (!local->ops->twt_teardown_request) + return; + + trace_drv_twt_teardown_request(local, sta, flowid); + local->ops->twt_teardown_request(&local->hw, sta, flowid); + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e8945c20688a..4d830a9c7b18 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -946,6 +946,7 @@ struct ieee80211_sub_if_data { struct work_struct work; struct sk_buff_head skb_queue; + struct sk_buff_head status_queue; u8 needed_rx_chains; enum ieee80211_smps_mode smps_mode; @@ -2080,6 +2081,11 @@ ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif, /* S1G */ void ieee80211_s1g_sta_rate_init(struct sta_info *sta); +bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb); +void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 769f8f585c06..62c95597704b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -552,6 +552,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do */ ieee80211_free_keys(sdata, true); skb_queue_purge(&sdata->skb_queue); + skb_queue_purge(&sdata->status_queue); } spin_lock_irqsave(&local->queue_stop_reason_lock, flags); @@ -984,6 +985,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) } skb_queue_head_init(&sdata->skb_queue); + skb_queue_head_init(&sdata->status_queue); INIT_WORK(&sdata->work, ieee80211_iface_work); return 0; @@ -1382,6 +1384,16 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, WARN_ON(1); break; } + } else if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_S1G) { + switch (mgmt->u.action.u.s1g.action_code) { + case WLAN_S1G_TWT_TEARDOWN: + case WLAN_S1G_TWT_SETUP: + ieee80211_s1g_rx_twt_action(sdata, skb); + break; + default: + break; + } } else if (ieee80211_is_ext(mgmt->frame_control)) { if (sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_sta_rx_queued_ext(sdata, skb); @@ -1437,6 +1449,24 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, } } +static void ieee80211_iface_process_status(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_S1G) { + switch (mgmt->u.action.u.s1g.action_code) { + case WLAN_S1G_TWT_TEARDOWN: + case WLAN_S1G_TWT_SETUP: + ieee80211_s1g_status_twt_action(sdata, skb); + break; + default: + break; + } + } +} + static void ieee80211_iface_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = @@ -1466,6 +1496,16 @@ static void ieee80211_iface_work(struct work_struct *work) kcov_remote_stop(); } + /* process status queue */ + while ((skb = skb_dequeue(&sdata->status_queue))) { + kcov_remote_start_common(skb_get_kcov_handle(skb)); + + ieee80211_iface_process_status(sdata, skb); + kfree_skb(skb); + + kcov_remote_stop(); + } + /* then other type-dependent work */ switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: @@ -1529,6 +1569,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, } skb_queue_head_init(&sdata->skb_queue); + skb_queue_head_init(&sdata->status_queue); INIT_WORK(&sdata->work, ieee80211_iface_work); INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work); INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 33c56eab07fc..99ed68f7dc36 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3212,6 +3212,68 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +static bool +ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)rx->skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); + struct ieee80211_sub_if_data *sdata = rx->sdata; + const struct ieee80211_sta_he_cap *hecap; + struct ieee80211_supported_band *sband; + + /* TWT actions are only supported in AP for the moment */ + if (sdata->vif.type != NL80211_IFTYPE_AP) + return false; + + if (!rx->local->ops->add_twt_setup) + return false; + + sband = rx->local->hw.wiphy->bands[status->band]; + hecap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); + if (!hecap) + return false; + + if (!(hecap->he_cap_elem.mac_cap_info[0] & + IEEE80211_HE_MAC_CAP0_TWT_RES)) + return false; + + if (!rx->sta) + return false; + + switch (mgmt->u.action.u.s1g.action_code) { + case WLAN_S1G_TWT_SETUP: { + struct ieee80211_twt_setup *twt; + + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + + 1 + /* action code */ + sizeof(struct ieee80211_twt_setup) + + 2 /* TWT req_type agrt */) + break; + + twt = (void *)mgmt->u.action.u.s1g.variable; + if (twt->element_id != WLAN_EID_S1G_TWT) + break; + + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + + 4 + /* action code + token + tlv */ + twt->length) + break; + + return true; /* queue the frame */ + } + case WLAN_S1G_TWT_TEARDOWN: + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + 2) + break; + + return true; /* queue the frame */ + default: + break; + } + + return false; +} + static ieee80211_rx_result debug_noinline ieee80211_rx_h_action(struct ieee80211_rx_data *rx) { @@ -3491,6 +3553,17 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) !mesh_path_sel_is_hwmp(sdata)) break; goto queue; + case WLAN_CATEGORY_S1G: + switch (mgmt->u.action.u.s1g.action_code) { + case WLAN_S1G_TWT_SETUP: + case WLAN_S1G_TWT_TEARDOWN: + if (ieee80211_process_rx_twt_action(rx)) + goto queue; + break; + default: + break; + } + break; } return RX_CONTINUE; diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c index c33f332b049a..7e35ab5b6166 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c @@ -6,6 +6,7 @@ #include #include #include "ieee80211_i.h" +#include "driver-ops.h" void ieee80211_s1g_sta_rate_init(struct sta_info *sta) { @@ -14,3 +15,182 @@ void ieee80211_s1g_sta_rate_init(struct sta_info *sta) sta->rx_stats.last_rate = STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_S1G); } + +bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; + + if (likely(!ieee80211_is_action(mgmt->frame_control))) + return false; + + if (likely(mgmt->u.action.category != WLAN_CATEGORY_S1G)) + return false; + + return mgmt->u.action.u.s1g.action_code == WLAN_S1G_TWT_SETUP; +} + +static void +ieee80211_s1g_send_twt_setup(struct ieee80211_sub_if_data *sdata, const u8 *da, + const u8 *bssid, struct ieee80211_twt_setup *twt) +{ + int len = IEEE80211_MIN_ACTION_SIZE + 4 + twt->length; + struct ieee80211_local *local = sdata->local; + struct ieee80211_mgmt *mgmt; + struct sk_buff *skb; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); + if (!skb) + return; + + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = skb_put_zero(skb, len); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + memcpy(mgmt->da, da, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, bssid, ETH_ALEN); + + mgmt->u.action.category = WLAN_CATEGORY_S1G; + mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_SETUP; + memcpy(mgmt->u.action.u.s1g.variable, twt, 3 + twt->length); + + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + IEEE80211_TX_INTFL_MLME_CONN_TX | + IEEE80211_TX_CTL_REQ_TX_STATUS; + ieee80211_tx_skb(sdata, skb); +} + +static void +ieee80211_s1g_send_twt_teardown(struct ieee80211_sub_if_data *sdata, + const u8 *da, const u8 *bssid, u8 flowid) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_mgmt *mgmt; + struct sk_buff *skb; + u8 *id; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + IEEE80211_MIN_ACTION_SIZE + 2); + if (!skb) + return; + + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE + 2); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + memcpy(mgmt->da, da, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, bssid, ETH_ALEN); + + mgmt->u.action.category = WLAN_CATEGORY_S1G; + mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_TEARDOWN; + id = (u8 *)mgmt->u.action.u.s1g.variable; + *id = flowid; + + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + IEEE80211_TX_CTL_REQ_TX_STATUS; + ieee80211_tx_skb(sdata, skb); +} + +static void +ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable; + struct ieee80211_twt_params *twt_agrt = (void *)twt->params; + + twt_agrt->req_type &= cpu_to_le16(~IEEE80211_TWT_REQTYPE_REQUEST); + + /* broadcast TWT not supported yet */ + if (twt->control & IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST) { + le16p_replace_bits(&twt_agrt->req_type, + TWT_SETUP_CMD_REJECT, + IEEE80211_TWT_REQTYPE_SETUP_CMD); + goto out; + } + + drv_add_twt_setup(sdata->local, sdata, &sta->sta, twt); +out: + ieee80211_s1g_send_twt_setup(sdata, mgmt->sa, sdata->vif.addr, twt); +} + +static void +ieee80211_s1g_rx_twt_teardown(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; + + drv_twt_teardown_request(sdata->local, sdata, &sta->sta, + mgmt->u.action.u.s1g.variable[0]); +} + +static void +ieee80211_s1g_tx_twt_setup_fail(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; + struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable; + struct ieee80211_twt_params *twt_agrt = (void *)twt->params; + u8 flowid = le16_get_bits(twt_agrt->req_type, + IEEE80211_TWT_REQTYPE_FLOWID); + + drv_twt_teardown_request(sdata->local, sdata, &sta->sta, flowid); + + ieee80211_s1g_send_twt_teardown(sdata, mgmt->sa, sdata->vif.addr, + flowid); +} + +void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + mutex_lock(&local->sta_mtx); + + sta = sta_info_get_bss(sdata, mgmt->sa); + if (!sta) + goto out; + + switch (mgmt->u.action.u.s1g.action_code) { + case WLAN_S1G_TWT_SETUP: + ieee80211_s1g_rx_twt_setup(sdata, sta, skb); + break; + case WLAN_S1G_TWT_TEARDOWN: + ieee80211_s1g_rx_twt_teardown(sdata, sta, skb); + break; + default: + break; + } + +out: + mutex_unlock(&local->sta_mtx); +} + +void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + mutex_lock(&local->sta_mtx); + + sta = sta_info_get_bss(sdata, mgmt->da); + if (!sta) + goto out; + + switch (mgmt->u.action.u.s1g.action_code) { + case WLAN_S1G_TWT_SETUP: + /* process failed twt setup frames */ + ieee80211_s1g_tx_twt_setup_fail(sdata, sta, skb); + break; + default: + break; + } + +out: + mutex_unlock(&local->sta_mtx); +} diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 1f295e5721ef..f6f63a0b1b72 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -705,13 +705,26 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, /* Check to see if packet is a TDLS teardown packet */ if (ieee80211_is_data(hdr->frame_control) && (ieee80211_get_tdls_action(skb, hdr_size) == - WLAN_TDLS_TEARDOWN)) + WLAN_TDLS_TEARDOWN)) { ieee80211_tdls_td_tx_handle(local, sdata, skb, info->flags); - else + } else if (ieee80211_s1g_is_twt_setup(skb)) { + if (!acked) { + struct sk_buff *qskb; + + qskb = skb_clone(skb, GFP_ATOMIC); + if (qskb) { + skb_queue_tail(&sdata->status_queue, + qskb); + ieee80211_queue_work(&local->hw, + &sdata->work); + } + } + } else { ieee80211_mgd_conn_tx_status(sdata, hdr->frame_control, acked); + } } rcu_read_unlock(); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index f6ef15366938..9e8381bef7ed 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2825,6 +2825,73 @@ DEFINE_EVENT(sta_flag_evt, drv_sta_set_decap_offload, TP_ARGS(local, sdata, sta, enabled) ); +TRACE_EVENT(drv_add_twt_setup, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, + struct ieee80211_twt_setup *twt, + struct ieee80211_twt_params *twt_agrt), + + TP_ARGS(local, sta, twt, twt_agrt), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(u8, dialog_token) + __field(u8, control) + __field(__le16, req_type) + __field(__le64, twt) + __field(u8, duration) + __field(__le16, mantissa) + __field(u8, channel) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->dialog_token = twt->dialog_token; + __entry->control = twt->control; + __entry->req_type = twt_agrt->req_type; + __entry->twt = twt_agrt->twt; + __entry->duration = twt_agrt->min_twt_dur; + __entry->mantissa = twt_agrt->mantissa; + __entry->channel = twt_agrt->channel; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT + " token:%d control:0x%02x req_type:0x%04x" + " twt:%llu duration:%d mantissa:%d channel:%d", + LOCAL_PR_ARG, STA_PR_ARG, __entry->dialog_token, + __entry->control, le16_to_cpu(__entry->req_type), + le64_to_cpu(__entry->twt), __entry->duration, + le16_to_cpu(__entry->mantissa), __entry->channel + ) +); + +TRACE_EVENT(drv_twt_teardown_request, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, u8 flowid), + + TP_ARGS(local, sta, flowid), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(u8, flowid) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->flowid = flowid; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT " flowid:%d", + LOCAL_PR_ARG, STA_PR_ARG, __entry->flowid + ) +); + #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 58adf9dcb15b99f047e80e10c85fb51ed3c88215 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 24 Aug 2021 00:22:58 +0300 Subject: net: dsa: let drivers state that they need VLAN filtering while standalone As explained in commit e358bef7c392 ("net: dsa: Give drivers the chance to veto certain upper devices"), the hellcreek driver uses some tricks to comply with the network stack expectations: it enforces port separation in standalone mode using VLANs. For untagged traffic, bridging between ports is prevented by using different PVIDs, and for VLAN-tagged traffic, it never accepts 8021q uppers with the same VID on two ports, so packets with one VLAN cannot leak from one port to another. That is almost fine*, and has worked because hellcreek relied on an implicit behavior of the DSA core that was changed by the previous patch: the standalone ports declare the 'rx-vlan-filter' feature as 'on [fixed]'. Since most of the DSA drivers are actually VLAN-unaware in standalone mode, that feature was actually incorrectly reflecting the hardware/driver state, so there was a desire to fix it. This leaves the hellcreek driver in a situation where it has to explicitly request this behavior from the DSA framework. We configure the ports as follows: - Standalone: 'rx-vlan-filter' is on. An 8021q upper on top of a standalone hellcreek port will go through dsa_slave_vlan_rx_add_vid and will add a VLAN to the hardware tables, giving the driver the opportunity to refuse it through .port_prechangeupper. - Bridged with vlan_filtering=0: 'rx-vlan-filter' is off. An 8021q upper on top of a bridged hellcreek port will not go through dsa_slave_vlan_rx_add_vid, because there will not be any attempt to offload this VLAN. The driver already disables VLAN awareness, so that upper should receive the traffic it needs. - Bridged with vlan_filtering=1: 'rx-vlan-filter' is on. An 8021q upper on top of a bridged hellcreek port will call dsa_slave_vlan_rx_add_vid, and can again be vetoed through .port_prechangeupper. *It is not actually completely fine, because if I follow through correctly, we can have the following situation: ip link add br0 type bridge vlan_filtering 0 ip link set lan0 master br0 # lan0 now becomes VLAN-unaware ip link set lan0 nomaster # lan0 fails to become VLAN-aware again, therefore breaking isolation This patch fixes that corner case by extending the DSA core logic, based on this requested attribute, to change the VLAN awareness state of the switch (port) when it leaves the bridge. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Acked-by: Kurt Kanzenbach Signed-off-by: David S. Miller --- drivers/net/dsa/hirschmann/hellcreek.c | 1 + include/net/dsa.h | 3 +++ net/dsa/slave.c | 12 ++++++++---- net/dsa/switch.c | 21 ++++++++++++++++----- 4 files changed, 28 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 5c54ae1be62c..3faff95fd49f 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -1345,6 +1345,7 @@ static int hellcreek_setup(struct dsa_switch *ds) * filtering setups are not supported. */ ds->vlan_filtering_is_global = true; + ds->needs_standalone_vlan_filtering = true; /* Intercept _all_ PTP multicast traffic */ ret = hellcreek_setup_fdb(hellcreek); diff --git a/include/net/dsa.h b/include/net/dsa.h index c7ea0f61056f..f9a17145255a 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -363,6 +363,9 @@ struct dsa_switch { */ bool vlan_filtering_is_global; + /* Keep VLAN filtering enabled on ports not offloading any upper. */ + bool needs_standalone_vlan_filtering; + /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges * that have vlan_filtering=0. All drivers should ideally set this (and * then the option would get removed), but it is unknown whether this diff --git a/net/dsa/slave.c b/net/dsa/slave.c index f71d31d3aab4..662ff531d4e2 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1435,11 +1435,12 @@ static int dsa_slave_clear_vlan(struct net_device *vdev, int vid, void *arg) * To summarize, a DSA switch port offloads: * * - If standalone (this includes software bridge, software LAG): - * - if ds->vlan_filtering_is_global = true AND there are bridges spanning - * this switch chip which have vlan_filtering=1: + * - if ds->needs_standalone_vlan_filtering = true, OR if + * (ds->vlan_filtering_is_global = true AND there are bridges spanning + * this switch chip which have vlan_filtering=1) * - the 8021q upper VLANs - * - else (VLAN filtering is not global, or it is, but no port is under a - * VLAN-aware bridge): + * - else (standalone VLAN filtering is not needed, VLAN filtering is not + * global, or it is, but no port is under a VLAN-aware bridge): * - no VLAN (any 8021q upper is a software VLAN) * * - If under a vlan_filtering=0 bridge which it offload: @@ -1871,6 +1872,7 @@ void dsa_slave_setup_tagger(struct net_device *slave) struct dsa_slave_priv *p = netdev_priv(slave); const struct dsa_port *cpu_dp = dp->cpu_dp; struct net_device *master = cpu_dp->master; + const struct dsa_switch *ds = dp->ds; slave->needed_headroom = cpu_dp->tag_ops->needed_headroom; slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; @@ -1888,6 +1890,8 @@ void dsa_slave_setup_tagger(struct net_device *slave) slave->features |= NETIF_F_LLTX; if (slave->needed_tailroom) slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); + if (ds->needs_standalone_vlan_filtering) + slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; } static struct lock_class_key dsa_slave_netdev_xmit_lock_key; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index dd042fd7f800..1c797ec8e2c2 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -116,9 +116,10 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, static int dsa_switch_bridge_leave(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) { - bool unset_vlan_filtering = br_vlan_enabled(info->br); struct dsa_switch_tree *dst = ds->dst; struct netlink_ext_ack extack = {0}; + bool change_vlan_filtering = false; + bool vlan_filtering; int err, port; if (dst->index == info->tree_index && ds->index == info->sw_index && @@ -131,6 +132,15 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, info->sw_index, info->port, info->br); + if (ds->needs_standalone_vlan_filtering && !br_vlan_enabled(info->br)) { + change_vlan_filtering = true; + vlan_filtering = true; + } else if (!ds->needs_standalone_vlan_filtering && + br_vlan_enabled(info->br)) { + change_vlan_filtering = true; + vlan_filtering = false; + } + /* If the bridge was vlan_filtering, the bridge core doesn't trigger an * event for changing vlan_filtering setting upon slave ports leaving * it. That is a good thing, because that lets us handle it and also @@ -139,21 +149,22 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, * vlan_filtering callback is only when the last port leaves the last * VLAN-aware bridge. */ - if (unset_vlan_filtering && ds->vlan_filtering_is_global) { + if (change_vlan_filtering && ds->vlan_filtering_is_global) { for (port = 0; port < ds->num_ports; port++) { struct net_device *bridge_dev; bridge_dev = dsa_to_port(ds, port)->bridge_dev; if (bridge_dev && br_vlan_enabled(bridge_dev)) { - unset_vlan_filtering = false; + change_vlan_filtering = false; break; } } } - if (unset_vlan_filtering) { + + if (change_vlan_filtering) { err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port), - false, &extack); + vlan_filtering, &extack); if (extack._msg) dev_err(ds->dev, "port %d: %s\n", info->port, extack._msg); -- cgit v1.2.3 From 446e7f218b7662c912c610aae99069543aa88a40 Mon Sep 17 00:00:00 2001 From: zhang kai Date: Mon, 23 Aug 2021 11:49:00 +0800 Subject: ipv6: correct comments about fib6_node sernum correct comments in set and get fn_sernum Signed-off-by: zhang kai Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 4 ++-- net/ipv6/ip6_fib.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 15b7fbe6b15c..c412dde4d67d 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -267,7 +267,7 @@ static inline bool fib6_check_expired(const struct fib6_info *f6i) return false; } -/* Function to safely get fn->sernum for passed in rt +/* Function to safely get fn->fn_sernum for passed in rt * and store result in passed in cookie. * Return true if we can get cookie safely * Return false if not @@ -282,7 +282,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, if (fn) { *cookie = fn->fn_sernum; - /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */ + /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */ smp_rmb(); status = true; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 2d650dc24349..ef75c9b05f17 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1341,7 +1341,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt, struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, lockdep_is_held(&rt->fib6_table->tb6_lock)); - /* paired with smp_rmb() in rt6_get_cookie_safe() */ + /* paired with smp_rmb() in fib6_get_cookie_safe() */ smp_wmb(); while (fn) { fn->fn_sernum = sernum; -- cgit v1.2.3 From d7b269083786dca3b1b0141bde6cea834062b691 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 24 Aug 2021 16:26:14 -0700 Subject: mptcp: shrink mptcp_out_options struct After the previous patch we can alias with a union several fields in mptcp_out_options. Such struct is stack allocated and memset() for each plain TCP out packet. Every saved byted counts. Before: pahole -EC mptcp_out_options # ... /* size: 136, cachelines: 3, members: 17 */ After: pahole -EC mptcp_out_options # ... /* size: 56, cachelines: 1, members: 9 */ Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/mptcp.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 8b5af683a818..3236010afa29 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -58,10 +58,6 @@ struct mptcp_addr_info { struct mptcp_out_options { #if IS_ENABLED(CONFIG_MPTCP) u16 suboptions; - u64 sndr_key; - u64 rcvr_key; - u64 ahmac; - struct mptcp_addr_info addr; struct mptcp_rm_list rm_list; u8 join_id; u8 backup; @@ -69,11 +65,23 @@ struct mptcp_out_options { reset_transient:1, csum_reqd:1, allow_join_id0:1; - u32 nonce; - u64 thmac; - u32 token; - u8 hmac[20]; - struct mptcp_ext ext_copy; + union { + struct { + u64 sndr_key; + u64 rcvr_key; + }; + struct { + struct mptcp_addr_info addr; + u64 ahmac; + }; + struct mptcp_ext ext_copy; + struct { + u32 nonce; + u32 token; + u64 thmac; + u8 hmac[20]; + }; + }; #endif }; -- cgit v1.2.3 From c25aeb4e095355eec3beb6a2b2b30322bd6d0dd4 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 24 Aug 2021 16:26:15 -0700 Subject: mptcp: MP_FAIL suboption sending This patch added the MP_FAIL suboption sending support. Add a new flag named send_mp_fail in struct mptcp_subflow_context. If this flag is set, send out MP_FAIL suboption. Add a new member fail_seq in struct mptcp_out_options to save the data sequence number to put into the MP_FAIL suboption. An MP_FAIL option could be included in a RST or on the subflow-level ACK. Suggested-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/mptcp.h | 5 ++++- net/mptcp/options.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++---- net/mptcp/protocol.h | 3 +++ 3 files changed, 62 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 3236010afa29..6026bbefbffd 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -74,7 +74,10 @@ struct mptcp_out_options { struct mptcp_addr_info addr; u64 ahmac; }; - struct mptcp_ext ext_copy; + struct { + struct mptcp_ext ext_copy; + u64 fail_seq; + }; struct { u32 nonce; u32 token; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 1a59b3045a33..f2ebdd55d3cc 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -767,7 +767,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk, return true; } -static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb, +static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) @@ -775,12 +775,36 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); if (remaining < TCPOLEN_MPTCP_RST) - return; + return false; *size = TCPOLEN_MPTCP_RST; opts->suboptions |= OPTION_MPTCP_RST; opts->reset_transient = subflow->reset_transient; opts->reset_reason = subflow->reset_reason; + + return true; +} + +static bool mptcp_established_options_mp_fail(struct sock *sk, + unsigned int *size, + unsigned int remaining, + struct mptcp_out_options *opts) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + + if (likely(!subflow->send_mp_fail)) + return false; + + if (remaining < TCPOLEN_MPTCP_FAIL) + return false; + + *size = TCPOLEN_MPTCP_FAIL; + opts->suboptions |= OPTION_MPTCP_FAIL; + opts->fail_seq = subflow->map_seq; + + pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq); + + return true; } bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, @@ -799,15 +823,28 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, return false; if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { - mptcp_established_options_rst(sk, skb, size, remaining, opts); + if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { + *size += opt_size; + remaining -= opt_size; + } + if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) { + *size += opt_size; + remaining -= opt_size; + } return true; } snd_data_fin = mptcp_data_fin_enabled(msk); if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts)) ret = true; - else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) + else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) { ret = true; + if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { + *size += opt_size; + remaining -= opt_size; + return true; + } + } /* we reserved enough space for the above options, and exceeding the * TCP option space would be fatal @@ -1210,6 +1247,20 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext) void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { + if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) { + const struct sock *ssk = (const struct sock *)tp; + struct mptcp_subflow_context *subflow; + + subflow = mptcp_subflow_ctx(ssk); + subflow->send_mp_fail = 0; + + *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL, + TCPOLEN_MPTCP_FAIL, + 0, 0); + put_unaligned_be64(opts->fail_seq, ptr); + ptr += 2; + } + /* RST is mutually exclusive with everything else */ if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) { *ptr++ = mptcp_option(MPTCPOPT_RST, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d276ce16f126..3e4a79cf520a 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -27,6 +27,7 @@ #define OPTION_MPTCP_PRIO BIT(9) #define OPTION_MPTCP_RST BIT(10) #define OPTION_MPTCP_DSS BIT(11) +#define OPTION_MPTCP_FAIL BIT(12) /* MPTCP option subtypes */ #define MPTCPOPT_MP_CAPABLE 0 @@ -68,6 +69,7 @@ #define TCPOLEN_MPTCP_PRIO_ALIGN 4 #define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_RST 4 +#define TCPOLEN_MPTCP_FAIL 12 #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA) @@ -429,6 +431,7 @@ struct mptcp_subflow_context { mpc_map : 1, backup : 1, send_mp_prio : 1, + send_mp_fail : 1, rx_eof : 1, can_ack : 1, /* only after processing the remote a key */ disposable : 1, /* ctx can be free at ulp release time */ -- cgit v1.2.3 From 87e5ef4b19cec86c861e3ebab3a5d840ecc2f4a4 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Wed, 25 Aug 2021 14:34:31 +0800 Subject: mctp: Remove the repeated declaration Function 'mctp_dev_get_rtnl' is declared twice, so remove the repeated declaration. Cc: Jeremy Kerr Cc: Matt Johnston Cc: "David S. Miller" Cc: Jakub Kicinski Signed-off-by: Shaokun Zhang Signed-off-by: David S. Miller --- include/net/mctpdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h index 57e773ff08bb..71a11012fac7 100644 --- a/include/net/mctpdevice.h +++ b/include/net/mctpdevice.h @@ -31,6 +31,5 @@ struct mctp_dev { struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev); struct mctp_dev *__mctp_dev_get(const struct net_device *dev); -struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev); #endif /* __NET_MCTPDEVICE_H */ -- cgit v1.2.3 From 478374a3c15f369e57fdd79d64d7a1d2eb307e16 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Aug 2021 17:16:22 +0200 Subject: netfilter: ecache: remove one indent level nf_conntrack_eventmask_report and nf_ct_deliver_cached_events shared most of their code. This unifies the layout by changing if (nf_ct_is_confirmed(ct)) { foo } to if (!nf_ct_is_confirmed(ct))) return foo This removes one level of indentation. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 2 +- net/netfilter/nf_conntrack_ecache.c | 64 +++++++++++++++-------------- net/netfilter/nf_conntrack_netlink.c | 2 +- 3 files changed, 36 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index d00ba6048e44..3734bacf9763 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -73,7 +73,7 @@ struct nf_ct_event { }; struct nf_ct_event_notifier { - int (*fcn)(unsigned int events, struct nf_ct_event *item); + int (*fcn)(unsigned int events, const struct nf_ct_event *item); }; int nf_conntrack_register_notifier(struct net *net, diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 296e4a171bd1..3f1e0add58bc 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -133,10 +133,15 @@ static void ecache_work(struct work_struct *work) int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, u32 portid, int report) { - int ret = 0; struct net *net = nf_ct_net(ct); struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; + struct nf_ct_event item; + unsigned long missed; + int ret = 0; + + if (!nf_ct_is_confirmed(ct)) + return ret; rcu_read_lock(); notify = rcu_dereference(net->ct.nf_conntrack_event_cb); @@ -147,38 +152,37 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, if (!e) goto out_unlock; - if (nf_ct_is_confirmed(ct)) { - struct nf_ct_event item = { - .ct = ct, - .portid = e->portid ? e->portid : portid, - .report = report - }; - /* This is a resent of a destroy event? If so, skip missed */ - unsigned long missed = e->portid ? 0 : e->missed; - - if (!((eventmask | missed) & e->ctmask)) - goto out_unlock; - - ret = notify->fcn(eventmask | missed, &item); - if (unlikely(ret < 0 || missed)) { - spin_lock_bh(&ct->lock); - if (ret < 0) { - /* This is a destroy event that has been - * triggered by a process, we store the PORTID - * to include it in the retransmission. - */ - if (eventmask & (1 << IPCT_DESTROY)) { - if (e->portid == 0 && portid != 0) - e->portid = portid; - e->state = NFCT_ECACHE_DESTROY_FAIL; - } else { - e->missed |= eventmask; - } + memset(&item, 0, sizeof(item)); + + item.ct = ct; + item.portid = e->portid ? e->portid : portid; + item.report = report; + + /* This is a resent of a destroy event? If so, skip missed */ + missed = e->portid ? 0 : e->missed; + + if (!((eventmask | missed) & e->ctmask)) + goto out_unlock; + + ret = notify->fcn(eventmask | missed, &item); + if (unlikely(ret < 0 || missed)) { + spin_lock_bh(&ct->lock); + if (ret < 0) { + /* This is a destroy event that has been + * triggered by a process, we store the PORTID + * to include it in the retransmission. + */ + if (eventmask & (1 << IPCT_DESTROY)) { + if (e->portid == 0 && portid != 0) + e->portid = portid; + e->state = NFCT_ECACHE_DESTROY_FAIL; } else { - e->missed &= ~missed; + e->missed |= eventmask; } - spin_unlock_bh(&ct->lock); + } else { + e->missed &= ~missed; } + spin_unlock_bh(&ct->lock); } out_unlock: rcu_read_unlock(); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index eb35c6151fb0..43b891a902de 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -706,7 +706,7 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) } static int -ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) +ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) { const struct nf_conntrack_zone *zone; struct net *net; -- cgit v1.2.3 From b86c0e6429dac2458694495aeebf15f4fe6b269d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Aug 2021 17:16:25 +0200 Subject: netfilter: ecache: prepare for event notifier merge This prepares for merge for ct and exp notifier structs. The 'fcn' member is renamed to something unique. Second, the register/unregister api is simplified. There is only one implementation so there is no need to do any error checking. Replace the EBUSY logic with WARN_ON_ONCE. This allows to remove error unwinding. The exp notifier register/unregister function is removed in a followup patch. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 11 +++++------ net/netfilter/nf_conntrack_ecache.c | 26 ++++++-------------------- net/netfilter/nf_conntrack_netlink.c | 22 ++++++---------------- 3 files changed, 17 insertions(+), 42 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 3734bacf9763..061a93a03b82 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -73,13 +73,12 @@ struct nf_ct_event { }; struct nf_ct_event_notifier { - int (*fcn)(unsigned int events, const struct nf_ct_event *item); + int (*ct_event)(unsigned int events, const struct nf_ct_event *item); }; -int nf_conntrack_register_notifier(struct net *net, - struct nf_ct_event_notifier *nb); -void nf_conntrack_unregister_notifier(struct net *net, - struct nf_ct_event_notifier *nb); +void nf_conntrack_register_notifier(struct net *net, + const struct nf_ct_event_notifier *nb); +void nf_conntrack_unregister_notifier(struct net *net); void nf_ct_deliver_cached_events(struct nf_conn *ct); int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, @@ -159,7 +158,7 @@ struct nf_exp_event { }; struct nf_exp_event_notifier { - int (*fcn)(unsigned int events, struct nf_exp_event *item); + int (*exp_event)(unsigned int events, struct nf_exp_event *item); }; int nf_ct_expect_register_notifier(struct net *net, diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index fbe04e16280a..d92f78e4bc7c 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -151,7 +151,7 @@ static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e, return 0; } - ret = notify->fcn(events | missed, item); + ret = notify->ct_event(events | missed, item); rcu_read_unlock(); if (likely(ret >= 0 && missed == 0)) @@ -258,43 +258,29 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, .portid = portid, .report = report }; - notify->fcn(1 << event, &item); + notify->exp_event(1 << event, &item); } out_unlock: rcu_read_unlock(); } -int nf_conntrack_register_notifier(struct net *net, - struct nf_ct_event_notifier *new) +void nf_conntrack_register_notifier(struct net *net, + const struct nf_ct_event_notifier *new) { - int ret; struct nf_ct_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); - if (notify != NULL) { - ret = -EBUSY; - goto out_unlock; - } + WARN_ON_ONCE(notify); rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); - ret = 0; - -out_unlock: mutex_unlock(&nf_ct_ecache_mutex); - return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); -void nf_conntrack_unregister_notifier(struct net *net, - struct nf_ct_event_notifier *new) +void nf_conntrack_unregister_notifier(struct net *net) { - struct nf_ct_event_notifier *notify; - mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, - lockdep_is_held(&nf_ct_ecache_mutex)); - BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); /* synchronize_rcu() is called from ctnetlink_exit. */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 43b891a902de..6d6f7cd70753 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3755,11 +3755,11 @@ static int ctnetlink_stat_exp_cpu(struct sk_buff *skb, #ifdef CONFIG_NF_CONNTRACK_EVENTS static struct nf_ct_event_notifier ctnl_notifier = { - .fcn = ctnetlink_conntrack_event, + .ct_event = ctnetlink_conntrack_event, }; static struct nf_exp_event_notifier ctnl_notifier_exp = { - .fcn = ctnetlink_expect_event, + .exp_event = ctnetlink_expect_event, }; #endif @@ -3854,33 +3854,23 @@ static int __net_init ctnetlink_net_init(struct net *net) #ifdef CONFIG_NF_CONNTRACK_EVENTS int ret; - ret = nf_conntrack_register_notifier(net, &ctnl_notifier); - if (ret < 0) { - pr_err("ctnetlink_init: cannot register notifier.\n"); - goto err_out; - } + nf_conntrack_register_notifier(net, &ctnl_notifier); ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp); if (ret < 0) { pr_err("ctnetlink_init: cannot expect register notifier.\n"); - goto err_unreg_notifier; + nf_conntrack_unregister_notifier(net); + return ret; } #endif return 0; - -#ifdef CONFIG_NF_CONNTRACK_EVENTS -err_unreg_notifier: - nf_conntrack_unregister_notifier(net, &ctnl_notifier); -err_out: - return ret; -#endif } static void ctnetlink_net_exit(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_EVENTS nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp); - nf_conntrack_unregister_notifier(net, &ctnl_notifier); + nf_conntrack_unregister_notifier(net); #endif } -- cgit v1.2.3 From bd1431db0b8131098a285c8cc6a357629b4362e5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Aug 2021 17:16:26 +0200 Subject: netfilter: ecache: remove nf_exp_event_notifier structure Reuse the conntrack event notofier struct, this allows to remove the extra register/unregister functions and avoids a pointer in struct net. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 23 +++++---------- include/net/netns/conntrack.h | 1 - net/netfilter/nf_conntrack_ecache.c | 43 ++--------------------------- net/netfilter/nf_conntrack_netlink.c | 30 ++------------------ 4 files changed, 13 insertions(+), 84 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 061a93a03b82..d932e22edcb4 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -72,8 +72,15 @@ struct nf_ct_event { int report; }; +struct nf_exp_event { + struct nf_conntrack_expect *exp; + u32 portid; + int report; +}; + struct nf_ct_event_notifier { int (*ct_event)(unsigned int events, const struct nf_ct_event *item); + int (*exp_event)(unsigned int events, const struct nf_exp_event *item); }; void nf_conntrack_register_notifier(struct net *net, @@ -150,22 +157,6 @@ nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) } #ifdef CONFIG_NF_CONNTRACK_EVENTS - -struct nf_exp_event { - struct nf_conntrack_expect *exp; - u32 portid; - int report; -}; - -struct nf_exp_event_notifier { - int (*exp_event)(unsigned int events, struct nf_exp_event *item); -}; - -int nf_ct_expect_register_notifier(struct net *net, - struct nf_exp_event_notifier *nb); -void nf_ct_expect_unregister_notifier(struct net *net, - struct nf_exp_event_notifier *nb); - void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp, u32 portid, int report); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index fefd38db95b3..0294f3d473af 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -113,7 +113,6 @@ struct netns_ct { struct ct_pcpu __percpu *pcpu_lists; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; - struct nf_exp_event_notifier __rcu *nf_expect_event_cb; struct nf_ip_net nf_ct_proto; #if defined(CONFIG_NF_CONNTRACK_LABELS) unsigned int labels_used; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index d92f78e4bc7c..41768ff19464 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -240,11 +240,11 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, { struct net *net = nf_ct_exp_net(exp); - struct nf_exp_event_notifier *notify; + struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; rcu_read_lock(); - notify = rcu_dereference(net->ct.nf_expect_event_cb); + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); if (!notify) goto out_unlock; @@ -283,47 +283,10 @@ void nf_conntrack_unregister_notifier(struct net *net) mutex_lock(&nf_ct_ecache_mutex); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); - /* synchronize_rcu() is called from ctnetlink_exit. */ + /* synchronize_rcu() is called after netns pre_exit */ } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -int nf_ct_expect_register_notifier(struct net *net, - struct nf_exp_event_notifier *new) -{ - int ret; - struct nf_exp_event_notifier *notify; - - mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, - lockdep_is_held(&nf_ct_ecache_mutex)); - if (notify != NULL) { - ret = -EBUSY; - goto out_unlock; - } - rcu_assign_pointer(net->ct.nf_expect_event_cb, new); - ret = 0; - -out_unlock: - mutex_unlock(&nf_ct_ecache_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); - -void nf_ct_expect_unregister_notifier(struct net *net, - struct nf_exp_event_notifier *new) -{ - struct nf_exp_event_notifier *notify; - - mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, - lockdep_is_held(&nf_ct_ecache_mutex)); - BUG_ON(notify != new); - RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); - mutex_unlock(&nf_ct_ecache_mutex); - /* synchronize_rcu() is called from ctnetlink_exit. */ -} -EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); - void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6d6f7cd70753..5008fa0891b3 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3104,7 +3104,7 @@ nla_put_failure: #ifdef CONFIG_NF_CONNTRACK_EVENTS static int -ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) +ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item) { struct nf_conntrack_expect *exp = item->exp; struct net *net = nf_ct_exp_net(exp); @@ -3756,9 +3756,6 @@ static int ctnetlink_stat_exp_cpu(struct sk_buff *skb, #ifdef CONFIG_NF_CONNTRACK_EVENTS static struct nf_ct_event_notifier ctnl_notifier = { .ct_event = ctnetlink_conntrack_event, -}; - -static struct nf_exp_event_notifier ctnl_notifier_exp = { .exp_event = ctnetlink_expect_event, }; #endif @@ -3852,42 +3849,21 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP); static int __net_init ctnetlink_net_init(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - int ret; - nf_conntrack_register_notifier(net, &ctnl_notifier); - - ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp); - if (ret < 0) { - pr_err("ctnetlink_init: cannot expect register notifier.\n"); - nf_conntrack_unregister_notifier(net); - return ret; - } #endif return 0; } -static void ctnetlink_net_exit(struct net *net) +static void ctnetlink_net_pre_exit(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp); nf_conntrack_unregister_notifier(net); #endif } -static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) -{ - struct net *net; - - list_for_each_entry(net, net_exit_list, exit_list) - ctnetlink_net_exit(net); - - /* wait for other cpus until they are done with ctnl_notifiers */ - synchronize_rcu(); -} - static struct pernet_operations ctnetlink_net_ops = { .init = ctnetlink_net_init, - .exit_batch = ctnetlink_net_exit_batch, + .pre_exit = ctnetlink_net_pre_exit, }; static int __init ctnetlink_init(void) -- cgit v1.2.3 From 723783d077e39c256a1fafebbd97cbb14207c28f Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Thu, 26 Aug 2021 10:49:47 +0800 Subject: sock: remove one redundant SKB_FRAG_PAGE_ORDER macro Both SKB_FRAG_PAGE_ORDER are defined to the same value in net/core/sock.c and drivers/vhost/net.c. Move the SKB_FRAG_PAGE_ORDER definition to net/core/sock.h, as both net/core/sock.c and drivers/vhost/net.c include it, and it seems a reasonable file to put the macro. Signed-off-by: Yunsheng Lin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/vhost/net.c | 2 -- include/net/sock.h | 1 + net/core/sock.c | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/net') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 6414bd5741b8..3a249ee7e144 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -643,8 +643,6 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len) !vhost_vq_avail_empty(vq->dev, vq); } -#define SKB_FRAG_PAGE_ORDER get_order(32768) - static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz, struct page_frag *pfrag, gfp_t gfp) { diff --git a/include/net/sock.h b/include/net/sock.h index 95b25777b53e..66a9a90f9558 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2717,6 +2717,7 @@ extern int sysctl_optmem_max; extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; +#define SKB_FRAG_PAGE_ORDER get_order(32768) DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) diff --git a/net/core/sock.c b/net/core/sock.c index 950f1e70dbf5..62627e868e03 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2574,7 +2574,6 @@ static void sk_leave_memory_pressure(struct sock *sk) } } -#define SKB_FRAG_PAGE_ORDER get_order(32768) DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); /** -- cgit v1.2.3 From 49b99da2c9ce13ffcd93fe3a0f5670791c1d76f7 Mon Sep 17 00:00:00 2001 From: Rocco Yue Date: Fri, 27 Aug 2021 23:04:12 +0800 Subject: ipv6: add IFLA_INET6_RA_MTU to expose mtu value The kernel provides a "/proc/sys/net/ipv6/conf//mtu" file, which can temporarily record the mtu value of the last received RA message when the RA mtu value is lower than the interface mtu, but this proc has following limitations: (1) when the interface mtu (/sys/class/net//mtu) is updeated, mtu6 (/proc/sys/net/ipv6/conf//mtu) will be updated to the value of interface mtu; (2) mtu6 (/proc/sys/net/ipv6/conf//mtu) only affect ipv6 connection, and not affect ipv4. Therefore, when the mtu option is carried in the RA message, there will be a problem that the user sometimes cannot obtain RA mtu value correctly by reading mtu6. After this patch set, if a RA message carries the mtu option, you can send a netlink msg which nlmsg_type is RTM_GETLINK, and then by parsing the attribute of IFLA_INET6_RA_MTU to get the mtu value carried in the RA message received on the inet6 device. In addition, you can also get a link notification when ra_mtu is updated so it doesn't have to poll. In this way, if the MTU values that the device receives from the network in the PCO IPv4 and the RA IPv6 procedures are different, the user can obtain the correct ipv6 ra_mtu value and compare the value of ra_mtu and ipv4 mtu, then the device can use the lower MTU value for both IPv4 and IPv6. Signed-off-by: Rocco Yue Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210827150412.9267-1-rocco.yue@mediatek.com Signed-off-by: Jakub Kicinski --- include/net/if_inet6.h | 2 ++ include/uapi/linux/if_link.h | 1 + net/ipv6/addrconf.c | 10 ++++++++++ net/ipv6/ndisc.c | 17 +++++++++++------ tools/include/uapi/linux/if_link.h | 1 + 5 files changed, 25 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 42235c178b06..653e7d0f65cb 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -210,6 +210,8 @@ struct inet6_dev { unsigned long tstamp; /* ipv6InterfaceTable update timestamp */ struct rcu_head rcu; + + unsigned int ra_mtu; }; static inline void ipv6_eth_mc_map(const struct in6_addr *addr, char *buf) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8aad65b69054..eebd3894fe89 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -417,6 +417,7 @@ enum { IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ IFLA_INET6_TOKEN, /* device token */ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ + IFLA_INET6_RA_MTU, /* mtu carried in the RA message */ __IFLA_INET6_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8381288a0d6e..17756f3ed33b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -394,6 +394,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; ndev->cnf.mtu6 = dev->mtu; + ndev->ra_mtu = 0; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); if (!ndev->nd_parms) { kfree(ndev); @@ -3849,6 +3850,7 @@ restart: } idev->tstamp = jiffies; + idev->ra_mtu = 0; /* Last: Shot the device (if unregistered) */ if (unregister) { @@ -5543,6 +5545,7 @@ static inline size_t inet6_ifla6_size(void) + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ + nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */ + nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */ + + nla_total_size(4) /* IFLA_INET6_RA_MTU */ + 0; } @@ -5651,6 +5654,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode)) goto nla_put_failure; + if (idev->ra_mtu && + nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -5767,6 +5774,9 @@ update_lft: static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = { [IFLA_INET6_ADDR_GEN_MODE] = { .type = NLA_U8 }, [IFLA_INET6_TOKEN] = { .len = sizeof(struct in6_addr) }, + [IFLA_INET6_RA_MTU] = { .type = NLA_REJECT, + .reject_message = + "IFLA_INET6_RA_MTU can not be set" }, }; static int check_addr_gen_mode(int mode) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index c467c6419893..4b098521a44c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1391,12 +1391,6 @@ skip_defrtr: } } - /* - * Send a notify if RA changed managed/otherconf flags or timer settings - */ - if (send_ifinfo_notify) - inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); - skip_linkparms: /* @@ -1496,6 +1490,11 @@ skip_routeinfo: memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); mtu = ntohl(n); + if (in6_dev->ra_mtu != mtu) { + in6_dev->ra_mtu = mtu; + send_ifinfo_notify = true; + } + if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); } else if (in6_dev->cnf.mtu6 != mtu) { @@ -1519,6 +1518,12 @@ skip_routeinfo: ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: + /* Send a notify if RA changed managed/otherconf flags or + * timer settings or ra_mtu value + */ + if (send_ifinfo_notify) + inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); + fib6_info_release(rt); if (neigh) neigh_release(neigh); diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index eb15f319aa57..b3610fdd1fee 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -230,6 +230,7 @@ enum { IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ IFLA_INET6_TOKEN, /* device token */ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ + IFLA_INET6_RA_MTU, /* mtu carried in the RA message */ __IFLA_INET6_MAX }; -- cgit v1.2.3 From 7a3f5b0de3647c854e34269c3332d7a1e902901a Mon Sep 17 00:00:00 2001 From: Ryoga Saito Date: Tue, 17 Aug 2021 08:39:37 +0000 Subject: netfilter: add netfilter hooks to SRv6 data plane This patch introduces netfilter hooks for solving the problem that conntrack couldn't record both inner flows and outer flows. This patch also introduces a new sysctl toggle for enabling lightweight tunnel netfilter hooks. Signed-off-by: Ryoga Saito Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.rst | 7 ++ include/net/lwtunnel.h | 3 + include/net/netfilter/nf_hooks_lwtunnel.h | 7 ++ net/core/lwtunnel.c | 3 + net/ipv6/seg6_iptunnel.c | 75 ++++++++++++++- net/ipv6/seg6_local.c | 111 ++++++++++++++++------- net/netfilter/Makefile | 3 + net/netfilter/nf_conntrack_standalone.c | 15 +++ net/netfilter/nf_hooks_lwtunnel.c | 53 +++++++++++ 9 files changed, 241 insertions(+), 36 deletions(-) create mode 100644 include/net/netfilter/nf_hooks_lwtunnel.h create mode 100644 net/netfilter/nf_hooks_lwtunnel.c (limited to 'include/net') diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index 024d784157c8..34ca762ea56f 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -184,6 +184,13 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds) This extended timeout will be used in case there is an GRE stream detected. +nf_hooks_lwtunnel - BOOLEAN + - 0 - disabled (default) + - not 0 - enabled + + If this option is enabled, the lightweight tunnel netfilter hooks are + enabled. This option cannot be disabled once it is enabled. + nf_flowtable_tcp_timeout - INTEGER (seconds) default 30 diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 05cfd6ff6528..6f15e6fa154e 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -51,6 +51,9 @@ struct lwtunnel_encap_ops { }; #ifdef CONFIG_LWTUNNEL + +DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); + void lwtstate_free(struct lwtunnel_state *lws); static inline struct lwtunnel_state * diff --git a/include/net/netfilter/nf_hooks_lwtunnel.h b/include/net/netfilter/nf_hooks_lwtunnel.h new file mode 100644 index 000000000000..52e27920f829 --- /dev/null +++ b/include/net/netfilter/nf_hooks_lwtunnel.h @@ -0,0 +1,7 @@ +#include +#include + +#ifdef CONFIG_SYSCTL +int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +#endif diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index d0ae987d2de9..2820aca2173a 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -23,6 +23,9 @@ #include #include +DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); +EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); + #ifdef CONFIG_MODULES static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 897fa59c47de..6ebc7aa24466 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -26,6 +26,8 @@ #ifdef CONFIG_IPV6_SEG6_HMAC #include #endif +#include +#include static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) { @@ -295,11 +297,19 @@ static int seg6_do_srh(struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + nf_reset_ct(skb); return 0; } -static int seg6_input(struct sk_buff *skb) +static int seg6_input_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return dst_input(skb); +} + +static int seg6_input_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; @@ -337,10 +347,41 @@ static int seg6_input(struct sk_buff *skb) if (unlikely(err)) return err; - return dst_input(skb); + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + dev_net(skb->dev), NULL, skb, NULL, + skb_dst(skb)->dev, seg6_input_finish); + + return seg6_input_finish(dev_net(skb->dev), NULL, skb); } -static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +static int seg6_input_nf(struct sk_buff *skb) +{ + struct net_device *dev = skb_dst(skb)->dev; + struct net *net = dev_net(skb->dev); + + switch (skb->protocol) { + case htons(ETH_P_IP): + return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL, + skb, NULL, dev, seg6_input_core); + case htons(ETH_P_IPV6): + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL, + skb, NULL, dev, seg6_input_core); + } + + return -EINVAL; +} + +static int seg6_input(struct sk_buff *skb) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return seg6_input_nf(skb); + + return seg6_input_core(dev_net(skb->dev), NULL, skb); +} + +static int seg6_output_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; @@ -387,12 +428,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (unlikely(err)) goto drop; + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, + NULL, skb_dst(skb)->dev, dst_output); + return dst_output(net, sk, skb); drop: kfree_skb(skb); return err; } +static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_device *dev = skb_dst(skb)->dev; + + switch (skb->protocol) { + case htons(ETH_P_IP): + return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, + NULL, dev, seg6_output_core); + case htons(ETH_P_IPV6): + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, + NULL, dev, seg6_output_core); + } + + return -EINVAL; +} + +static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return seg6_output_nf(net, sk, skb); + + return seg6_output_core(net, sk, skb); +} + static int seg6_build_state(struct net *net, struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts, diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 60bf3b877957..ddc8dfcd4e2b 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #define SEG6_F_ATTR(i) BIT(i) @@ -413,12 +415,33 @@ drop: return -EINVAL; } +static int input_action_end_dx6_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct in6_addr *nhaddr = NULL; + struct seg6_local_lwt *slwt; + + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); + + /* The inner packet is not associated to any local interface, + * so we do not call netif_rx(). + * + * If slwt->nh6 is set to ::, then lookup the nexthop for the + * inner packet's DA. Otherwise, use the specified nexthop. + */ + if (!ipv6_addr_any(&slwt->nh6)) + nhaddr = &slwt->nh6; + + seg6_lookup_nexthop(skb, nhaddr, 0); + + return dst_input(skb); +} + /* decapsulate and forward to specified nexthop */ static int input_action_end_dx6(struct sk_buff *skb, struct seg6_local_lwt *slwt) { - struct in6_addr *nhaddr = NULL; - /* this function accepts IPv6 encapsulated packets, with either * an SRH with SL=0, or no SRH. */ @@ -429,40 +452,30 @@ static int input_action_end_dx6(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; - /* The inner packet is not associated to any local interface, - * so we do not call netif_rx(). - * - * If slwt->nh6 is set to ::, then lookup the nexthop for the - * inner packet's DA. Otherwise, use the specified nexthop. - */ - - if (!ipv6_addr_any(&slwt->nh6)) - nhaddr = &slwt->nh6; - skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + nf_reset_ct(skb); - seg6_lookup_nexthop(skb, nhaddr, 0); + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, NULL, + skb_dst(skb)->dev, input_action_end_dx6_finish); - return dst_input(skb); + return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb); drop: kfree_skb(skb); return -EINVAL; } -static int input_action_end_dx4(struct sk_buff *skb, - struct seg6_local_lwt *slwt) +static int input_action_end_dx4_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { + struct dst_entry *orig_dst = skb_dst(skb); + struct seg6_local_lwt *slwt; struct iphdr *iph; __be32 nhaddr; int err; - if (!decap_and_validate(skb, IPPROTO_IPIP)) - goto drop; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto drop; - - skb->protocol = htons(ETH_P_IP); + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); iph = ip_hdr(skb); @@ -470,14 +483,34 @@ static int input_action_end_dx4(struct sk_buff *skb, skb_dst_drop(skb); - skb_set_transport_header(skb, sizeof(struct iphdr)); - err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); - if (err) - goto drop; + if (err) { + kfree_skb(skb); + return -EINVAL; + } return dst_input(skb); +} + +static int input_action_end_dx4(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + if (!decap_and_validate(skb, IPPROTO_IPIP)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto drop; + + skb->protocol = htons(ETH_P_IP); + skb_set_transport_header(skb, sizeof(struct iphdr)); + nf_reset_ct(skb); + + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, NULL, + skb_dst(skb)->dev, input_action_end_dx4_finish); + return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb); drop: kfree_skb(skb); return -EINVAL; @@ -645,6 +678,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, skb_dst_drop(skb); skb_set_transport_header(skb, hdrlen); + nf_reset_ct(skb); return end_dt_vrf_rcv(skb, family, vrf); @@ -1078,7 +1112,8 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt, u64_stats_update_end(&pcounters->syncp); } -static int seg6_local_input(struct sk_buff *skb) +static int seg6_local_input_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct seg6_action_desc *desc; @@ -1086,11 +1121,6 @@ static int seg6_local_input(struct sk_buff *skb) unsigned int len = skb->len; int rc; - if (skb->protocol != htons(ETH_P_IPV6)) { - kfree_skb(skb); - return -EINVAL; - } - slwt = seg6_local_lwtunnel(orig_dst->lwtstate); desc = slwt->desc; @@ -1104,6 +1134,21 @@ static int seg6_local_input(struct sk_buff *skb) return rc; } +static int seg6_local_input(struct sk_buff *skb) +{ + if (skb->protocol != htons(ETH_P_IPV6)) { + kfree_skb(skb); + return -EINVAL; + } + + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + seg6_local_input_core); + + return seg6_local_input_core(dev_net(skb->dev), NULL, skb); +} + static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_ACTION] = { .type = NLA_U32 }, [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 049890e00a3d..aab20e575ecd 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -212,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/ # IPVS obj-$(CONFIG_IP_VS) += ipvs/ + +# lwtunnel +obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e84b499b7bfa..7e0d956da51d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -22,6 +22,9 @@ #include #include #include +#ifdef CONFIG_LWTUNNEL +#include +#endif #include static bool enable_hooks __read_mostly; @@ -612,6 +615,9 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, #endif +#ifdef CONFIG_LWTUNNEL + NF_SYSCTL_CT_LWTUNNEL, +#endif __NF_SYSCTL_CT_LAST_SYSCTL, }; @@ -958,6 +964,15 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, +#endif +#ifdef CONFIG_LWTUNNEL + [NF_SYSCTL_CT_LWTUNNEL] = { + .procname = "nf_hooks_lwtunnel", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = nf_hooks_lwtunnel_sysctl_handler, + }, #endif {} }; diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c new file mode 100644 index 000000000000..00e89ffd78f6 --- /dev/null +++ b/net/netfilter/nf_hooks_lwtunnel.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +static inline int nf_hooks_lwtunnel_get(void) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return 1; + else + return 0; +} + +static inline int nf_hooks_lwtunnel_set(int enable) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) { + if (!enable) + return -EBUSY; + } else if (enable) { + static_branch_enable(&nf_hooks_lwtunnel_enabled); + } + + return 0; +} + +#ifdef CONFIG_SYSCTL +int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int proc_nf_hooks_lwtunnel_enabled = 0; + struct ctl_table tmp = { + .procname = table->procname, + .data = &proc_nf_hooks_lwtunnel_enabled, + .maxlen = sizeof(int), + .mode = table->mode, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }; + int ret; + + if (!write) + proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get(); + + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + + if (write && ret == 0) + ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler); +#endif /* CONFIG_SYSCTL */ -- cgit v1.2.3 From ca49bfd90a9dde175d2929dc1544b54841e33804 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 26 Aug 2021 14:54:25 +0300 Subject: sch_htb: Fix inconsistency when leaf qdisc creation fails In HTB offload mode, qdiscs of leaf classes are grafted to netdev queues. sch_htb expects the dev_queue field of these qdiscs to point to the corresponding queues. However, qdisc creation may fail, and in that case noop_qdisc is used instead. Its dev_queue doesn't point to the right queue, so sch_htb can lose track of used netdev queues, which will cause internal inconsistencies. This commit fixes this bug by keeping track of the netdev queue inside struct htb_class. All reads of cl->leaf.q->dev_queue are replaced by the new field, the two values are synced on writes, and WARNs are added to assert equality of the two values. The driver API has changed: when TC_HTB_LEAF_DEL needs to move a queue, the driver used to pass the old and new queue IDs to sch_htb. Now that there is a new field (offload_queue) in struct htb_class that needs to be updated on this operation, the driver will pass the old class ID to sch_htb instead (it already knows the new class ID). Fixes: d03b195b5aa0 ("sch_htb: Hierarchical QoS hardware offload") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20210826115425.1744053-1-maximmi@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/qos.c | 15 ++-- drivers/net/ethernet/mellanox/mlx5/core/en/qos.h | 4 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +- include/net/pkt_cls.h | 3 +- net/sched/sch_htb.c | 97 +++++++++++++++-------- 5 files changed, 72 insertions(+), 50 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index c9ac69f62f21..e8a8d78e3e4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -733,8 +733,8 @@ static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid) spin_unlock_bh(qdisc_lock(qdisc)); } -int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, - u16 *new_qid, struct netlink_ext_ack *extack) +int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 *classid, + struct netlink_ext_ack *extack) { struct mlx5e_qos_node *node; struct netdev_queue *txq; @@ -742,11 +742,9 @@ int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, bool opened; int err; - qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", classid); - - *old_qid = *new_qid = 0; + qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid); - node = mlx5e_sw_node_find(priv, classid); + node = mlx5e_sw_node_find(priv, *classid); if (!node) return -ENOENT; @@ -764,7 +762,7 @@ int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); if (err) /* Not fatal. */ qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", - node->hw_id, classid, err); + node->hw_id, *classid, err); mlx5e_sw_node_delete(priv, node); @@ -826,8 +824,7 @@ int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, if (opened) mlx5e_reactivate_qos_sq(priv, moved_qid, txq); - *old_qid = mlx5e_qid_from_qos(&priv->channels, moved_qid); - *new_qid = mlx5e_qid_from_qos(&priv->channels, qid); + *classid = node->classid; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h index 5af7991fcd19..757682b7c0e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h @@ -34,8 +34,8 @@ int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid, struct netlink_ext_ack *extack); int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid, u64 rate, u64 ceil, struct netlink_ext_ack *extack); -int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, - u16 *new_qid, struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 *classid, + struct netlink_ext_ack *extack); int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force, struct netlink_ext_ack *extack); int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 26d2f78c7706..47efd858964d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3000,8 +3000,7 @@ static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offloa return mlx5e_htb_leaf_to_inner(priv, htb->parent_classid, htb->classid, htb->rate, htb->ceil, htb->extack); case TC_HTB_LEAF_DEL: - return mlx5e_htb_leaf_del(priv, htb->classid, &htb->moved_qid, &htb->qid, - htb->extack); + return mlx5e_htb_leaf_del(priv, &htb->classid, htb->extack); case TC_HTB_LEAF_DEL_LAST: case TC_HTB_LEAF_DEL_LAST_FORCE: return mlx5e_htb_leaf_del_last(priv, htb->classid, diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 8fb47fc61097..83a6d0792180 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -816,10 +816,9 @@ enum tc_htb_command { struct tc_htb_qopt_offload { struct netlink_ext_ack *extack; enum tc_htb_command command; - u16 classid; u32 parent_classid; + u16 classid; u16 qid; - u16 moved_qid; u64 rate; u64 ceil; }; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 81ea8332547a..5067a6e5d4fd 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -125,6 +125,7 @@ struct htb_class { struct htb_class_leaf { int deficit[TC_HTB_MAXDEPTH]; struct Qdisc *q; + struct netdev_queue *offload_queue; } leaf; struct htb_class_inner { struct htb_prio clprio[TC_HTB_NUMPRIO]; @@ -1411,24 +1412,47 @@ htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q) return old_q; } -static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new) +static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl) +{ + struct netdev_queue *queue; + + queue = cl->leaf.offload_queue; + if (!(cl->leaf.q->flags & TCQ_F_BUILTIN)) + WARN_ON(cl->leaf.q->dev_queue != queue); + + return queue; +} + +static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old, + struct htb_class *cl_new, bool destroying) { struct netdev_queue *queue_old, *queue_new; struct net_device *dev = qdisc_dev(sch); - struct Qdisc *qdisc; - queue_old = netdev_get_tx_queue(dev, qid_old); - queue_new = netdev_get_tx_queue(dev, qid_new); + queue_old = htb_offload_get_queue(cl_old); + queue_new = htb_offload_get_queue(cl_new); - if (dev->flags & IFF_UP) - dev_deactivate(dev); - qdisc = dev_graft_qdisc(queue_old, NULL); - qdisc->dev_queue = queue_new; - qdisc = dev_graft_qdisc(queue_new, qdisc); - if (dev->flags & IFF_UP) - dev_activate(dev); + if (!destroying) { + struct Qdisc *qdisc; - WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN)); + if (dev->flags & IFF_UP) + dev_deactivate(dev); + qdisc = dev_graft_qdisc(queue_old, NULL); + WARN_ON(qdisc != cl_old->leaf.q); + } + + if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN)) + cl_old->leaf.q->dev_queue = queue_new; + cl_old->leaf.offload_queue = queue_new; + + if (!destroying) { + struct Qdisc *qdisc; + + qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q); + if (dev->flags & IFF_UP) + dev_activate(dev); + WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN)); + } } static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, @@ -1442,10 +1466,8 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl->level) return -EINVAL; - if (q->offload) { - dev_queue = new->dev_queue; - WARN_ON(dev_queue != cl->leaf.q->dev_queue); - } + if (q->offload) + dev_queue = htb_offload_get_queue(cl); if (!new) { new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, @@ -1514,6 +1536,8 @@ static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl, parent->ctokens = parent->cbuffer; parent->t_c = ktime_get_ns(); parent->cmode = HTB_CAN_SEND; + if (q->offload) + parent->leaf.offload_queue = cl->leaf.offload_queue; } static void htb_parent_to_leaf_offload(struct Qdisc *sch, @@ -1534,6 +1558,7 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, struct netlink_ext_ack *extack) { struct tc_htb_qopt_offload offload_opt; + struct netdev_queue *dev_queue; struct Qdisc *q = cl->leaf.q; struct Qdisc *old = NULL; int err; @@ -1542,16 +1567,15 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, return -EINVAL; WARN_ON(!q); - if (!destroying) { - /* On destroy of HTB, two cases are possible: - * 1. q is a normal qdisc, but q->dev_queue has noop qdisc. - * 2. q is a noop qdisc (for nodes that were inner), - * q->dev_queue is noop_netdev_queue. + dev_queue = htb_offload_get_queue(cl); + old = htb_graft_helper(dev_queue, NULL); + if (destroying) + /* Before HTB is destroyed, the kernel grafts noop_qdisc to + * all queues. */ - old = htb_graft_helper(q->dev_queue, NULL); - WARN_ON(!old); + WARN_ON(!(old->flags & TCQ_F_BUILTIN)); + else WARN_ON(old != q); - } if (cl->parent) { cl->parent->bstats_bias.bytes += q->bstats.bytes; @@ -1570,18 +1594,17 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, if (!err || destroying) qdisc_put(old); else - htb_graft_helper(q->dev_queue, old); + htb_graft_helper(dev_queue, old); if (last_child) return err; - if (!err && offload_opt.moved_qid != 0) { - if (destroying) - q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch), - offload_opt.qid); - else - htb_offload_move_qdisc(sch, offload_opt.moved_qid, - offload_opt.qid); + if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) { + u32 classid = TC_H_MAJ(sch->handle) | + TC_H_MIN(offload_opt.classid); + struct htb_class *moved_cl = htb_find(classid, sch); + + htb_offload_move_qdisc(sch, moved_cl, cl, destroying); } return err; @@ -1704,9 +1727,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, } if (last_child) { - struct netdev_queue *dev_queue; + struct netdev_queue *dev_queue = sch->dev_queue; + + if (q->offload) + dev_queue = htb_offload_get_queue(cl); - dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue; new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, cl->parent->common.classid, NULL); @@ -1878,7 +1903,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } dev_queue = netdev_get_tx_queue(dev, offload_opt.qid); } else { /* First child. */ - dev_queue = parent->leaf.q->dev_queue; + dev_queue = htb_offload_get_queue(parent); old_q = htb_graft_helper(dev_queue, NULL); WARN_ON(old_q != parent->leaf.q); offload_opt = (struct tc_htb_qopt_offload) { @@ -1935,6 +1960,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* leaf (we) needs elementary qdisc */ cl->leaf.q = new_q ? new_q : &noop_qdisc; + if (q->offload) + cl->leaf.offload_queue = dev_queue; cl->parent = parent; -- cgit v1.2.3 From b9edbfe1adecfc48fd11061dce68afb03d6adbdc Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 2 Sep 2021 14:36:17 +0900 Subject: flow: fix object-size-mismatch warning in flowi{4,6}_to_flowi_common() Commit 3df98d79215ace13 ("lsm,selinux: pass flowi_common instead of flowi to the LSM hooks") introduced flowi{4,6}_to_flowi_common() functions which cause UBSAN warning when building with LLVM 11.0.1 on Ubuntu 21.04. ================================================================================ UBSAN: object-size-mismatch in ./include/net/flow.h:197:33 member access within address ffffc9000109fbd8 with insufficient space for an object of type 'struct flowi' CPU: 2 PID: 7410 Comm: systemd-resolve Not tainted 5.14.0 #51 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 02/27/2020 Call Trace: dump_stack_lvl+0x103/0x171 ubsan_type_mismatch_common+0x1de/0x390 __ubsan_handle_type_mismatch_v1+0x41/0x50 udp_sendmsg+0xda2/0x1300 ? ip_skb_dst_mtu+0x1f0/0x1f0 ? sock_rps_record_flow+0xe/0x200 ? inet_send_prepare+0x2d/0x90 sock_sendmsg+0x49/0x80 ____sys_sendmsg+0x269/0x370 __sys_sendmsg+0x15e/0x1d0 ? syscall_enter_from_user_mode+0xf0/0x1b0 do_syscall_64+0x3d/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f7081a50497 Code: 0c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 RSP: 002b:00007ffc153870f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007f7081a50497 RDX: 0000000000000000 RSI: 00007ffc15387140 RDI: 000000000000000c RBP: 00007ffc15387140 R08: 0000563f29a5e4fc R09: 000000000000cd28 R10: 0000563f29a68a30 R11: 0000000000000246 R12: 000000000000000c R13: 0000000000000001 R14: 0000563f29a68a30 R15: 0000563f29a5e50c ================================================================================ I don't think we need to call flowi{4,6}_to_flowi() from these functions because the first member of "struct flowi4" and "struct flowi6" is struct flowi_common __fl_common; while the first member of "struct flowi" is union { struct flowi_common __fl_common; struct flowi4 ip4; struct flowi6 ip6; struct flowidn dn; } u; which should point to the same address without access to "struct flowi". Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- include/net/flow.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 6f5e70240071..58beb16a49b8 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -194,7 +194,7 @@ static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) static inline struct flowi_common *flowi4_to_flowi_common(struct flowi4 *fl4) { - return &(flowi4_to_flowi(fl4)->u.__fl_common); + return &(fl4->__fl_common); } static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6) @@ -204,7 +204,7 @@ static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6) static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6) { - return &(flowi6_to_flowi(fl6)->u.__fl_common); + return &(fl6->__fl_common); } static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) -- cgit v1.2.3